benchbox 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- benchbox/__init__.py +1 -1
- benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query72.tpl +1 -1
- benchbox/_binaries/tpc-ds/{darwin-x86_64/query_templates/ansi.tpl → templates/query_templates/sqlserver.tpl} +1 -1
- benchbox/_binaries/tpc-ds/templates/query_variants/README +6 -0
- benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query10.tpl → templates/query_variants/query10a.tpl} +13 -14
- benchbox/_binaries/tpc-ds/{darwin-x86_64/query_templates/query14.tpl → templates/query_variants/query14a.tpl} +30 -26
- benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query18.tpl → templates/query_variants/query18a.tpl} +40 -19
- benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query22.tpl → templates/query_variants/query22a.tpl} +31 -9
- benchbox/_binaries/tpc-ds/{darwin-x86_64/query_templates/query27.tpl → templates/query_variants/query27a.tpl} +23 -10
- benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query35.tpl → templates/query_variants/query35a.tpl} +9 -8
- benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query36.tpl → templates/query_variants/query36a.tpl} +24 -12
- benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query51.tpl → templates/query_variants/query51a.tpl} +37 -20
- benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query5.tpl → templates/query_variants/query5a.tpl} +15 -10
- benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query67.tpl → templates/query_variants/query67a.tpl} +46 -18
- benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query70.tpl → templates/query_variants/query70a.tpl} +31 -27
- benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query77.tpl → templates/query_variants/query77a.tpl} +22 -15
- benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query80.tpl → templates/query_variants/query80a.tpl} +22 -8
- benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query86.tpl → templates/query_variants/query86a.tpl} +22 -13
- benchbox/_binaries/tpc-h/templates/dists.dss +836 -0
- benchbox/_binaries/tpc-h/templates/queries/1.sql +28 -0
- benchbox/_binaries/tpc-h/templates/queries/10.sql +38 -0
- benchbox/_binaries/tpc-h/templates/queries/11.sql +34 -0
- benchbox/_binaries/tpc-h/templates/queries/12.sql +35 -0
- benchbox/_binaries/tpc-h/templates/queries/13.sql +27 -0
- benchbox/_binaries/tpc-h/templates/queries/14.sql +20 -0
- benchbox/_binaries/tpc-h/templates/queries/15.sql +40 -0
- benchbox/_binaries/tpc-h/templates/queries/16.sql +37 -0
- benchbox/_binaries/tpc-h/templates/queries/17.sql +24 -0
- benchbox/_binaries/tpc-h/templates/queries/18.sql +39 -0
- benchbox/_binaries/tpc-h/templates/queries/19.sql +42 -0
- benchbox/_binaries/tpc-h/templates/queries/2.sql +50 -0
- benchbox/_binaries/tpc-h/templates/queries/20.sql +44 -0
- benchbox/_binaries/tpc-h/templates/queries/21.sql +46 -0
- benchbox/_binaries/tpc-h/templates/queries/22.sql +44 -0
- benchbox/_binaries/tpc-h/templates/queries/3.sql +29 -0
- benchbox/_binaries/tpc-h/templates/queries/4.sql +28 -0
- benchbox/_binaries/tpc-h/templates/queries/5.sql +31 -0
- benchbox/_binaries/tpc-h/templates/queries/6.sql +16 -0
- benchbox/_binaries/tpc-h/templates/queries/7.sql +46 -0
- benchbox/_binaries/tpc-h/templates/queries/8.sql +44 -0
- benchbox/_binaries/tpc-h/templates/queries/9.sql +39 -0
- benchbox/_binaries/tpc-h/templates/variants/12a.sql +27 -0
- benchbox/_binaries/tpc-h/templates/variants/13a.sql +30 -0
- benchbox/_binaries/tpc-h/templates/variants/14a.sql +18 -0
- benchbox/_binaries/tpc-h/templates/variants/15a.sql +39 -0
- benchbox/_binaries/tpc-h/templates/variants/8a.sql +77 -0
- benchbox/base.py +88 -121
- benchbox/cli/benchmarks.py +3 -3
- benchbox/cli/commands/calculate_qphh.py +55 -14
- benchbox/cli/commands/checks.py +1 -4
- benchbox/cli/commands/convert.py +8 -3
- benchbox/cli/commands/metrics.py +55 -14
- benchbox/cli/commands/results.py +131 -3
- benchbox/cli/commands/run.py +157 -22
- benchbox/cli/commands/visualize.py +3 -3
- benchbox/cli/composite_params.py +1 -1
- benchbox/cli/config.py +13 -3
- benchbox/cli/database.py +3 -3
- benchbox/cli/dryrun.py +30 -4
- benchbox/cli/exceptions.py +2 -1
- benchbox/cli/execution_pipeline.py +2 -1
- benchbox/cli/orchestrator.py +25 -71
- benchbox/cli/tuning.py +1 -1
- benchbox/core/ai_primitives/benchmark.py +53 -0
- benchbox/core/ai_primitives/dataframe_operations.py +1217 -0
- benchbox/core/base_benchmark.py +90 -68
- benchbox/core/coffeeshop/queries.py +1 -1
- benchbox/core/coffeeshop/schema.py +1 -1
- benchbox/core/comparison/plotter.py +5 -4
- benchbox/core/dataframe/__init__.py +26 -0
- benchbox/core/dataframe/benchmark_suite.py +5 -4
- benchbox/core/dataframe/context.py +45 -0
- benchbox/core/dataframe/data_loader.py +180 -79
- benchbox/core/dataframe/maintenance_interface.py +866 -0
- benchbox/core/dryrun.py +152 -22
- benchbox/core/expected_results/registry.py +22 -5
- benchbox/core/manifest/io.py +4 -3
- benchbox/core/metadata_primitives/__init__.py +31 -0
- benchbox/core/metadata_primitives/benchmark.py +337 -0
- benchbox/core/metadata_primitives/dataframe_operations.py +1824 -0
- benchbox/core/platform_registry.py +134 -45
- benchbox/core/read_primitives/benchmark.py +56 -4
- benchbox/core/read_primitives/dataframe_queries.py +6547 -0
- benchbox/core/results/__init__.py +47 -6
- benchbox/core/results/builder.py +909 -0
- benchbox/core/results/database.py +5 -5
- benchbox/core/results/exporter.py +58 -96
- benchbox/core/results/filenames.py +102 -0
- benchbox/core/results/loader.py +10 -9
- benchbox/core/results/metrics.py +211 -0
- benchbox/core/results/models.py +3 -1
- benchbox/core/results/normalizer.py +346 -0
- benchbox/core/results/platform_info.py +235 -0
- benchbox/core/results/query_normalizer.py +200 -0
- benchbox/core/results/schema.py +368 -69
- benchbox/core/runner/conversion.py +2 -0
- benchbox/core/runner/dataframe_runner.py +135 -131
- benchbox/core/runner/runner.py +111 -18
- benchbox/core/schemas.py +145 -3
- benchbox/core/ssb/generator.py +14 -2
- benchbox/core/tpc_compliance.py +4 -4
- benchbox/core/tpc_metrics.py +9 -4
- benchbox/core/tpcdi/generator/manifest.py +15 -2
- benchbox/core/tpcds/benchmark/runner.py +3 -7
- benchbox/core/tpcds/c_tools.py +34 -28
- benchbox/core/tpcds/dataframe_queries/queries.py +44 -21
- benchbox/core/tpcds/generator/filesystem.py +23 -11
- benchbox/core/tpcds/generator/manager.py +3 -2
- benchbox/core/tpcds/maintenance_test.py +281 -0
- benchbox/core/tpcds/power_test.py +21 -11
- benchbox/core/tpcds/throughput_test.py +27 -9
- benchbox/core/tpcds_obt/etl/transformer.py +24 -5
- benchbox/core/tpch/dataframe_queries.py +46 -43
- benchbox/core/tpch/generator.py +21 -8
- benchbox/core/tpch/maintenance_test.py +87 -0
- benchbox/core/tpch/power_test.py +21 -5
- benchbox/core/tpch/queries.py +2 -7
- benchbox/core/tpch/streams.py +3 -19
- benchbox/core/transaction_primitives/benchmark.py +99 -0
- benchbox/core/transaction_primitives/dataframe_operations.py +1294 -0
- benchbox/core/transaction_primitives/generator.py +11 -4
- benchbox/core/visualization/__init__.py +2 -2
- benchbox/core/visualization/charts.py +4 -4
- benchbox/core/visualization/dependencies.py +1 -12
- benchbox/core/visualization/exporters.py +15 -26
- benchbox/core/visualization/result_plotter.py +90 -49
- benchbox/core/visualization/templates.py +6 -6
- benchbox/core/write_primitives/__init__.py +13 -0
- benchbox/core/write_primitives/benchmark.py +66 -0
- benchbox/core/write_primitives/dataframe_operations.py +912 -0
- benchbox/core/write_primitives/generator.py +11 -4
- benchbox/mcp/__init__.py +5 -1
- benchbox/mcp/errors.py +29 -0
- benchbox/mcp/resources/registry.py +12 -7
- benchbox/mcp/schemas.py +62 -0
- benchbox/mcp/server.py +17 -14
- benchbox/mcp/tools/__init__.py +3 -0
- benchbox/mcp/tools/analytics.py +550 -582
- benchbox/mcp/tools/benchmark.py +603 -611
- benchbox/mcp/tools/discovery.py +156 -205
- benchbox/mcp/tools/results.py +332 -533
- benchbox/mcp/tools/visualization.py +449 -0
- benchbox/platforms/__init__.py +740 -622
- benchbox/platforms/adapter_factory.py +6 -6
- benchbox/platforms/azure_synapse.py +3 -7
- benchbox/platforms/base/adapter.py +189 -49
- benchbox/platforms/base/cloud_spark/config.py +8 -0
- benchbox/platforms/base/cloud_spark/mixins.py +96 -0
- benchbox/platforms/base/cloud_spark/session.py +4 -2
- benchbox/platforms/base/cloud_spark/staging.py +15 -7
- benchbox/platforms/base/data_loading.py +315 -1
- benchbox/platforms/base/format_capabilities.py +37 -2
- benchbox/platforms/base/utils.py +6 -4
- benchbox/platforms/bigquery.py +5 -6
- benchbox/platforms/clickhouse_cloud.py +263 -0
- benchbox/platforms/databricks/adapter.py +16 -15
- benchbox/platforms/databricks/dataframe_adapter.py +4 -1
- benchbox/platforms/dataframe/__init__.py +31 -0
- benchbox/platforms/dataframe/benchmark_mixin.py +779 -0
- benchbox/platforms/dataframe/cudf_df.py +3 -3
- benchbox/platforms/dataframe/dask_df.py +3 -3
- benchbox/platforms/dataframe/datafusion_df.py +152 -15
- benchbox/platforms/dataframe/delta_lake_maintenance.py +341 -0
- benchbox/platforms/dataframe/ducklake_maintenance.py +402 -0
- benchbox/platforms/dataframe/expression_family.py +47 -8
- benchbox/platforms/dataframe/hudi_maintenance.py +437 -0
- benchbox/platforms/dataframe/iceberg_maintenance.py +605 -0
- benchbox/platforms/dataframe/modin_df.py +3 -3
- benchbox/platforms/dataframe/pandas_df.py +3 -3
- benchbox/platforms/dataframe/pandas_family.py +59 -8
- benchbox/platforms/dataframe/platform_checker.py +16 -49
- benchbox/platforms/dataframe/polars_df.py +14 -12
- benchbox/platforms/dataframe/polars_maintenance.py +630 -0
- benchbox/platforms/dataframe/pyspark_df.py +15 -0
- benchbox/platforms/dataframe/pyspark_maintenance.py +613 -0
- benchbox/platforms/datafusion.py +5 -6
- benchbox/platforms/duckdb.py +2 -1
- benchbox/platforms/fabric_warehouse.py +15 -15
- benchbox/platforms/firebolt.py +3 -2
- benchbox/platforms/influxdb/adapter.py +7 -3
- benchbox/platforms/motherduck.py +3 -2
- benchbox/platforms/onehouse/__init__.py +39 -0
- benchbox/platforms/onehouse/onehouse_client.py +509 -0
- benchbox/platforms/onehouse/quanton_adapter.py +646 -0
- benchbox/platforms/postgresql.py +5 -9
- benchbox/platforms/presto.py +2 -2
- benchbox/platforms/pyspark/session.py +3 -3
- benchbox/platforms/pyspark/sql_adapter.py +2 -3
- benchbox/platforms/redshift.py +7 -7
- benchbox/platforms/snowflake.py +4 -4
- benchbox/platforms/snowpark_connect.py +2 -1
- benchbox/platforms/trino.py +2 -2
- benchbox/release/__init__.py +17 -0
- benchbox/release/content_validation.py +745 -0
- benchbox/release/workflow.py +17 -0
- benchbox/utils/VERSION_MANAGEMENT.md +1 -1
- benchbox/utils/cloud_storage.py +7 -5
- benchbox/utils/compression.py +8 -8
- benchbox/utils/compression_mixin.py +2 -1
- benchbox/utils/data_validation.py +23 -14
- benchbox/utils/dependencies.py +47 -7
- benchbox/utils/file_format.py +407 -0
- benchbox/utils/format_converters/__init__.py +5 -1
- benchbox/utils/format_converters/ducklake_converter.py +227 -0
- benchbox/utils/format_converters/vortex_converter.py +168 -0
- benchbox/utils/tpc_compilation.py +43 -0
- benchbox/utils/version.py +14 -2
- {benchbox-0.1.0.dist-info → benchbox-0.1.1.dist-info}/METADATA +15 -15
- benchbox-0.1.1.dist-info/RECORD +839 -0
- {benchbox-0.1.0.dist-info → benchbox-0.1.1.dist-info}/WHEEL +1 -1
- benchbox/_binaries/tpc-ds/darwin-arm64/query_templates/sqlserver.tpl +0 -37
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/README +0 -4
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/db2.tpl +0 -38
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/netezza.tpl +0 -38
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/oracle.tpl +0 -38
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query1.tpl +0 -62
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query10.tpl +0 -98
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query11.tpl +0 -119
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query12.tpl +0 -72
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query13.tpl +0 -89
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query15.tpl +0 -56
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query16.tpl +0 -76
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query17.tpl +0 -80
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query18.tpl +0 -73
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query19.tpl +0 -64
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query2.tpl +0 -94
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query20.tpl +0 -67
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query21.tpl +0 -65
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query22.tpl +0 -54
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query23.tpl +0 -144
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query24.tpl +0 -147
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query25.tpl +0 -84
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query26.tpl +0 -61
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query28.tpl +0 -90
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query29.tpl +0 -85
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query3.tpl +0 -58
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query30.tpl +0 -66
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query31.tpl +0 -88
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query32.tpl +0 -65
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query33.tpl +0 -113
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query34.tpl +0 -77
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query35.tpl +0 -98
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query36.tpl +0 -74
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query37.tpl +0 -57
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query38.tpl +0 -58
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query39.tpl +0 -93
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query4.tpl +0 -154
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query40.tpl +0 -63
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query41.tpl +0 -90
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query42.tpl +0 -64
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query43.tpl +0 -55
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query44.tpl +0 -72
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query45.tpl +0 -56
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query46.tpl +0 -78
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query47.tpl +0 -89
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query48.tpl +0 -104
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query49.tpl +0 -164
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query5.tpl +0 -165
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query50.tpl +0 -96
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query51.tpl +0 -80
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query52.tpl +0 -59
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query53.tpl +0 -64
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query54.tpl +0 -95
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query55.tpl +0 -52
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query56.tpl +0 -108
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query57.tpl +0 -87
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query58.tpl +0 -101
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query59.tpl +0 -79
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query6.tpl +0 -62
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query60.tpl +0 -115
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query61.tpl +0 -83
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query62.tpl +0 -71
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query63.tpl +0 -64
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query64.tpl +0 -157
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query65.tpl +0 -62
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query66.tpl +0 -261
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query67.tpl +0 -81
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query68.tpl +0 -82
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query69.tpl +0 -85
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query7.tpl +0 -60
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query70.tpl +0 -73
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query71.tpl +0 -74
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query72.tpl +0 -67
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query73.tpl +0 -69
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query74.tpl +0 -99
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query75.tpl +0 -107
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query76.tpl +0 -64
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query77.tpl +0 -145
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query78.tpl +0 -94
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query79.tpl +0 -60
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query8.tpl +0 -144
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query80.tpl +0 -131
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query81.tpl +0 -68
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query82.tpl +0 -56
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query83.tpl +0 -104
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query84.tpl +0 -58
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query85.tpl +0 -121
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query86.tpl +0 -60
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query87.tpl +0 -56
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query88.tpl +0 -128
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query89.tpl +0 -75
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query9.tpl +0 -88
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query90.tpl +0 -58
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query91.tpl +0 -68
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query92.tpl +0 -68
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query93.tpl +0 -53
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query94.tpl +0 -67
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query95.tpl +0 -71
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query96.tpl +0 -52
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query97.tpl +0 -62
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query98.tpl +0 -70
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query99.tpl +0 -69
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/sqlserver.tpl +0 -37
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/templates.lst +0 -99
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/README +0 -4
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/ansi.tpl +0 -38
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/db2.tpl +0 -38
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/netezza.tpl +0 -38
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/oracle.tpl +0 -38
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query1.tpl +0 -62
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query11.tpl +0 -119
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query12.tpl +0 -72
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query13.tpl +0 -89
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query14.tpl +0 -247
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query15.tpl +0 -56
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query16.tpl +0 -76
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query17.tpl +0 -80
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query19.tpl +0 -64
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query2.tpl +0 -94
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query20.tpl +0 -67
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query21.tpl +0 -65
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query23.tpl +0 -144
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query24.tpl +0 -147
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query25.tpl +0 -84
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query26.tpl +0 -61
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query27.tpl +0 -68
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query28.tpl +0 -90
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query29.tpl +0 -85
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query3.tpl +0 -58
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query30.tpl +0 -66
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query31.tpl +0 -88
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query32.tpl +0 -65
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query33.tpl +0 -113
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query34.tpl +0 -77
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query37.tpl +0 -57
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query38.tpl +0 -58
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query39.tpl +0 -93
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query4.tpl +0 -154
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query40.tpl +0 -63
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query41.tpl +0 -90
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query42.tpl +0 -64
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query43.tpl +0 -55
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query44.tpl +0 -72
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query45.tpl +0 -56
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query46.tpl +0 -78
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query47.tpl +0 -89
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query48.tpl +0 -104
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query49.tpl +0 -164
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query50.tpl +0 -96
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query52.tpl +0 -59
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query53.tpl +0 -64
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query54.tpl +0 -95
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query55.tpl +0 -52
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query56.tpl +0 -108
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query57.tpl +0 -87
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query58.tpl +0 -101
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query59.tpl +0 -79
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query6.tpl +0 -62
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query60.tpl +0 -115
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query61.tpl +0 -83
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query62.tpl +0 -71
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query63.tpl +0 -64
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query64.tpl +0 -157
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query65.tpl +0 -62
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query66.tpl +0 -261
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query68.tpl +0 -82
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query69.tpl +0 -85
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query7.tpl +0 -60
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query71.tpl +0 -74
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query72.tpl +0 -67
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query73.tpl +0 -69
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query74.tpl +0 -99
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query75.tpl +0 -107
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query76.tpl +0 -64
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query78.tpl +0 -94
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query79.tpl +0 -60
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query8.tpl +0 -144
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query81.tpl +0 -68
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query82.tpl +0 -56
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query83.tpl +0 -104
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query84.tpl +0 -58
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query85.tpl +0 -121
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query87.tpl +0 -56
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query88.tpl +0 -128
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query89.tpl +0 -75
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query9.tpl +0 -88
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query90.tpl +0 -58
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query91.tpl +0 -68
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query92.tpl +0 -68
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query93.tpl +0 -53
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query94.tpl +0 -67
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query95.tpl +0 -71
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query96.tpl +0 -52
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query97.tpl +0 -62
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query98.tpl +0 -70
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query99.tpl +0 -69
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/sqlserver.tpl +0 -37
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/templates.lst +0 -99
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/README +0 -4
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/ansi.tpl +0 -38
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/db2.tpl +0 -38
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/netezza.tpl +0 -38
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/oracle.tpl +0 -38
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query1.tpl +0 -62
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query10.tpl +0 -98
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query11.tpl +0 -119
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query12.tpl +0 -72
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query13.tpl +0 -89
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query14.tpl +0 -247
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query15.tpl +0 -56
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query16.tpl +0 -76
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query17.tpl +0 -80
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query18.tpl +0 -73
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query19.tpl +0 -64
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query2.tpl +0 -94
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query20.tpl +0 -67
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query21.tpl +0 -65
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query22.tpl +0 -54
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query23.tpl +0 -144
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query24.tpl +0 -147
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query25.tpl +0 -84
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query26.tpl +0 -61
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query27.tpl +0 -68
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query28.tpl +0 -90
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query29.tpl +0 -85
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query3.tpl +0 -58
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query30.tpl +0 -66
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query31.tpl +0 -88
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query32.tpl +0 -65
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query33.tpl +0 -113
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query34.tpl +0 -77
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query35.tpl +0 -98
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query36.tpl +0 -74
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query37.tpl +0 -57
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query38.tpl +0 -58
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query39.tpl +0 -93
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query4.tpl +0 -154
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query40.tpl +0 -63
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query41.tpl +0 -90
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query42.tpl +0 -64
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query43.tpl +0 -55
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query44.tpl +0 -72
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query45.tpl +0 -56
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query46.tpl +0 -78
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query47.tpl +0 -89
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query48.tpl +0 -104
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query49.tpl +0 -164
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query5.tpl +0 -165
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query50.tpl +0 -96
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query51.tpl +0 -80
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query52.tpl +0 -59
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query53.tpl +0 -64
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query54.tpl +0 -95
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query55.tpl +0 -52
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query56.tpl +0 -108
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query57.tpl +0 -87
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query58.tpl +0 -101
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query59.tpl +0 -79
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query6.tpl +0 -62
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query60.tpl +0 -115
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query61.tpl +0 -83
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query62.tpl +0 -71
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query63.tpl +0 -64
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query64.tpl +0 -157
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query65.tpl +0 -62
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query66.tpl +0 -261
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query67.tpl +0 -81
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query68.tpl +0 -82
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query69.tpl +0 -85
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query7.tpl +0 -60
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query70.tpl +0 -73
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query71.tpl +0 -74
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query72.tpl +0 -67
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query73.tpl +0 -69
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query74.tpl +0 -99
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query75.tpl +0 -107
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query76.tpl +0 -64
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query77.tpl +0 -145
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query78.tpl +0 -94
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query79.tpl +0 -60
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query8.tpl +0 -144
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query80.tpl +0 -131
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query81.tpl +0 -68
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query82.tpl +0 -56
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query83.tpl +0 -104
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query84.tpl +0 -58
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query85.tpl +0 -121
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query86.tpl +0 -60
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query87.tpl +0 -56
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query88.tpl +0 -128
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query89.tpl +0 -75
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query9.tpl +0 -88
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query90.tpl +0 -58
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query91.tpl +0 -68
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query92.tpl +0 -68
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query93.tpl +0 -53
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query94.tpl +0 -67
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query95.tpl +0 -71
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query96.tpl +0 -52
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query97.tpl +0 -62
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query98.tpl +0 -70
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query99.tpl +0 -69
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/sqlserver.tpl +0 -37
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/templates.lst +0 -99
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/README +0 -4
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/ansi.tpl +0 -38
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/db2.tpl +0 -38
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/netezza.tpl +0 -38
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/oracle.tpl +0 -38
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query1.tpl +0 -62
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query10.tpl +0 -98
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query11.tpl +0 -119
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query12.tpl +0 -72
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query13.tpl +0 -89
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query14.tpl +0 -247
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query15.tpl +0 -56
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query16.tpl +0 -76
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query17.tpl +0 -80
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query18.tpl +0 -73
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query19.tpl +0 -64
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query2.tpl +0 -94
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query20.tpl +0 -67
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query21.tpl +0 -65
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query22.tpl +0 -54
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query23.tpl +0 -144
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query24.tpl +0 -147
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query25.tpl +0 -84
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query26.tpl +0 -61
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query27.tpl +0 -68
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query28.tpl +0 -90
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query29.tpl +0 -85
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query3.tpl +0 -58
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query30.tpl +0 -66
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query31.tpl +0 -88
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query32.tpl +0 -65
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query33.tpl +0 -113
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query34.tpl +0 -77
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query35.tpl +0 -98
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query36.tpl +0 -74
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query37.tpl +0 -57
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query38.tpl +0 -58
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query39.tpl +0 -93
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query4.tpl +0 -154
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query40.tpl +0 -63
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query41.tpl +0 -90
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query42.tpl +0 -64
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query43.tpl +0 -55
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query44.tpl +0 -72
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query45.tpl +0 -56
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query46.tpl +0 -78
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query47.tpl +0 -89
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query48.tpl +0 -104
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query49.tpl +0 -164
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query5.tpl +0 -165
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query50.tpl +0 -96
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query51.tpl +0 -80
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query52.tpl +0 -59
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query53.tpl +0 -64
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query54.tpl +0 -95
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query55.tpl +0 -52
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query56.tpl +0 -108
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query57.tpl +0 -87
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query58.tpl +0 -101
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query59.tpl +0 -79
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query6.tpl +0 -62
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query60.tpl +0 -115
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query61.tpl +0 -83
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query62.tpl +0 -71
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query63.tpl +0 -64
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query64.tpl +0 -157
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query65.tpl +0 -62
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query66.tpl +0 -261
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query67.tpl +0 -81
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query68.tpl +0 -82
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query69.tpl +0 -85
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query7.tpl +0 -60
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query70.tpl +0 -73
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query71.tpl +0 -74
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query72.tpl +0 -67
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query73.tpl +0 -69
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query74.tpl +0 -99
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query75.tpl +0 -107
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query76.tpl +0 -64
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query77.tpl +0 -145
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query78.tpl +0 -94
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query79.tpl +0 -60
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query8.tpl +0 -144
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query80.tpl +0 -131
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query81.tpl +0 -68
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query82.tpl +0 -56
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query83.tpl +0 -104
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query84.tpl +0 -58
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query85.tpl +0 -121
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query86.tpl +0 -60
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query87.tpl +0 -56
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query88.tpl +0 -128
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query89.tpl +0 -75
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query9.tpl +0 -88
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query90.tpl +0 -58
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query91.tpl +0 -68
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query92.tpl +0 -68
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query93.tpl +0 -53
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query94.tpl +0 -67
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query95.tpl +0 -71
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query96.tpl +0 -52
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query97.tpl +0 -62
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query98.tpl +0 -70
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query99.tpl +0 -69
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/sqlserver.tpl +0 -37
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/templates.lst +0 -99
- benchbox-0.1.0.dist-info/RECORD +0 -1192
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/README +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/ansi.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/db2.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/netezza.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/oracle.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query1.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query10.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query11.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query12.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query13.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query14.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query15.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query16.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query17.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query18.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query19.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query2.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query20.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query21.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query22.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query23.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query24.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query25.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query26.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query27.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query28.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query29.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query3.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query30.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query31.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query32.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query33.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query34.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query35.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query36.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query37.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query38.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query39.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query4.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query40.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query41.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query42.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query43.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query44.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query45.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query46.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query47.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query48.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query49.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query5.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query50.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query51.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query52.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query53.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query54.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query55.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query56.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query57.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query58.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query59.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query6.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query60.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query61.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query62.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query63.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query64.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query65.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query66.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query67.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query68.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query69.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query7.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query70.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query71.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query73.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query74.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query75.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query76.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query77.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query78.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query79.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query8.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query80.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query81.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query82.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query83.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query84.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query85.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query86.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query87.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query88.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query89.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query9.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query90.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query91.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query92.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query93.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query94.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query95.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query96.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query97.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query98.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query99.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/templates.lst +0 -0
- {benchbox-0.1.0.dist-info → benchbox-0.1.1.dist-info}/entry_points.txt +0 -0
- {benchbox-0.1.0.dist-info → benchbox-0.1.1.dist-info}/licenses/LICENSE +0 -0
- {benchbox-0.1.0.dist-info → benchbox-0.1.1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,630 @@
|
|
|
1
|
+
"""Polars Maintenance Operations Implementation.
|
|
2
|
+
|
|
3
|
+
This module implements DataFrame maintenance operations for Polars,
|
|
4
|
+
enabling TPC-H RF1/RF2 and TPC-DS maintenance testing.
|
|
5
|
+
|
|
6
|
+
Polars supports all maintenance operations via read-modify-write pattern:
|
|
7
|
+
- INSERT: Append new data to Parquet files
|
|
8
|
+
- DELETE: Read-filter-write pattern (rewrites table)
|
|
9
|
+
- UPDATE: Read-modify-write pattern (rewrites table)
|
|
10
|
+
- MERGE: Read-join-write pattern (rewrites table)
|
|
11
|
+
|
|
12
|
+
Implementation Note:
|
|
13
|
+
Polars operates entirely in RAM, so the full table rewrite approach is
|
|
14
|
+
acceptable and matches Polars' standard data processing patterns. For
|
|
15
|
+
very large datasets that exceed available RAM, use Delta Lake or Iceberg
|
|
16
|
+
which support incremental operations with transaction logs.
|
|
17
|
+
|
|
18
|
+
The read-modify-write pattern provides:
|
|
19
|
+
- Full TPC-H and TPC-DS maintenance compliance
|
|
20
|
+
- Atomic operations via backup-and-swap
|
|
21
|
+
- Consistent behavior with Polars' immutable data model
|
|
22
|
+
|
|
23
|
+
Copyright 2026 Joe Harris / BenchBox Project
|
|
24
|
+
|
|
25
|
+
Licensed under the MIT License. See LICENSE file in the project root for details.
|
|
26
|
+
"""
|
|
27
|
+
|
|
28
|
+
from __future__ import annotations
|
|
29
|
+
|
|
30
|
+
import logging
|
|
31
|
+
import os
|
|
32
|
+
import shutil
|
|
33
|
+
from pathlib import Path
|
|
34
|
+
from typing import TYPE_CHECKING, Any
|
|
35
|
+
|
|
36
|
+
try:
|
|
37
|
+
import polars as pl
|
|
38
|
+
|
|
39
|
+
POLARS_AVAILABLE = True
|
|
40
|
+
except ImportError:
|
|
41
|
+
pl = None # type: ignore[assignment]
|
|
42
|
+
POLARS_AVAILABLE = False
|
|
43
|
+
|
|
44
|
+
from benchbox.core.dataframe.maintenance_interface import (
|
|
45
|
+
POLARS_CAPABILITIES,
|
|
46
|
+
BaseDataFrameMaintenanceOperations,
|
|
47
|
+
DataFrameMaintenanceCapabilities,
|
|
48
|
+
)
|
|
49
|
+
|
|
50
|
+
if TYPE_CHECKING:
|
|
51
|
+
pass
|
|
52
|
+
|
|
53
|
+
logger = logging.getLogger(__name__)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class PolarsMaintenanceOperations(BaseDataFrameMaintenanceOperations):
|
|
57
|
+
"""Polars maintenance operations implementation.
|
|
58
|
+
|
|
59
|
+
Implements all TPC maintenance operations via read-modify-write patterns:
|
|
60
|
+
- INSERT: Append new Parquet files
|
|
61
|
+
- DELETE: Read-filter-write (rewrites table without matching rows)
|
|
62
|
+
- UPDATE: Read-modify-write (rewrites table with modified values)
|
|
63
|
+
- MERGE: Read-join-write (rewrites table with merged data)
|
|
64
|
+
|
|
65
|
+
All operations use atomic backup-and-swap to ensure data integrity.
|
|
66
|
+
|
|
67
|
+
Note:
|
|
68
|
+
Polars operates entirely in RAM, so the full table rewrite approach
|
|
69
|
+
is standard. For datasets exceeding available RAM, use Delta Lake
|
|
70
|
+
or Iceberg which support incremental operations.
|
|
71
|
+
|
|
72
|
+
Example:
|
|
73
|
+
ops = PolarsMaintenanceOperations()
|
|
74
|
+
|
|
75
|
+
# Insert new rows
|
|
76
|
+
result = ops.insert_rows(
|
|
77
|
+
table_path="/data/orders",
|
|
78
|
+
dataframe=new_orders_df,
|
|
79
|
+
mode="append"
|
|
80
|
+
)
|
|
81
|
+
|
|
82
|
+
# Delete old rows (rewrites table)
|
|
83
|
+
result = ops.delete_rows(
|
|
84
|
+
table_path="/data/orders",
|
|
85
|
+
condition="order_date < '2020-01-01'"
|
|
86
|
+
)
|
|
87
|
+
|
|
88
|
+
# Update rows (rewrites table)
|
|
89
|
+
result = ops.update_rows(
|
|
90
|
+
table_path="/data/orders",
|
|
91
|
+
condition="status = 'pending'",
|
|
92
|
+
updates={"status": "'cancelled'"}
|
|
93
|
+
)
|
|
94
|
+
|
|
95
|
+
# Merge/upsert (rewrites table)
|
|
96
|
+
result = ops.merge_rows(
|
|
97
|
+
table_path="/data/orders",
|
|
98
|
+
source_dataframe=new_orders,
|
|
99
|
+
merge_condition="target.id = source.id",
|
|
100
|
+
when_matched={"status": "source.status"},
|
|
101
|
+
when_not_matched={"id": "source.id", "status": "source.status"}
|
|
102
|
+
)
|
|
103
|
+
"""
|
|
104
|
+
|
|
105
|
+
def __init__(self, working_dir: str | Path | None = None) -> None:
    """Set up the Polars-backed maintenance operations object.

    Args:
        working_dir: Directory intended for temporary files. Any falsy
            value (``None`` or an empty string) leaves ``working_dir``
            set to ``None``; anything else is wrapped in a ``Path``.

    Raises:
        ImportError: If the module-level ``polars`` import failed.
    """
    super().__init__()

    # The import guard at module top records whether Polars loaded; fail
    # here with install instructions rather than later on first use of ``pl``.
    if not POLARS_AVAILABLE:
        install_hint = (
            "Polars is not installed. Install with: pip install polars\n"
            "For TPC-H/TPC-DS maintenance tests, install the full package:\n"
            "pip install 'benchbox[dataframe]'"
        )
        raise ImportError(install_hint)

    # Per-class logger name so subclasses log under their own name.
    logger_name = f"{__name__}.{self.__class__.__name__}"
    self.logger = logging.getLogger(logger_name)

    if working_dir:
        self.working_dir = Path(working_dir)
    else:
        self.working_dir = None
|
|
125
|
+
|
|
126
|
+
def _get_capabilities(self) -> DataFrameMaintenanceCapabilities:
    """Return the maintenance capability descriptor for Polars.

    Returns:
        The shared ``POLARS_CAPABILITIES`` object imported from
        ``benchbox.core.dataframe.maintenance_interface``. This class
        documents INSERT, DELETE, UPDATE and MERGE support, so the
        descriptor is expected to advertise all four.
        NOTE(review): confirm against the constant's definition.
    """
    return POLARS_CAPABILITIES
|
|
133
|
+
|
|
134
|
+
def _do_insert(
|
|
135
|
+
self,
|
|
136
|
+
table_path: Path | str,
|
|
137
|
+
dataframe: Any,
|
|
138
|
+
partition_columns: list[str] | None,
|
|
139
|
+
mode: str,
|
|
140
|
+
) -> int:
|
|
141
|
+
"""Insert rows by writing to Parquet files.
|
|
142
|
+
|
|
143
|
+
Args:
|
|
144
|
+
table_path: Path to the table directory
|
|
145
|
+
dataframe: Polars DataFrame to insert
|
|
146
|
+
partition_columns: Columns to partition by (creates subdirectories)
|
|
147
|
+
mode: Write mode ("append" or "overwrite")
|
|
148
|
+
|
|
149
|
+
Returns:
|
|
150
|
+
Number of rows inserted
|
|
151
|
+
|
|
152
|
+
Raises:
|
|
153
|
+
ValueError: If mode is invalid
|
|
154
|
+
TypeError: If dataframe is not a Polars DataFrame
|
|
155
|
+
"""
|
|
156
|
+
table_path = Path(table_path)
|
|
157
|
+
|
|
158
|
+
# Validate and convert dataframe
|
|
159
|
+
if isinstance(dataframe, pl.LazyFrame):
|
|
160
|
+
df = dataframe.collect()
|
|
161
|
+
elif isinstance(dataframe, pl.DataFrame):
|
|
162
|
+
df = dataframe
|
|
163
|
+
else:
|
|
164
|
+
raise TypeError(f"Expected Polars DataFrame or LazyFrame, got {type(dataframe)}")
|
|
165
|
+
|
|
166
|
+
row_count = df.height
|
|
167
|
+
|
|
168
|
+
if row_count == 0:
|
|
169
|
+
self.logger.info("No rows to insert")
|
|
170
|
+
return 0
|
|
171
|
+
|
|
172
|
+
# Ensure directory exists
|
|
173
|
+
table_path.mkdir(parents=True, exist_ok=True)
|
|
174
|
+
|
|
175
|
+
if mode == "overwrite":
|
|
176
|
+
# Remove existing files
|
|
177
|
+
for f in table_path.glob("*.parquet"):
|
|
178
|
+
f.unlink()
|
|
179
|
+
|
|
180
|
+
if partition_columns:
|
|
181
|
+
# Write partitioned data
|
|
182
|
+
self._write_partitioned(df, table_path, partition_columns, mode)
|
|
183
|
+
else:
|
|
184
|
+
# Write to a single file or append
|
|
185
|
+
self._write_single(df, table_path, mode)
|
|
186
|
+
|
|
187
|
+
self.logger.info(f"Inserted {row_count} rows to {table_path}")
|
|
188
|
+
return row_count
|
|
189
|
+
|
|
190
|
+
def _write_single(self, df: Any, table_path: Path, mode: str) -> None:
    """Write a DataFrame to one Parquet file inside ``table_path``.

    In ``"append"`` mode the file is named ``part-NNNNN.parquet`` where
    ``NNNNN`` is one past the highest numeric index already present, so an
    existing part file is never overwritten even when the numbering has
    gaps. Any other mode writes ``part-00000.parquet``.

    Args:
        df: Polars DataFrame (must provide ``write_parquet`` and ``height``).
        table_path: Existing directory to write into.
        mode: ``"append"`` or ``"overwrite"``.
    """
    if mode == "append":
        prefix = "part-"
        next_index = 0
        for existing in table_path.glob("part-*.parquet"):
            suffix = existing.stem[len(prefix):]
            # Ignore files such as ``part-abc.parquet`` that do not follow
            # the numeric naming scheme.
            if suffix.isdecimal():
                next_index = max(next_index, int(suffix) + 1)
        file_path = table_path / f"part-{next_index:05d}.parquet"
    else:
        file_path = table_path / "part-00000.parquet"

    df.write_parquet(file_path)
    self.logger.debug(f"Wrote {df.height} rows to {file_path}")
|
|
208
|
+
|
|
209
|
+
def _write_partitioned(
    self,
    df: Any,
    table_path: Path,
    partition_columns: list[str],
    mode: str,
) -> None:
    """Split a DataFrame by partition columns and write each group.

    Every distinct combination of partition values gets its own nested
    ``column=value`` directory under ``table_path``; the group's rows are
    then handed to ``_write_single`` for that directory.

    Args:
        df: Polars DataFrame to split via ``group_by``.
        table_path: Base directory of the table.
        partition_columns: Columns to partition by, outermost first.
        mode: Write mode forwarded unchanged to ``_write_single``.
    """
    for group_key, group_frame in df.group_by(partition_columns):
        # ``group_by`` may yield a bare scalar instead of a tuple; wrap it
        # so both shapes pair up with the column names the same way.
        key_values = group_key if isinstance(group_key, tuple) else (group_key,)

        segments = [f"{col}={val}" for col, val in zip(partition_columns, key_values)]
        target_dir = table_path.joinpath(*segments)
        target_dir.mkdir(parents=True, exist_ok=True)

        self._write_single(group_frame, target_dir, mode)
|
|
240
|
+
|
|
241
|
+
def _do_delete(
|
|
242
|
+
self,
|
|
243
|
+
table_path: Path | str,
|
|
244
|
+
condition: str | Any,
|
|
245
|
+
) -> int:
|
|
246
|
+
"""Delete rows using read-filter-write pattern.
|
|
247
|
+
|
|
248
|
+
Since Polars doesn't support row-level deletes, we:
|
|
249
|
+
1. Read all existing data
|
|
250
|
+
2. Filter out rows matching the condition
|
|
251
|
+
3. Write the remaining data back
|
|
252
|
+
|
|
253
|
+
This is expensive for large datasets. For better performance,
|
|
254
|
+
use Delta Lake or Iceberg which support row-level deletes.
|
|
255
|
+
|
|
256
|
+
Args:
|
|
257
|
+
table_path: Path to the table directory
|
|
258
|
+
condition: SQL-like condition string (e.g., "id > 100")
|
|
259
|
+
|
|
260
|
+
Returns:
|
|
261
|
+
Number of rows deleted
|
|
262
|
+
|
|
263
|
+
Note:
|
|
264
|
+
The condition is parsed using Polars SQL syntax.
|
|
265
|
+
Example conditions:
|
|
266
|
+
- "order_date < '2020-01-01'"
|
|
267
|
+
- "status = 'cancelled'"
|
|
268
|
+
- "amount > 1000 AND region = 'US'"
|
|
269
|
+
"""
|
|
270
|
+
table_path = Path(table_path)
|
|
271
|
+
|
|
272
|
+
if not table_path.exists():
|
|
273
|
+
self.logger.warning(f"Table path does not exist: {table_path}")
|
|
274
|
+
return 0
|
|
275
|
+
|
|
276
|
+
# Read all existing data
|
|
277
|
+
parquet_files = list(table_path.glob("**/*.parquet"))
|
|
278
|
+
if not parquet_files:
|
|
279
|
+
self.logger.warning(f"No Parquet files found in {table_path}")
|
|
280
|
+
return 0
|
|
281
|
+
|
|
282
|
+
# Read all data
|
|
283
|
+
self.logger.debug(f"Reading {len(parquet_files)} files from {table_path}")
|
|
284
|
+
df = pl.scan_parquet(parquet_files).collect()
|
|
285
|
+
original_count = df.height
|
|
286
|
+
|
|
287
|
+
if original_count == 0:
|
|
288
|
+
return 0
|
|
289
|
+
|
|
290
|
+
# Apply filter to keep rows NOT matching the delete condition
|
|
291
|
+
# We need to negate the condition
|
|
292
|
+
try:
|
|
293
|
+
# Use Polars SQL to evaluate the condition
|
|
294
|
+
df_with_ctx = pl.SQLContext(register_globals=True)
|
|
295
|
+
df_with_ctx.register("__table__", df)
|
|
296
|
+
|
|
297
|
+
# Query to find rows TO DELETE (matching condition)
|
|
298
|
+
delete_query = f"SELECT COUNT(*) as cnt FROM __table__ WHERE {condition}"
|
|
299
|
+
delete_count_df = df_with_ctx.execute(delete_query).collect()
|
|
300
|
+
delete_count = delete_count_df["cnt"][0]
|
|
301
|
+
|
|
302
|
+
if delete_count == 0:
|
|
303
|
+
self.logger.info("No rows match delete condition")
|
|
304
|
+
return 0
|
|
305
|
+
|
|
306
|
+
# Query to keep rows NOT matching condition
|
|
307
|
+
keep_query = f"SELECT * FROM __table__ WHERE NOT ({condition})"
|
|
308
|
+
remaining_df = df_with_ctx.execute(keep_query).collect()
|
|
309
|
+
|
|
310
|
+
except Exception as e:
|
|
311
|
+
# Fallback: try to parse condition as Polars expression
|
|
312
|
+
self.logger.warning(f"SQL condition parsing failed: {e}. Trying expression parse.")
|
|
313
|
+
raise ValueError(
|
|
314
|
+
f"Failed to parse delete condition: {condition}\n"
|
|
315
|
+
f'Use SQL-like syntax: "column > value" or "column = \'value\'"\n'
|
|
316
|
+
f"Error: {e}"
|
|
317
|
+
) from e
|
|
318
|
+
|
|
319
|
+
rows_deleted = original_count - remaining_df.height
|
|
320
|
+
|
|
321
|
+
if rows_deleted > 0:
|
|
322
|
+
# Backup and rewrite
|
|
323
|
+
backup_path = table_path.parent / f"{table_path.name}_backup_{os.getpid()}"
|
|
324
|
+
try:
|
|
325
|
+
# Move existing files to backup
|
|
326
|
+
shutil.move(str(table_path), str(backup_path))
|
|
327
|
+
|
|
328
|
+
# Write remaining data
|
|
329
|
+
table_path.mkdir(parents=True, exist_ok=True)
|
|
330
|
+
if remaining_df.height > 0:
|
|
331
|
+
remaining_df.write_parquet(table_path / "part-00000.parquet")
|
|
332
|
+
|
|
333
|
+
# Remove backup on success
|
|
334
|
+
shutil.rmtree(backup_path)
|
|
335
|
+
|
|
336
|
+
except Exception as e:
|
|
337
|
+
# Restore from backup on failure
|
|
338
|
+
if backup_path.exists():
|
|
339
|
+
shutil.rmtree(table_path, ignore_errors=True)
|
|
340
|
+
shutil.move(str(backup_path), str(table_path))
|
|
341
|
+
raise RuntimeError(f"Delete failed, restored from backup: {e}") from e
|
|
342
|
+
|
|
343
|
+
self.logger.info(f"Deleted {rows_deleted} rows from {table_path}")
|
|
344
|
+
return rows_deleted
|
|
345
|
+
|
|
346
|
+
def _do_update(
|
|
347
|
+
self,
|
|
348
|
+
table_path: Path | str,
|
|
349
|
+
condition: str | Any,
|
|
350
|
+
updates: dict[str, Any],
|
|
351
|
+
) -> int:
|
|
352
|
+
"""Update rows using read-modify-write pattern.
|
|
353
|
+
|
|
354
|
+
Since Polars uses immutable DataFrames, we:
|
|
355
|
+
1. Read all existing data
|
|
356
|
+
2. Apply updates to rows matching the condition
|
|
357
|
+
3. Write the modified data back
|
|
358
|
+
|
|
359
|
+
This is Polars' standard approach for data modification. For datasets
|
|
360
|
+
exceeding available RAM, use Delta Lake or Iceberg.
|
|
361
|
+
|
|
362
|
+
Args:
|
|
363
|
+
table_path: Path to the table directory
|
|
364
|
+
condition: SQL-like condition string (e.g., "status = 'pending'")
|
|
365
|
+
updates: Dict mapping column names to new values or expressions
|
|
366
|
+
|
|
367
|
+
Returns:
|
|
368
|
+
Number of rows updated
|
|
369
|
+
|
|
370
|
+
Example:
|
|
371
|
+
result = ops.update_rows(
|
|
372
|
+
table_path="/data/orders",
|
|
373
|
+
condition="status = 'pending'",
|
|
374
|
+
updates={"status": "'cancelled'", "updated_at": "CURRENT_DATE"}
|
|
375
|
+
)
|
|
376
|
+
"""
|
|
377
|
+
table_path = Path(table_path)
|
|
378
|
+
|
|
379
|
+
if not table_path.exists():
|
|
380
|
+
self.logger.warning(f"Table path does not exist: {table_path}")
|
|
381
|
+
return 0
|
|
382
|
+
|
|
383
|
+
# Read all existing data
|
|
384
|
+
parquet_files = list(table_path.glob("**/*.parquet"))
|
|
385
|
+
if not parquet_files:
|
|
386
|
+
self.logger.warning(f"No Parquet files found in {table_path}")
|
|
387
|
+
return 0
|
|
388
|
+
|
|
389
|
+
df = pl.scan_parquet(parquet_files).collect()
|
|
390
|
+
original_count = df.height
|
|
391
|
+
|
|
392
|
+
if original_count == 0:
|
|
393
|
+
return 0
|
|
394
|
+
|
|
395
|
+
try:
|
|
396
|
+
# Use Polars SQL to identify rows to update and apply changes
|
|
397
|
+
df_with_ctx = pl.SQLContext(register_globals=True)
|
|
398
|
+
df_with_ctx.register("__table__", df)
|
|
399
|
+
|
|
400
|
+
# Count matching rows
|
|
401
|
+
count_query = f"SELECT COUNT(*) as cnt FROM __table__ WHERE {condition}"
|
|
402
|
+
match_count = df_with_ctx.execute(count_query).collect()["cnt"][0]
|
|
403
|
+
|
|
404
|
+
if match_count == 0:
|
|
405
|
+
self.logger.info("No rows match update condition")
|
|
406
|
+
return 0
|
|
407
|
+
|
|
408
|
+
# Polars SQL doesn't support UPDATE directly, so we use CASE expressions
|
|
409
|
+
# Build a SELECT that applies updates via CASE WHEN
|
|
410
|
+
select_cols = []
|
|
411
|
+
for col_name in df.columns:
|
|
412
|
+
if col_name in updates:
|
|
413
|
+
value = updates[col_name]
|
|
414
|
+
select_cols.append(f"CASE WHEN ({condition}) THEN {value} ELSE {col_name} END AS {col_name}")
|
|
415
|
+
else:
|
|
416
|
+
select_cols.append(col_name)
|
|
417
|
+
|
|
418
|
+
update_query = f"SELECT {', '.join(select_cols)} FROM __table__"
|
|
419
|
+
updated_df = df_with_ctx.execute(update_query).collect()
|
|
420
|
+
|
|
421
|
+
except Exception as e:
|
|
422
|
+
raise ValueError(
|
|
423
|
+
f"Failed to parse update condition or values: {condition}\n"
|
|
424
|
+
f"Updates: {updates}\n"
|
|
425
|
+
f"Use SQL-like syntax for conditions and values.\n"
|
|
426
|
+
f"Error: {e}"
|
|
427
|
+
) from e
|
|
428
|
+
|
|
429
|
+
# Atomic write with backup
|
|
430
|
+
self._atomic_rewrite(table_path, updated_df)
|
|
431
|
+
|
|
432
|
+
self.logger.info(f"Updated {match_count} rows in {table_path}")
|
|
433
|
+
return match_count
|
|
434
|
+
|
|
435
|
+
def _do_merge(
|
|
436
|
+
self,
|
|
437
|
+
table_path: Path | str,
|
|
438
|
+
source_dataframe: Any,
|
|
439
|
+
merge_condition: str | Any,
|
|
440
|
+
when_matched: dict[str, Any] | None,
|
|
441
|
+
when_not_matched: dict[str, Any] | None,
|
|
442
|
+
) -> int:
|
|
443
|
+
"""Merge (upsert) rows using read-join-write pattern.
|
|
444
|
+
|
|
445
|
+
Since Polars uses immutable DataFrames, we:
|
|
446
|
+
1. Read target table
|
|
447
|
+
2. Join with source on merge key
|
|
448
|
+
3. Apply when_matched updates to matching rows
|
|
449
|
+
4. Insert when_not_matched rows
|
|
450
|
+
5. Write the merged data back
|
|
451
|
+
|
|
452
|
+
This is Polars' standard approach for upsert operations.
|
|
453
|
+
|
|
454
|
+
Args:
|
|
455
|
+
table_path: Path to the table directory
|
|
456
|
+
source_dataframe: Polars DataFrame with source data
|
|
457
|
+
merge_condition: Join condition (e.g., "target.id = source.id")
|
|
458
|
+
when_matched: Dict of column updates for matched rows
|
|
459
|
+
when_not_matched: Dict of column values for inserted rows
|
|
460
|
+
|
|
461
|
+
Returns:
|
|
462
|
+
Number of rows affected (updated + inserted)
|
|
463
|
+
|
|
464
|
+
Example:
|
|
465
|
+
result = ops.merge_rows(
|
|
466
|
+
table_path="/data/dim_customer",
|
|
467
|
+
source_dataframe=updated_customers,
|
|
468
|
+
merge_condition="target.c_custkey = source.c_custkey",
|
|
469
|
+
when_matched={"c_name": "source.c_name", "c_address": "source.c_address"},
|
|
470
|
+
when_not_matched={"c_custkey": "source.c_custkey", "c_name": "source.c_name"}
|
|
471
|
+
)
|
|
472
|
+
"""
|
|
473
|
+
table_path = Path(table_path)
|
|
474
|
+
|
|
475
|
+
# Convert source to Polars DataFrame
|
|
476
|
+
if isinstance(source_dataframe, pl.LazyFrame):
|
|
477
|
+
source_df = source_dataframe.collect()
|
|
478
|
+
elif isinstance(source_dataframe, pl.DataFrame):
|
|
479
|
+
source_df = source_dataframe
|
|
480
|
+
else:
|
|
481
|
+
raise TypeError(f"Expected Polars DataFrame or LazyFrame, got {type(source_dataframe)}")
|
|
482
|
+
|
|
483
|
+
source_count = source_df.height
|
|
484
|
+
if source_count == 0:
|
|
485
|
+
self.logger.info("Source DataFrame is empty, nothing to merge")
|
|
486
|
+
return 0
|
|
487
|
+
|
|
488
|
+
# Read target table
|
|
489
|
+
if not table_path.exists():
|
|
490
|
+
# No target table - just insert all source rows
|
|
491
|
+
self.logger.info(f"Target table doesn't exist, inserting {source_count} rows")
|
|
492
|
+
return self._do_insert(table_path, source_df, None, "append")
|
|
493
|
+
|
|
494
|
+
parquet_files = list(table_path.glob("**/*.parquet"))
|
|
495
|
+
if not parquet_files:
|
|
496
|
+
return self._do_insert(table_path, source_df, None, "append")
|
|
497
|
+
|
|
498
|
+
target_df = pl.scan_parquet(parquet_files).collect()
|
|
499
|
+
|
|
500
|
+
# Parse merge condition to extract key column(s)
|
|
501
|
+
# Expected format: "target.col = source.col" or just "col"
|
|
502
|
+
merge_key = self._parse_merge_key(str(merge_condition))
|
|
503
|
+
|
|
504
|
+
rows_updated = 0
|
|
505
|
+
rows_inserted = 0
|
|
506
|
+
|
|
507
|
+
# Identify matching and non-matching rows
|
|
508
|
+
source_keys = set(source_df[merge_key].to_list())
|
|
509
|
+
target_keys = set(target_df[merge_key].to_list())
|
|
510
|
+
|
|
511
|
+
matching_keys = source_keys & target_keys
|
|
512
|
+
new_keys = source_keys - target_keys
|
|
513
|
+
|
|
514
|
+
# Apply when_matched updates
|
|
515
|
+
if when_matched and matching_keys:
|
|
516
|
+
# Update matching rows in target with values from source
|
|
517
|
+
for key_val in matching_keys:
|
|
518
|
+
# Get source row for this key
|
|
519
|
+
source_row = source_df.filter(pl.col(merge_key) == key_val)
|
|
520
|
+
|
|
521
|
+
# Build update expressions
|
|
522
|
+
for col, expr in when_matched.items():
|
|
523
|
+
if isinstance(expr, str) and expr.startswith("source."):
|
|
524
|
+
source_col = expr[7:] # Remove "source." prefix
|
|
525
|
+
new_value = source_row[source_col][0]
|
|
526
|
+
target_df = target_df.with_columns(
|
|
527
|
+
pl.when(pl.col(merge_key) == key_val)
|
|
528
|
+
.then(pl.lit(new_value))
|
|
529
|
+
.otherwise(pl.col(col))
|
|
530
|
+
.alias(col)
|
|
531
|
+
)
|
|
532
|
+
rows_updated += 1
|
|
533
|
+
|
|
534
|
+
# Insert when_not_matched rows
|
|
535
|
+
if when_not_matched and new_keys:
|
|
536
|
+
new_rows_df = source_df.filter(pl.col(merge_key).is_in(list(new_keys)))
|
|
537
|
+
rows_inserted = new_rows_df.height
|
|
538
|
+
|
|
539
|
+
# Ensure columns match target schema
|
|
540
|
+
for col in target_df.columns:
|
|
541
|
+
if col not in new_rows_df.columns:
|
|
542
|
+
# Add missing column with null
|
|
543
|
+
new_rows_df = new_rows_df.with_columns(pl.lit(None).alias(col))
|
|
544
|
+
|
|
545
|
+
# Select only target columns in correct order
|
|
546
|
+
new_rows_df = new_rows_df.select(target_df.columns)
|
|
547
|
+
|
|
548
|
+
# Append new rows
|
|
549
|
+
target_df = pl.concat([target_df, new_rows_df])
|
|
550
|
+
|
|
551
|
+
# Atomic write
|
|
552
|
+
self._atomic_rewrite(table_path, target_df)
|
|
553
|
+
|
|
554
|
+
total_affected = rows_updated + rows_inserted
|
|
555
|
+
self.logger.info(f"Merged {total_affected} rows ({rows_updated} updated, {rows_inserted} inserted)")
|
|
556
|
+
return total_affected
|
|
557
|
+
|
|
558
|
+
def _parse_merge_key(self, merge_condition: str) -> str:
|
|
559
|
+
"""Extract merge key column from condition string.
|
|
560
|
+
|
|
561
|
+
Args:
|
|
562
|
+
merge_condition: Condition like "target.id = source.id" or "id"
|
|
563
|
+
|
|
564
|
+
Returns:
|
|
565
|
+
Column name to use as merge key
|
|
566
|
+
"""
|
|
567
|
+
condition = merge_condition.strip()
|
|
568
|
+
|
|
569
|
+
# Handle "target.col = source.col" format
|
|
570
|
+
if "=" in condition:
|
|
571
|
+
left, right = condition.split("=", 1)
|
|
572
|
+
left = left.strip()
|
|
573
|
+
right = right.strip()
|
|
574
|
+
|
|
575
|
+
# Extract column name from "target.col" or "source.col"
|
|
576
|
+
if "." in left:
|
|
577
|
+
return left.split(".", 1)[1]
|
|
578
|
+
return left
|
|
579
|
+
|
|
580
|
+
# Handle simple column name
|
|
581
|
+
return condition
|
|
582
|
+
|
|
583
|
+
def _atomic_rewrite(self, table_path: Path, df: Any) -> None:
|
|
584
|
+
"""Atomically rewrite table using backup-and-swap.
|
|
585
|
+
|
|
586
|
+
Args:
|
|
587
|
+
table_path: Path to table directory
|
|
588
|
+
df: New DataFrame to write
|
|
589
|
+
|
|
590
|
+
Raises:
|
|
591
|
+
RuntimeError: If write fails (original data is restored)
|
|
592
|
+
"""
|
|
593
|
+
backup_path = table_path.parent / f"{table_path.name}_backup_{os.getpid()}"
|
|
594
|
+
|
|
595
|
+
try:
|
|
596
|
+
# Move existing files to backup
|
|
597
|
+
if table_path.exists():
|
|
598
|
+
shutil.move(str(table_path), str(backup_path))
|
|
599
|
+
|
|
600
|
+
# Write new data
|
|
601
|
+
table_path.mkdir(parents=True, exist_ok=True)
|
|
602
|
+
if df.height > 0:
|
|
603
|
+
df.write_parquet(table_path / "part-00000.parquet")
|
|
604
|
+
|
|
605
|
+
# Remove backup on success
|
|
606
|
+
if backup_path.exists():
|
|
607
|
+
shutil.rmtree(backup_path)
|
|
608
|
+
|
|
609
|
+
except Exception as e:
|
|
610
|
+
# Restore from backup on failure
|
|
611
|
+
if backup_path.exists():
|
|
612
|
+
shutil.rmtree(table_path, ignore_errors=True)
|
|
613
|
+
shutil.move(str(backup_path), str(table_path))
|
|
614
|
+
raise RuntimeError(f"Atomic rewrite failed, restored from backup: {e}") from e
|
|
615
|
+
|
|
616
|
+
|
|
617
|
+
def get_polars_maintenance_operations(working_dir: str | Path | None = None) -> PolarsMaintenanceOperations | None:
    """Build a Polars maintenance-operations object when Polars can be used.

    Args:
        working_dir: Optional working directory, passed through to
            ``PolarsMaintenanceOperations``

    Returns:
        A ``PolarsMaintenanceOperations`` instance when ``POLARS_AVAILABLE``
        is truthy, otherwise None (a debug message is logged in that case)
    """
    if POLARS_AVAILABLE:
        return PolarsMaintenanceOperations(working_dir=working_dir)

    logger.debug("Polars not available, cannot create maintenance operations")
    return None
|
|
@@ -559,6 +559,21 @@ class PySparkDataFrameAdapter(ExpressionFamilyAdapter[PySparkDF, PySparkLazyDF,
|
|
|
559
559
|
# Return first column value
|
|
560
560
|
return first_row[0]
|
|
561
561
|
|
|
562
|
+
def scalar_to_df(self, data: dict[str, Any]) -> PySparkDF:
    """Build a one-row PySpark DataFrame out of scalar values.

    Args:
        data: Dictionary mapping column names to scalar values

    Returns:
        PySpark DataFrame with a single row, created from one ``Row``
        whose fields are the dictionary entries
    """
    self._ensure_spark()
    # Imported lazily, after _ensure_spark(), as in the original
    from pyspark.sql import Row

    single_row = [Row(**data)]
    return self._spark.createDataFrame(single_row)
|
|
576
|
+
|
|
562
577
|
# =========================================================================
|
|
563
578
|
# Window Functions
|
|
564
579
|
# =========================================================================
|