benchbox 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- benchbox/__init__.py +1 -1
- benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query72.tpl +1 -1
- benchbox/_binaries/tpc-ds/{darwin-x86_64/query_templates/ansi.tpl → templates/query_templates/sqlserver.tpl} +1 -1
- benchbox/_binaries/tpc-ds/templates/query_variants/README +6 -0
- benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query10.tpl → templates/query_variants/query10a.tpl} +13 -14
- benchbox/_binaries/tpc-ds/{darwin-x86_64/query_templates/query14.tpl → templates/query_variants/query14a.tpl} +30 -26
- benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query18.tpl → templates/query_variants/query18a.tpl} +40 -19
- benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query22.tpl → templates/query_variants/query22a.tpl} +31 -9
- benchbox/_binaries/tpc-ds/{darwin-x86_64/query_templates/query27.tpl → templates/query_variants/query27a.tpl} +23 -10
- benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query35.tpl → templates/query_variants/query35a.tpl} +9 -8
- benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query36.tpl → templates/query_variants/query36a.tpl} +24 -12
- benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query51.tpl → templates/query_variants/query51a.tpl} +37 -20
- benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query5.tpl → templates/query_variants/query5a.tpl} +15 -10
- benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query67.tpl → templates/query_variants/query67a.tpl} +46 -18
- benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query70.tpl → templates/query_variants/query70a.tpl} +31 -27
- benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query77.tpl → templates/query_variants/query77a.tpl} +22 -15
- benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query80.tpl → templates/query_variants/query80a.tpl} +22 -8
- benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query86.tpl → templates/query_variants/query86a.tpl} +22 -13
- benchbox/_binaries/tpc-h/templates/dists.dss +836 -0
- benchbox/_binaries/tpc-h/templates/queries/1.sql +28 -0
- benchbox/_binaries/tpc-h/templates/queries/10.sql +38 -0
- benchbox/_binaries/tpc-h/templates/queries/11.sql +34 -0
- benchbox/_binaries/tpc-h/templates/queries/12.sql +35 -0
- benchbox/_binaries/tpc-h/templates/queries/13.sql +27 -0
- benchbox/_binaries/tpc-h/templates/queries/14.sql +20 -0
- benchbox/_binaries/tpc-h/templates/queries/15.sql +40 -0
- benchbox/_binaries/tpc-h/templates/queries/16.sql +37 -0
- benchbox/_binaries/tpc-h/templates/queries/17.sql +24 -0
- benchbox/_binaries/tpc-h/templates/queries/18.sql +39 -0
- benchbox/_binaries/tpc-h/templates/queries/19.sql +42 -0
- benchbox/_binaries/tpc-h/templates/queries/2.sql +50 -0
- benchbox/_binaries/tpc-h/templates/queries/20.sql +44 -0
- benchbox/_binaries/tpc-h/templates/queries/21.sql +46 -0
- benchbox/_binaries/tpc-h/templates/queries/22.sql +44 -0
- benchbox/_binaries/tpc-h/templates/queries/3.sql +29 -0
- benchbox/_binaries/tpc-h/templates/queries/4.sql +28 -0
- benchbox/_binaries/tpc-h/templates/queries/5.sql +31 -0
- benchbox/_binaries/tpc-h/templates/queries/6.sql +16 -0
- benchbox/_binaries/tpc-h/templates/queries/7.sql +46 -0
- benchbox/_binaries/tpc-h/templates/queries/8.sql +44 -0
- benchbox/_binaries/tpc-h/templates/queries/9.sql +39 -0
- benchbox/_binaries/tpc-h/templates/variants/12a.sql +27 -0
- benchbox/_binaries/tpc-h/templates/variants/13a.sql +30 -0
- benchbox/_binaries/tpc-h/templates/variants/14a.sql +18 -0
- benchbox/_binaries/tpc-h/templates/variants/15a.sql +39 -0
- benchbox/_binaries/tpc-h/templates/variants/8a.sql +77 -0
- benchbox/base.py +88 -121
- benchbox/cli/benchmarks.py +3 -3
- benchbox/cli/commands/calculate_qphh.py +55 -14
- benchbox/cli/commands/checks.py +1 -4
- benchbox/cli/commands/convert.py +8 -3
- benchbox/cli/commands/metrics.py +55 -14
- benchbox/cli/commands/results.py +131 -3
- benchbox/cli/commands/run.py +157 -22
- benchbox/cli/commands/visualize.py +3 -3
- benchbox/cli/composite_params.py +1 -1
- benchbox/cli/config.py +13 -3
- benchbox/cli/database.py +3 -3
- benchbox/cli/dryrun.py +30 -4
- benchbox/cli/exceptions.py +2 -1
- benchbox/cli/execution_pipeline.py +2 -1
- benchbox/cli/orchestrator.py +25 -71
- benchbox/cli/tuning.py +1 -1
- benchbox/core/ai_primitives/benchmark.py +53 -0
- benchbox/core/ai_primitives/dataframe_operations.py +1217 -0
- benchbox/core/base_benchmark.py +90 -68
- benchbox/core/coffeeshop/queries.py +1 -1
- benchbox/core/coffeeshop/schema.py +1 -1
- benchbox/core/comparison/plotter.py +5 -4
- benchbox/core/dataframe/__init__.py +26 -0
- benchbox/core/dataframe/benchmark_suite.py +5 -4
- benchbox/core/dataframe/context.py +45 -0
- benchbox/core/dataframe/data_loader.py +180 -79
- benchbox/core/dataframe/maintenance_interface.py +866 -0
- benchbox/core/dryrun.py +152 -22
- benchbox/core/expected_results/registry.py +22 -5
- benchbox/core/manifest/io.py +4 -3
- benchbox/core/metadata_primitives/__init__.py +31 -0
- benchbox/core/metadata_primitives/benchmark.py +337 -0
- benchbox/core/metadata_primitives/dataframe_operations.py +1824 -0
- benchbox/core/platform_registry.py +134 -45
- benchbox/core/read_primitives/benchmark.py +56 -4
- benchbox/core/read_primitives/dataframe_queries.py +6547 -0
- benchbox/core/results/__init__.py +47 -6
- benchbox/core/results/builder.py +909 -0
- benchbox/core/results/database.py +5 -5
- benchbox/core/results/exporter.py +58 -96
- benchbox/core/results/filenames.py +102 -0
- benchbox/core/results/loader.py +10 -9
- benchbox/core/results/metrics.py +211 -0
- benchbox/core/results/models.py +3 -1
- benchbox/core/results/normalizer.py +346 -0
- benchbox/core/results/platform_info.py +235 -0
- benchbox/core/results/query_normalizer.py +200 -0
- benchbox/core/results/schema.py +368 -69
- benchbox/core/runner/conversion.py +2 -0
- benchbox/core/runner/dataframe_runner.py +135 -131
- benchbox/core/runner/runner.py +111 -18
- benchbox/core/schemas.py +145 -3
- benchbox/core/ssb/generator.py +14 -2
- benchbox/core/tpc_compliance.py +4 -4
- benchbox/core/tpc_metrics.py +9 -4
- benchbox/core/tpcdi/generator/manifest.py +15 -2
- benchbox/core/tpcds/benchmark/runner.py +3 -7
- benchbox/core/tpcds/c_tools.py +34 -28
- benchbox/core/tpcds/dataframe_queries/queries.py +44 -21
- benchbox/core/tpcds/generator/filesystem.py +23 -11
- benchbox/core/tpcds/generator/manager.py +3 -2
- benchbox/core/tpcds/maintenance_test.py +281 -0
- benchbox/core/tpcds/power_test.py +21 -11
- benchbox/core/tpcds/throughput_test.py +27 -9
- benchbox/core/tpcds_obt/etl/transformer.py +24 -5
- benchbox/core/tpch/dataframe_queries.py +46 -43
- benchbox/core/tpch/generator.py +21 -8
- benchbox/core/tpch/maintenance_test.py +87 -0
- benchbox/core/tpch/power_test.py +21 -5
- benchbox/core/tpch/queries.py +2 -7
- benchbox/core/tpch/streams.py +3 -19
- benchbox/core/transaction_primitives/benchmark.py +99 -0
- benchbox/core/transaction_primitives/dataframe_operations.py +1294 -0
- benchbox/core/transaction_primitives/generator.py +11 -4
- benchbox/core/visualization/__init__.py +2 -2
- benchbox/core/visualization/charts.py +4 -4
- benchbox/core/visualization/dependencies.py +1 -12
- benchbox/core/visualization/exporters.py +15 -26
- benchbox/core/visualization/result_plotter.py +90 -49
- benchbox/core/visualization/templates.py +6 -6
- benchbox/core/write_primitives/__init__.py +13 -0
- benchbox/core/write_primitives/benchmark.py +66 -0
- benchbox/core/write_primitives/dataframe_operations.py +912 -0
- benchbox/core/write_primitives/generator.py +11 -4
- benchbox/mcp/__init__.py +5 -1
- benchbox/mcp/errors.py +29 -0
- benchbox/mcp/resources/registry.py +12 -7
- benchbox/mcp/schemas.py +62 -0
- benchbox/mcp/server.py +17 -14
- benchbox/mcp/tools/__init__.py +3 -0
- benchbox/mcp/tools/analytics.py +550 -582
- benchbox/mcp/tools/benchmark.py +603 -611
- benchbox/mcp/tools/discovery.py +156 -205
- benchbox/mcp/tools/results.py +332 -533
- benchbox/mcp/tools/visualization.py +449 -0
- benchbox/platforms/__init__.py +740 -622
- benchbox/platforms/adapter_factory.py +6 -6
- benchbox/platforms/azure_synapse.py +3 -7
- benchbox/platforms/base/adapter.py +189 -49
- benchbox/platforms/base/cloud_spark/config.py +8 -0
- benchbox/platforms/base/cloud_spark/mixins.py +96 -0
- benchbox/platforms/base/cloud_spark/session.py +4 -2
- benchbox/platforms/base/cloud_spark/staging.py +15 -7
- benchbox/platforms/base/data_loading.py +315 -1
- benchbox/platforms/base/format_capabilities.py +37 -2
- benchbox/platforms/base/utils.py +6 -4
- benchbox/platforms/bigquery.py +5 -6
- benchbox/platforms/clickhouse_cloud.py +263 -0
- benchbox/platforms/databricks/adapter.py +16 -15
- benchbox/platforms/databricks/dataframe_adapter.py +4 -1
- benchbox/platforms/dataframe/__init__.py +31 -0
- benchbox/platforms/dataframe/benchmark_mixin.py +779 -0
- benchbox/platforms/dataframe/cudf_df.py +3 -3
- benchbox/platforms/dataframe/dask_df.py +3 -3
- benchbox/platforms/dataframe/datafusion_df.py +152 -15
- benchbox/platforms/dataframe/delta_lake_maintenance.py +341 -0
- benchbox/platforms/dataframe/ducklake_maintenance.py +402 -0
- benchbox/platforms/dataframe/expression_family.py +47 -8
- benchbox/platforms/dataframe/hudi_maintenance.py +437 -0
- benchbox/platforms/dataframe/iceberg_maintenance.py +605 -0
- benchbox/platforms/dataframe/modin_df.py +3 -3
- benchbox/platforms/dataframe/pandas_df.py +3 -3
- benchbox/platforms/dataframe/pandas_family.py +59 -8
- benchbox/platforms/dataframe/platform_checker.py +16 -49
- benchbox/platforms/dataframe/polars_df.py +14 -12
- benchbox/platforms/dataframe/polars_maintenance.py +630 -0
- benchbox/platforms/dataframe/pyspark_df.py +15 -0
- benchbox/platforms/dataframe/pyspark_maintenance.py +613 -0
- benchbox/platforms/datafusion.py +5 -6
- benchbox/platforms/duckdb.py +2 -1
- benchbox/platforms/fabric_warehouse.py +15 -15
- benchbox/platforms/firebolt.py +3 -2
- benchbox/platforms/influxdb/adapter.py +7 -3
- benchbox/platforms/motherduck.py +3 -2
- benchbox/platforms/onehouse/__init__.py +39 -0
- benchbox/platforms/onehouse/onehouse_client.py +509 -0
- benchbox/platforms/onehouse/quanton_adapter.py +646 -0
- benchbox/platforms/postgresql.py +5 -9
- benchbox/platforms/presto.py +2 -2
- benchbox/platforms/pyspark/session.py +3 -3
- benchbox/platforms/pyspark/sql_adapter.py +2 -3
- benchbox/platforms/redshift.py +7 -7
- benchbox/platforms/snowflake.py +4 -4
- benchbox/platforms/snowpark_connect.py +2 -1
- benchbox/platforms/trino.py +2 -2
- benchbox/release/__init__.py +17 -0
- benchbox/release/content_validation.py +745 -0
- benchbox/release/workflow.py +17 -0
- benchbox/utils/VERSION_MANAGEMENT.md +1 -1
- benchbox/utils/cloud_storage.py +7 -5
- benchbox/utils/compression.py +8 -8
- benchbox/utils/compression_mixin.py +2 -1
- benchbox/utils/data_validation.py +23 -14
- benchbox/utils/dependencies.py +47 -7
- benchbox/utils/file_format.py +407 -0
- benchbox/utils/format_converters/__init__.py +5 -1
- benchbox/utils/format_converters/ducklake_converter.py +227 -0
- benchbox/utils/format_converters/vortex_converter.py +168 -0
- benchbox/utils/tpc_compilation.py +43 -0
- benchbox/utils/version.py +14 -2
- {benchbox-0.1.0.dist-info → benchbox-0.1.1.dist-info}/METADATA +15 -15
- benchbox-0.1.1.dist-info/RECORD +839 -0
- {benchbox-0.1.0.dist-info → benchbox-0.1.1.dist-info}/WHEEL +1 -1
- benchbox/_binaries/tpc-ds/darwin-arm64/query_templates/sqlserver.tpl +0 -37
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/README +0 -4
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/db2.tpl +0 -38
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/netezza.tpl +0 -38
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/oracle.tpl +0 -38
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query1.tpl +0 -62
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query10.tpl +0 -98
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query11.tpl +0 -119
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query12.tpl +0 -72
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query13.tpl +0 -89
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query15.tpl +0 -56
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query16.tpl +0 -76
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query17.tpl +0 -80
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query18.tpl +0 -73
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query19.tpl +0 -64
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query2.tpl +0 -94
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query20.tpl +0 -67
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query21.tpl +0 -65
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query22.tpl +0 -54
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query23.tpl +0 -144
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query24.tpl +0 -147
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query25.tpl +0 -84
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query26.tpl +0 -61
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query28.tpl +0 -90
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query29.tpl +0 -85
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query3.tpl +0 -58
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query30.tpl +0 -66
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query31.tpl +0 -88
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query32.tpl +0 -65
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query33.tpl +0 -113
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query34.tpl +0 -77
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query35.tpl +0 -98
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query36.tpl +0 -74
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query37.tpl +0 -57
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query38.tpl +0 -58
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query39.tpl +0 -93
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query4.tpl +0 -154
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query40.tpl +0 -63
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query41.tpl +0 -90
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query42.tpl +0 -64
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query43.tpl +0 -55
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query44.tpl +0 -72
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query45.tpl +0 -56
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query46.tpl +0 -78
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query47.tpl +0 -89
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query48.tpl +0 -104
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query49.tpl +0 -164
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query5.tpl +0 -165
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query50.tpl +0 -96
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query51.tpl +0 -80
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query52.tpl +0 -59
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query53.tpl +0 -64
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query54.tpl +0 -95
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query55.tpl +0 -52
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query56.tpl +0 -108
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query57.tpl +0 -87
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query58.tpl +0 -101
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query59.tpl +0 -79
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query6.tpl +0 -62
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query60.tpl +0 -115
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query61.tpl +0 -83
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query62.tpl +0 -71
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query63.tpl +0 -64
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query64.tpl +0 -157
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query65.tpl +0 -62
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query66.tpl +0 -261
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query67.tpl +0 -81
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query68.tpl +0 -82
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query69.tpl +0 -85
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query7.tpl +0 -60
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query70.tpl +0 -73
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query71.tpl +0 -74
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query72.tpl +0 -67
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query73.tpl +0 -69
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query74.tpl +0 -99
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query75.tpl +0 -107
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query76.tpl +0 -64
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query77.tpl +0 -145
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query78.tpl +0 -94
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query79.tpl +0 -60
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query8.tpl +0 -144
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query80.tpl +0 -131
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query81.tpl +0 -68
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query82.tpl +0 -56
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query83.tpl +0 -104
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query84.tpl +0 -58
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query85.tpl +0 -121
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query86.tpl +0 -60
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query87.tpl +0 -56
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query88.tpl +0 -128
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query89.tpl +0 -75
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query9.tpl +0 -88
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query90.tpl +0 -58
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query91.tpl +0 -68
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query92.tpl +0 -68
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query93.tpl +0 -53
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query94.tpl +0 -67
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query95.tpl +0 -71
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query96.tpl +0 -52
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query97.tpl +0 -62
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query98.tpl +0 -70
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query99.tpl +0 -69
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/sqlserver.tpl +0 -37
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/templates.lst +0 -99
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/README +0 -4
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/ansi.tpl +0 -38
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/db2.tpl +0 -38
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/netezza.tpl +0 -38
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/oracle.tpl +0 -38
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query1.tpl +0 -62
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query11.tpl +0 -119
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query12.tpl +0 -72
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query13.tpl +0 -89
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query14.tpl +0 -247
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query15.tpl +0 -56
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query16.tpl +0 -76
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query17.tpl +0 -80
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query19.tpl +0 -64
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query2.tpl +0 -94
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query20.tpl +0 -67
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query21.tpl +0 -65
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query23.tpl +0 -144
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query24.tpl +0 -147
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query25.tpl +0 -84
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query26.tpl +0 -61
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query27.tpl +0 -68
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query28.tpl +0 -90
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query29.tpl +0 -85
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query3.tpl +0 -58
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query30.tpl +0 -66
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query31.tpl +0 -88
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query32.tpl +0 -65
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query33.tpl +0 -113
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query34.tpl +0 -77
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query37.tpl +0 -57
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query38.tpl +0 -58
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query39.tpl +0 -93
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query4.tpl +0 -154
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query40.tpl +0 -63
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query41.tpl +0 -90
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query42.tpl +0 -64
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query43.tpl +0 -55
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query44.tpl +0 -72
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query45.tpl +0 -56
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query46.tpl +0 -78
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query47.tpl +0 -89
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query48.tpl +0 -104
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query49.tpl +0 -164
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query50.tpl +0 -96
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query52.tpl +0 -59
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query53.tpl +0 -64
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query54.tpl +0 -95
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query55.tpl +0 -52
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query56.tpl +0 -108
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query57.tpl +0 -87
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query58.tpl +0 -101
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query59.tpl +0 -79
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query6.tpl +0 -62
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query60.tpl +0 -115
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query61.tpl +0 -83
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query62.tpl +0 -71
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query63.tpl +0 -64
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query64.tpl +0 -157
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query65.tpl +0 -62
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query66.tpl +0 -261
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query68.tpl +0 -82
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query69.tpl +0 -85
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query7.tpl +0 -60
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query71.tpl +0 -74
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query72.tpl +0 -67
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query73.tpl +0 -69
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query74.tpl +0 -99
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query75.tpl +0 -107
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query76.tpl +0 -64
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query78.tpl +0 -94
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query79.tpl +0 -60
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query8.tpl +0 -144
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query81.tpl +0 -68
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query82.tpl +0 -56
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query83.tpl +0 -104
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query84.tpl +0 -58
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query85.tpl +0 -121
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query87.tpl +0 -56
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query88.tpl +0 -128
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query89.tpl +0 -75
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query9.tpl +0 -88
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query90.tpl +0 -58
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query91.tpl +0 -68
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query92.tpl +0 -68
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query93.tpl +0 -53
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query94.tpl +0 -67
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query95.tpl +0 -71
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query96.tpl +0 -52
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query97.tpl +0 -62
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query98.tpl +0 -70
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query99.tpl +0 -69
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/sqlserver.tpl +0 -37
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/templates.lst +0 -99
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/README +0 -4
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/ansi.tpl +0 -38
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/db2.tpl +0 -38
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/netezza.tpl +0 -38
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/oracle.tpl +0 -38
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query1.tpl +0 -62
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query10.tpl +0 -98
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query11.tpl +0 -119
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query12.tpl +0 -72
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query13.tpl +0 -89
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query14.tpl +0 -247
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query15.tpl +0 -56
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query16.tpl +0 -76
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query17.tpl +0 -80
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query18.tpl +0 -73
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query19.tpl +0 -64
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query2.tpl +0 -94
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query20.tpl +0 -67
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query21.tpl +0 -65
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query22.tpl +0 -54
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query23.tpl +0 -144
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query24.tpl +0 -147
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query25.tpl +0 -84
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query26.tpl +0 -61
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query27.tpl +0 -68
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query28.tpl +0 -90
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query29.tpl +0 -85
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query3.tpl +0 -58
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query30.tpl +0 -66
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query31.tpl +0 -88
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query32.tpl +0 -65
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query33.tpl +0 -113
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query34.tpl +0 -77
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query35.tpl +0 -98
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query36.tpl +0 -74
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query37.tpl +0 -57
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query38.tpl +0 -58
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query39.tpl +0 -93
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query4.tpl +0 -154
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query40.tpl +0 -63
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query41.tpl +0 -90
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query42.tpl +0 -64
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query43.tpl +0 -55
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query44.tpl +0 -72
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query45.tpl +0 -56
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query46.tpl +0 -78
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query47.tpl +0 -89
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query48.tpl +0 -104
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query49.tpl +0 -164
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query5.tpl +0 -165
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query50.tpl +0 -96
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query51.tpl +0 -80
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query52.tpl +0 -59
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query53.tpl +0 -64
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query54.tpl +0 -95
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query55.tpl +0 -52
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query56.tpl +0 -108
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query57.tpl +0 -87
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query58.tpl +0 -101
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query59.tpl +0 -79
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query6.tpl +0 -62
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query60.tpl +0 -115
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query61.tpl +0 -83
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query62.tpl +0 -71
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query63.tpl +0 -64
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query64.tpl +0 -157
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query65.tpl +0 -62
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query66.tpl +0 -261
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query67.tpl +0 -81
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query68.tpl +0 -82
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query69.tpl +0 -85
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query7.tpl +0 -60
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query70.tpl +0 -73
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query71.tpl +0 -74
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query72.tpl +0 -67
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query73.tpl +0 -69
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query74.tpl +0 -99
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query75.tpl +0 -107
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query76.tpl +0 -64
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query77.tpl +0 -145
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query78.tpl +0 -94
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query79.tpl +0 -60
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query8.tpl +0 -144
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query80.tpl +0 -131
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query81.tpl +0 -68
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query82.tpl +0 -56
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query83.tpl +0 -104
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query84.tpl +0 -58
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query85.tpl +0 -121
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query86.tpl +0 -60
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query87.tpl +0 -56
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query88.tpl +0 -128
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query89.tpl +0 -75
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query9.tpl +0 -88
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query90.tpl +0 -58
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query91.tpl +0 -68
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query92.tpl +0 -68
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query93.tpl +0 -53
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query94.tpl +0 -67
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query95.tpl +0 -71
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query96.tpl +0 -52
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query97.tpl +0 -62
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query98.tpl +0 -70
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query99.tpl +0 -69
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/sqlserver.tpl +0 -37
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/templates.lst +0 -99
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/README +0 -4
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/ansi.tpl +0 -38
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/db2.tpl +0 -38
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/netezza.tpl +0 -38
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/oracle.tpl +0 -38
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query1.tpl +0 -62
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query10.tpl +0 -98
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query11.tpl +0 -119
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query12.tpl +0 -72
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query13.tpl +0 -89
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query14.tpl +0 -247
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query15.tpl +0 -56
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query16.tpl +0 -76
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query17.tpl +0 -80
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query18.tpl +0 -73
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query19.tpl +0 -64
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query2.tpl +0 -94
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query20.tpl +0 -67
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query21.tpl +0 -65
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query22.tpl +0 -54
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query23.tpl +0 -144
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query24.tpl +0 -147
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query25.tpl +0 -84
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query26.tpl +0 -61
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query27.tpl +0 -68
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query28.tpl +0 -90
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query29.tpl +0 -85
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query3.tpl +0 -58
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query30.tpl +0 -66
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query31.tpl +0 -88
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query32.tpl +0 -65
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query33.tpl +0 -113
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query34.tpl +0 -77
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query35.tpl +0 -98
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query36.tpl +0 -74
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query37.tpl +0 -57
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query38.tpl +0 -58
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query39.tpl +0 -93
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query4.tpl +0 -154
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query40.tpl +0 -63
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query41.tpl +0 -90
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query42.tpl +0 -64
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query43.tpl +0 -55
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query44.tpl +0 -72
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query45.tpl +0 -56
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query46.tpl +0 -78
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query47.tpl +0 -89
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query48.tpl +0 -104
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query49.tpl +0 -164
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query5.tpl +0 -165
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query50.tpl +0 -96
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query51.tpl +0 -80
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query52.tpl +0 -59
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query53.tpl +0 -64
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query54.tpl +0 -95
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query55.tpl +0 -52
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query56.tpl +0 -108
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query57.tpl +0 -87
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query58.tpl +0 -101
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query59.tpl +0 -79
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query6.tpl +0 -62
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query60.tpl +0 -115
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query61.tpl +0 -83
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query62.tpl +0 -71
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query63.tpl +0 -64
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query64.tpl +0 -157
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query65.tpl +0 -62
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query66.tpl +0 -261
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query67.tpl +0 -81
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query68.tpl +0 -82
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query69.tpl +0 -85
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query7.tpl +0 -60
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query70.tpl +0 -73
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query71.tpl +0 -74
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query72.tpl +0 -67
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query73.tpl +0 -69
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query74.tpl +0 -99
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query75.tpl +0 -107
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query76.tpl +0 -64
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query77.tpl +0 -145
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query78.tpl +0 -94
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query79.tpl +0 -60
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query8.tpl +0 -144
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query80.tpl +0 -131
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query81.tpl +0 -68
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query82.tpl +0 -56
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query83.tpl +0 -104
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query84.tpl +0 -58
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query85.tpl +0 -121
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query86.tpl +0 -60
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query87.tpl +0 -56
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query88.tpl +0 -128
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query89.tpl +0 -75
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query9.tpl +0 -88
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query90.tpl +0 -58
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query91.tpl +0 -68
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query92.tpl +0 -68
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query93.tpl +0 -53
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query94.tpl +0 -67
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query95.tpl +0 -71
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query96.tpl +0 -52
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query97.tpl +0 -62
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query98.tpl +0 -70
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query99.tpl +0 -69
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/sqlserver.tpl +0 -37
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/templates.lst +0 -99
- benchbox-0.1.0.dist-info/RECORD +0 -1192
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/README +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/ansi.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/db2.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/netezza.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/oracle.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query1.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query10.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query11.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query12.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query13.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query14.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query15.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query16.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query17.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query18.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query19.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query2.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query20.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query21.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query22.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query23.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query24.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query25.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query26.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query27.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query28.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query29.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query3.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query30.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query31.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query32.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query33.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query34.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query35.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query36.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query37.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query38.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query39.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query4.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query40.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query41.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query42.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query43.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query44.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query45.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query46.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query47.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query48.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query49.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query5.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query50.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query51.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query52.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query53.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query54.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query55.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query56.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query57.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query58.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query59.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query6.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query60.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query61.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query62.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query63.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query64.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query65.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query66.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query67.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query68.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query69.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query7.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query70.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query71.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query73.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query74.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query75.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query76.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query77.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query78.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query79.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query8.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query80.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query81.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query82.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query83.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query84.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query85.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query86.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query87.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query88.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query89.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query9.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query90.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query91.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query92.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query93.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query94.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query95.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query96.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query97.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query98.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query99.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/templates.lst +0 -0
- {benchbox-0.1.0.dist-info → benchbox-0.1.1.dist-info}/entry_points.txt +0 -0
- {benchbox-0.1.0.dist-info → benchbox-0.1.1.dist-info}/licenses/LICENSE +0 -0
- {benchbox-0.1.0.dist-info → benchbox-0.1.1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,866 @@
|
|
|
1
|
+
"""DataFrame Maintenance Operations Interface.
|
|
2
|
+
|
|
3
|
+
This module defines the protocol and base classes for DataFrame maintenance operations,
|
|
4
|
+
enabling TPC-H and TPC-DS compliance testing on DataFrame platforms like Delta Lake,
|
|
5
|
+
Iceberg, and Parquet-based systems.
|
|
6
|
+
|
|
7
|
+
Architecture:
|
|
8
|
+
- DataFrameMaintenanceOperations: Protocol defining maintenance operations
|
|
9
|
+
- DataFrameMaintenanceCapabilities: Capabilities declaration for platforms
|
|
10
|
+
- MaintenanceResult: Standardized result container
|
|
11
|
+
|
|
12
|
+
Supported Operation Types:
|
|
13
|
+
- INSERT: Add new rows (RF1-like operations)
|
|
14
|
+
- UPDATE: Modify existing rows (dimension updates)
|
|
15
|
+
- DELETE: Remove rows (RF2-like operations)
|
|
16
|
+
- MERGE: Upsert operations (Delta Lake, Iceberg)
|
|
17
|
+
|
|
18
|
+
Platform Compatibility:
|
|
19
|
+
- Delta Lake: Full ACID support, MERGE/UPDATE/DELETE
|
|
20
|
+
- Iceberg: Row-level operations, partition-based deletes
|
|
21
|
+
- Parquet: File-level append/overwrite (no row-level operations)
|
|
22
|
+
- Polars: LazyFrame append, file replacement
|
|
23
|
+
- PySpark: Delta Lake or file-based operations
|
|
24
|
+
|
|
25
|
+
Copyright 2026 Joe Harris / BenchBox Project
|
|
26
|
+
|
|
27
|
+
Licensed under the MIT License. See LICENSE file in the project root for details.
|
|
28
|
+
"""
|
|
29
|
+
|
|
30
|
+
from __future__ import annotations
|
|
31
|
+
|
|
32
|
+
import logging
|
|
33
|
+
import time
|
|
34
|
+
from abc import ABC, abstractmethod
|
|
35
|
+
from dataclasses import dataclass, field
|
|
36
|
+
from enum import Enum
|
|
37
|
+
from typing import TYPE_CHECKING, Any, Protocol, runtime_checkable
|
|
38
|
+
|
|
39
|
+
if TYPE_CHECKING:
|
|
40
|
+
from pathlib import Path
|
|
41
|
+
|
|
42
|
+
try:
|
|
43
|
+
import pyarrow as pa
|
|
44
|
+
|
|
45
|
+
PYARROW_AVAILABLE = True
|
|
46
|
+
except ImportError:
|
|
47
|
+
pa = None # type: ignore[assignment]
|
|
48
|
+
PYARROW_AVAILABLE = False
|
|
49
|
+
|
|
50
|
+
logger = logging.getLogger(__name__)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class MaintenanceOperationType(Enum):
    """Kinds of maintenance operation a DataFrame platform may be asked to run.

    The members correspond to the TPC-H refresh functions (RF1/RF2) and the
    TPC-DS data maintenance operations (DM1-DM4). A given platform is not
    required to support every member.
    """

    # --- Basic operations (all platforms) ---
    # RF1: insert new rows.
    INSERT = "insert"
    # RF2: delete rows.
    DELETE = "delete"

    # --- Advanced operations (ACID-compliant platforms only) ---
    # Modify rows that already exist.
    UPDATE = "update"
    # Upsert: insert the row, or update it when it is already present.
    MERGE = "merge"

    # --- Batch operations ---
    # Large batch insert.
    BULK_INSERT = "bulk_insert"
    # Partition-based delete.
    BULK_DELETE = "bulk_delete"
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
class TransactionIsolation(Enum):
    """Isolation level under which a maintenance operation is executed."""

    # No transaction support at all (file-based platforms).
    NONE = "none"

    READ_COMMITTED = "read_committed"
    REPEATABLE_READ = "repeatable_read"
    SERIALIZABLE = "serializable"

    # Optimistic concurrency, as offered by Delta Lake and Iceberg.
    SNAPSHOT = "snapshot"
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
@dataclass
class DataFrameMaintenanceCapabilities:
    """Capability declaration for one DataFrame platform.

    A platform adapter publishes an instance of this class so that the runner
    can check an operation before executing it and pick a suitable
    implementation strategy.

    Attributes:
        platform_name: Platform identifier, e.g. "delta-lake" or "iceberg".
        supports_insert: New rows can be added.
        supports_delete: Rows can be removed.
        supports_update: Existing rows can be modified in place.
        supports_merge: Upsert operations are available.
        supports_transactions: Operations can be made atomic via transactions.
        transaction_isolation: Highest isolation level the platform offers.
        supports_partitioned_delete: Whole partitions can be dropped efficiently.
        supports_row_level_delete: Individual rows can be deleted.
        supports_time_travel: Historical versions can be queried.
        max_batch_size: Recommended upper bound on rows per operation.
        notes: Free-form, platform-specific remarks.
    """

    platform_name: str
    # Appending is on by default; most platforms support it.
    supports_insert: bool = True
    # Row-level delete, in-place update and MERGE are opt-in because they
    # need ACID / advanced support.
    supports_delete: bool = False
    supports_update: bool = False
    supports_merge: bool = False
    supports_transactions: bool = False
    transaction_isolation: TransactionIsolation = TransactionIsolation.NONE
    # File-level deletion versus row-level deletion.
    supports_partitioned_delete: bool = False
    supports_row_level_delete: bool = False
    supports_time_travel: bool = False
    max_batch_size: int = 100000
    notes: str = ""

    def supports_operation(self, operation: MaintenanceOperationType) -> bool:
        """Report whether ``operation`` is available on this platform.

        Args:
            operation: The operation type being queried.

        Returns:
            The capability flag backing ``operation``; ``False`` for any
            value that has no entry in the lookup table.
        """
        inserts = self.supports_insert
        partition_delete = self.supports_partitioned_delete

        by_operation = {
            MaintenanceOperationType.INSERT: inserts,
            MaintenanceOperationType.BULK_INSERT: inserts,
            # A plain DELETE may be served by either delete flavour.
            MaintenanceOperationType.DELETE: self.supports_delete or partition_delete,
            MaintenanceOperationType.BULK_DELETE: partition_delete,
            MaintenanceOperationType.UPDATE: self.supports_update,
            MaintenanceOperationType.MERGE: self.supports_merge,
        }

        try:
            return by_operation[operation]
        except KeyError:
            return False

    def validate_tpc_compliance(self) -> tuple[bool, list[str]]:
        """Check the declared capabilities against TPC maintenance needs.

        TPC-H needs RF1 (insert) and RF2 (delete); TPC-DS additionally needs
        UPDATE for dimension updates.

        Returns:
            A pair ``(is_compliant, missing)`` where ``missing`` lists one
            message per unmet requirement and ``is_compliant`` is ``True``
            only when that list is empty.
        """
        can_delete = self.supports_delete or self.supports_partitioned_delete

        # (requirement satisfied?, message reported when it is not), in
        # reporting order: TPC-H minimums first, then TPC-DS.
        requirements = (
            (self.supports_insert, "INSERT required for TPC-H RF1"),
            (can_delete, "DELETE required for TPC-H RF2"),
            (self.supports_update, "UPDATE required for TPC-DS dimension updates (DM3)"),
        )

        missing = [message for satisfied, message in requirements if not satisfied]
        return not missing, missing
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
# Pre-defined capability profiles for common platforms
|
|
163
|
+
# Capability profile for Delta Lake: every maintenance feature is enabled.
DELTA_LAKE_CAPABILITIES = DataFrameMaintenanceCapabilities(
    platform_name="delta-lake",
    notes="Full ACID compliance via Delta Lake protocol",
    max_batch_size=1_000_000,
    # Row operations
    supports_insert=True,
    supports_update=True,
    supports_merge=True,
    # Delete flavours
    supports_delete=True,
    supports_row_level_delete=True,
    supports_partitioned_delete=True,
    # Transactions and versioning
    supports_transactions=True,
    transaction_isolation=TransactionIsolation.SNAPSHOT,
    supports_time_travel=True,
)
|
|
177
|
+
|
|
178
|
+
# Capability profile for Apache Iceberg: every maintenance feature is enabled.
ICEBERG_CAPABILITIES = DataFrameMaintenanceCapabilities(
    platform_name="iceberg",
    notes="Full ACID compliance via Apache Iceberg",
    max_batch_size=1_000_000,
    # Row operations
    supports_insert=True,
    supports_update=True,
    supports_merge=True,
    # Delete flavours
    supports_delete=True,
    supports_row_level_delete=True,
    supports_partitioned_delete=True,
    # Transactions and versioning
    supports_transactions=True,
    transaction_isolation=TransactionIsolation.SNAPSHOT,
    supports_time_travel=True,
)
|
|
192
|
+
|
|
193
|
+
# Capability profile for Apache Hudi: every maintenance feature is enabled.
HUDI_CAPABILITIES = DataFrameMaintenanceCapabilities(
    platform_name="hudi",
    notes=(
        "Full ACID compliance via Apache Hudi. Requires PySpark with hudi-spark-bundle. "
        "All maintenance operations use Spark SQL (no pure Python library like delta-rs/pyiceberg)."
    ),
    max_batch_size=1_000_000,
    # Row operations
    supports_insert=True,
    supports_update=True,
    supports_merge=True,
    # Delete flavours
    supports_delete=True,
    supports_row_level_delete=True,
    supports_partitioned_delete=True,
    # Transactions and versioning
    supports_transactions=True,
    transaction_isolation=TransactionIsolation.SNAPSHOT,
    supports_time_travel=True,
)
|
|
210
|
+
|
|
211
|
+
# Capability profile for plain Parquet: file-level operations only.
PARQUET_CAPABILITIES = DataFrameMaintenanceCapabilities(
    platform_name="parquet",
    notes="File-based operations only. Updates require read-filter-write pattern.",
    max_batch_size=10_000_000,
    # Row operations: inserting means appending new files; nothing in place.
    supports_insert=True,
    supports_update=False,
    supports_merge=False,
    # Delete flavours: partition directories can be removed, single rows cannot.
    supports_delete=False,
    supports_row_level_delete=False,
    supports_partitioned_delete=True,
    # Transactions and versioning: none.
    supports_transactions=False,
    transaction_isolation=TransactionIsolation.NONE,
    supports_time_travel=False,
)
|
|
225
|
+
|
|
226
|
+
# Capability profile for Polars: row operations via read-modify-write rewrites.
POLARS_CAPABILITIES = DataFrameMaintenanceCapabilities(
    platform_name="polars",
    notes="Full TPC compliance via read-modify-write. RAM-limited; use Delta Lake/Iceberg for large datasets.",
    max_batch_size=10_000_000,
    # Row operations
    supports_insert=True,  # append to Parquet files
    supports_update=True,  # read-modify-write pattern
    supports_merge=True,  # read-join-write pattern
    # Delete flavours
    supports_delete=True,  # read-filter-write pattern
    supports_row_level_delete=True,  # via full table rewrite
    supports_partitioned_delete=True,  # can manage partition files
    # Transactions and versioning
    supports_transactions=False,  # no transaction log
    transaction_isolation=TransactionIsolation.NONE,
    supports_time_travel=False,  # no versioning
)
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
@dataclass
class MaintenanceResult:
    """Outcome of a single maintenance operation.

    Gives every DataFrame platform the same result shape so that TPC
    metrics can be computed uniformly.

    Attributes:
        operation_type: Type of operation performed
        success: Whether the operation completed successfully
        start_time: Operation start timestamp (Unix time)
        end_time: Operation end timestamp (Unix time)
        duration: Operation duration in seconds
        rows_affected: Number of rows inserted/updated/deleted
        error_message: Error description if operation failed
        transaction_id: Platform-specific transaction identifier
        metrics: Additional platform-specific metrics
    """

    operation_type: MaintenanceOperationType
    success: bool
    start_time: float
    end_time: float
    duration: float
    rows_affected: int
    error_message: str | None = None
    transaction_id: str | None = None
    metrics: dict[str, Any] = field(default_factory=dict)

    @classmethod
    def failure(
        cls,
        operation_type: MaintenanceOperationType,
        error_message: str,
        start_time: float | None = None,
    ) -> MaintenanceResult:
        """Create a failure result.

        Args:
            operation_type: The operation that failed
            error_message: Description of the failure
            start_time: Optional start time (defaults to now)

        Returns:
            MaintenanceResult with ``success=False``, ``rows_affected=0`` and
            ``duration`` equal to ``end_time - start_time``.
        """
        now = time.time()
        # Resolve the start exactly once with an explicit None check. A
        # truthiness test would treat a legitimate 0.0 timestamp as "missing"
        # and make start_time disagree with the reported duration.
        started = now if start_time is None else start_time
        return cls(
            operation_type=operation_type,
            success=False,
            start_time=started,
            end_time=now,
            duration=now - started,
            rows_affected=0,
            error_message=error_message,
        )
|
|
298
|
+
|
|
299
|
+
|
|
300
|
+
@runtime_checkable
class DataFrameMaintenanceOperations(Protocol):
    """Structural interface for DataFrame maintenance handlers.

    Covers the maintenance operations required for TPC-H RF1/RF2 and
    TPC-DS DM1-DM4 compliance on DataFrame platforms.

    A conforming implementation is expected to:
        1. Report what it supports through get_capabilities()
        2. Implement the supported operations and raise NotImplementedError
           for the rest
        3. Return a MaintenanceResult from every operation
        4. Take care of batching itself, according to platform limits

    Example Implementation:
        class DeltaLakeMaintenanceOperations:
            def get_capabilities(self) -> DataFrameMaintenanceCapabilities:
                return DELTA_LAKE_CAPABILITIES

            def insert_rows(self, table_path, dataframe, ...) -> MaintenanceResult:
                # Delta Lake-specific INSERT implementation
                ...
    """

    def get_capabilities(self) -> DataFrameMaintenanceCapabilities:
        """Describe which maintenance operations this platform supports.

        Returns:
            The platform's DataFrameMaintenanceCapabilities.
        """

    def insert_rows(
        self,
        table_path: Path | str,
        dataframe: Any,
        partition_columns: list[str] | None = None,
        mode: str = "append",
    ) -> MaintenanceResult:
        """Write the rows of a DataFrame into a table (TPC-H RF1-like).

        Args:
            table_path: Location of the table/directory
            dataframe: DataFrame holding the rows to insert
            partition_columns: Partitioning columns, where applicable
            mode: Write mode ("append" or "overwrite")

        Returns:
            MaintenanceResult describing the outcome

        Raises:
            NotImplementedError: If INSERT is not supported
        """

    def delete_rows(
        self,
        table_path: Path | str,
        condition: str | Any,
    ) -> MaintenanceResult:
        """Remove the rows that satisfy a condition (TPC-H RF2-like).

        Args:
            table_path: Location of the table/directory
            condition: SQL string or platform-specific predicate selecting
                the rows to delete

        Returns:
            MaintenanceResult describing the outcome

        Raises:
            NotImplementedError: If DELETE is not supported
        """

    def update_rows(
        self,
        table_path: Path | str,
        condition: str | Any,
        updates: dict[str, Any],
    ) -> MaintenanceResult:
        """Modify the rows that satisfy a condition (TPC-DS dimension updates).

        Args:
            table_path: Location of the table/directory
            condition: SQL string or platform-specific predicate selecting
                the rows to update
            updates: Mapping of column name to new value

        Returns:
            MaintenanceResult describing the outcome

        Raises:
            NotImplementedError: If UPDATE is not supported
        """

    def merge_rows(
        self,
        table_path: Path | str,
        source_dataframe: Any,
        merge_condition: str | Any,
        when_matched: dict[str, Any] | None = None,
        when_not_matched: dict[str, Any] | None = None,
    ) -> MaintenanceResult:
        """Upsert rows from a source DataFrame into the target table.

        MERGE semantics: matched rows are updated, unmatched rows are
        inserted.

        Args:
            table_path: Location of the target table
            source_dataframe: DataFrame holding the source rows
            merge_condition: Join condition used to match rows
            when_matched: Updates applied on a match (None = no update)
            when_not_matched: Values inserted when unmatched (None = no insert)

        Returns:
            MaintenanceResult describing the outcome

        Raises:
            NotImplementedError: If MERGE is not supported
        """
|
|
428
|
+
|
|
429
|
+
|
|
430
|
+
class BaseDataFrameMaintenanceOperations(ABC):
    """Abstract base class for DataFrame maintenance implementations.

    Provides common functionality for maintenance operations including:
    - Capability checking before operations
    - Timing and result construction
    - Error handling
    - DataFrame type conversion (_convert_to_arrow)

    Subclasses must implement:
    - _get_capabilities(): Return platform-specific capabilities
    - _do_insert(): Platform-specific INSERT implementation
    - _do_delete(): Platform-specific DELETE implementation
    - _do_update(): Platform-specific UPDATE implementation (if supported)
    - _do_merge(): Platform-specific MERGE implementation (if supported)
    """

    def __init__(self) -> None:
        """Initialize the maintenance operations handler."""
        self.logger = logging.getLogger(f"{__name__}.{self.__class__.__name__}")
        # Filled lazily by get_capabilities().
        self._capabilities: DataFrameMaintenanceCapabilities | None = None

    def _convert_to_arrow(self, dataframe: Any) -> Any:
        """Convert various DataFrame types to PyArrow Table.

        Args:
            dataframe: Input DataFrame (Polars DataFrame/LazyFrame, Pandas,
                or PyArrow)

        Returns:
            PyArrow Table

        Raises:
            ImportError: If PyArrow is not available
            TypeError: If dataframe type is not supported
        """
        if not PYARROW_AVAILABLE:
            raise ImportError(
                "PyArrow is not installed. Install with: pip install pyarrow\n"
                "PyArrow is required for maintenance operations."
            )

        # Already PyArrow
        if isinstance(dataframe, pa.Table):
            return dataframe

        # A lazy frame must be materialized before it can be exported. It is
        # recognized by having collect() together with either to_arrow() (the
        # combination the previous code handled) or lazy(). The lazy() test is
        # what lets a Polars LazyFrame through, since it has no to_arrow();
        # NOTE(review): assumes LazyFrame exposes lazy() - confirm against the
        # supported Polars versions. Requiring one of the two also avoids
        # calling collect() on unrelated objects that merely define it.
        if hasattr(dataframe, "collect") and (hasattr(dataframe, "to_arrow") or hasattr(dataframe, "lazy")):
            dataframe = dataframe.collect()

        # Polars DataFrame (or a collected LazyFrame)
        if hasattr(dataframe, "to_arrow"):
            return dataframe.to_arrow()

        # Pandas DataFrame
        if hasattr(dataframe, "to_parquet") and hasattr(dataframe, "columns"):
            return pa.Table.from_pandas(dataframe)

        raise TypeError(
            f"Unsupported DataFrame type: {type(dataframe)}. "
            f"Expected Polars DataFrame, Pandas DataFrame, or PyArrow Table."
        )

    @abstractmethod
    def _get_capabilities(self) -> DataFrameMaintenanceCapabilities:
        """Return platform-specific capabilities.

        Returns:
            DataFrameMaintenanceCapabilities for this platform
        """
        ...

    def get_capabilities(self) -> DataFrameMaintenanceCapabilities:
        """Return the platform's maintenance capabilities (cached).

        Returns:
            DataFrameMaintenanceCapabilities describing what operations
            this platform supports.
        """
        if self._capabilities is None:
            self._capabilities = self._get_capabilities()
        return self._capabilities

    def _check_capability(self, operation: MaintenanceOperationType) -> None:
        """Check if an operation is supported, raise if not.

        Args:
            operation: The operation to check

        Raises:
            NotImplementedError: If the operation is not supported
        """
        caps = self.get_capabilities()
        if not caps.supports_operation(operation):
            raise NotImplementedError(
                f"{caps.platform_name} does not support {operation.value} operations. "
                f"Consider using Delta Lake or Iceberg for full maintenance support."
            )

    def _execute_operation(
        self,
        operation: MaintenanceOperationType,
        label: str,
        action: Any,
    ) -> MaintenanceResult:
        """Run one maintenance action with shared timing and error handling.

        Args:
            operation: Operation type used for the capability check and result
            label: Upper-case operation name used in the failure log message
            action: Zero-argument callable returning the number of rows affected

        Returns:
            A successful MaintenanceResult, or a failure result when the
            action raises anything other than NotImplementedError.

        Raises:
            NotImplementedError: If the operation is unsupported; propagated
                so callers can tell "unsupported" apart from "failed".
        """
        start_time = time.time()
        try:
            self._check_capability(operation)
            rows_affected = action()
        except NotImplementedError:
            raise
        except Exception as e:
            # The exception is converted into a result object, so keep the
            # traceback in the log where it would otherwise be lost.
            self.logger.error(f"{label} failed: {e}", exc_info=True)
            return MaintenanceResult.failure(operation, str(e), start_time)

        end_time = time.time()
        return MaintenanceResult(
            operation_type=operation,
            success=True,
            start_time=start_time,
            end_time=end_time,
            duration=end_time - start_time,
            rows_affected=rows_affected,
        )

    @abstractmethod
    def _do_insert(
        self,
        table_path: Path | str,
        dataframe: Any,
        partition_columns: list[str] | None,
        mode: str,
    ) -> int:
        """Platform-specific INSERT implementation.

        Args:
            table_path: Path to the table/directory
            dataframe: DataFrame containing rows to insert
            partition_columns: Columns to partition by
            mode: Write mode

        Returns:
            Number of rows inserted
        """
        ...

    def insert_rows(
        self,
        table_path: Path | str,
        dataframe: Any,
        partition_columns: list[str] | None = None,
        mode: str = "append",
    ) -> MaintenanceResult:
        """Insert rows into a table.

        Args:
            table_path: Path to the table/directory
            dataframe: DataFrame containing rows to insert
            partition_columns: Columns to partition by
            mode: Write mode ("append" or "overwrite")

        Returns:
            MaintenanceResult with operation outcome

        Raises:
            NotImplementedError: If INSERT is not supported
        """
        return self._execute_operation(
            MaintenanceOperationType.INSERT,
            "INSERT",
            lambda: self._do_insert(table_path, dataframe, partition_columns, mode),
        )

    @abstractmethod
    def _do_delete(
        self,
        table_path: Path | str,
        condition: str | Any,
    ) -> int:
        """Platform-specific DELETE implementation.

        Args:
            table_path: Path to the table/directory
            condition: Delete condition

        Returns:
            Number of rows deleted
        """
        ...

    def delete_rows(
        self,
        table_path: Path | str,
        condition: str | Any,
    ) -> MaintenanceResult:
        """Delete rows matching a condition.

        Args:
            table_path: Path to the table/directory
            condition: Delete condition

        Returns:
            MaintenanceResult with operation outcome

        Raises:
            NotImplementedError: If DELETE is not supported
        """
        return self._execute_operation(
            MaintenanceOperationType.DELETE,
            "DELETE",
            lambda: self._do_delete(table_path, condition),
        )

    def _do_update(
        self,
        table_path: Path | str,
        condition: str | Any,
        updates: dict[str, Any],
    ) -> int:
        """Platform-specific UPDATE implementation.

        Default implementation raises NotImplementedError.
        Override in platforms that support UPDATE.

        Args:
            table_path: Path to the table/directory
            condition: Update condition
            updates: Column updates

        Returns:
            Number of rows updated
        """
        raise NotImplementedError("UPDATE not implemented for this platform")

    def update_rows(
        self,
        table_path: Path | str,
        condition: str | Any,
        updates: dict[str, Any],
    ) -> MaintenanceResult:
        """Update rows matching a condition.

        Args:
            table_path: Path to the table/directory
            condition: Update condition
            updates: Column name to new value mapping

        Returns:
            MaintenanceResult with operation outcome

        Raises:
            NotImplementedError: If UPDATE is not supported
        """
        return self._execute_operation(
            MaintenanceOperationType.UPDATE,
            "UPDATE",
            lambda: self._do_update(table_path, condition, updates),
        )

    def _do_merge(
        self,
        table_path: Path | str,
        source_dataframe: Any,
        merge_condition: str | Any,
        when_matched: dict[str, Any] | None,
        when_not_matched: dict[str, Any] | None,
    ) -> int:
        """Platform-specific MERGE implementation.

        Default implementation raises NotImplementedError.
        Override in platforms that support MERGE.

        Args:
            table_path: Path to the target table
            source_dataframe: Source DataFrame
            merge_condition: Merge condition
            when_matched: Updates when matched
            when_not_matched: Inserts when not matched

        Returns:
            Number of rows affected
        """
        raise NotImplementedError("MERGE not implemented for this platform")

    def merge_rows(
        self,
        table_path: Path | str,
        source_dataframe: Any,
        merge_condition: str | Any,
        when_matched: dict[str, Any] | None = None,
        when_not_matched: dict[str, Any] | None = None,
    ) -> MaintenanceResult:
        """Merge rows from source into target.

        Args:
            table_path: Path to the target table
            source_dataframe: Source DataFrame
            merge_condition: Join condition for matching
            when_matched: Updates when matched
            when_not_matched: Inserts when not matched

        Returns:
            MaintenanceResult with operation outcome

        Raises:
            NotImplementedError: If MERGE is not supported
        """
        return self._execute_operation(
            MaintenanceOperationType.MERGE,
            "MERGE",
            lambda: self._do_merge(
                table_path,
                source_dataframe,
                merge_condition,
                when_matched,
                when_not_matched,
            ),
        )
|
|
781
|
+
|
|
782
|
+
|
|
783
|
+
def get_maintenance_operations_for_platform(platform_name: str) -> DataFrameMaintenanceOperations | None:
    """Look up the maintenance operations handler for a platform name.

    Args:
        platform_name: Platform name (e.g., "delta-lake", "iceberg", "polars-df");
            matched case-insensitively.

    Returns:
        The handler when one can be built from the name alone; None when the
        platform is unknown, its implementation cannot be imported, or it
        needs a SparkSession (Hudi, PySpark).

    Note:
        Each implementation module is imported only when its platform is
        requested, so missing optional dependencies do not break this module.
    """
    requested = platform_name.lower()

    # Spark-backed platforms cannot be constructed here because they need a
    # SparkSession; point the caller at the dedicated factory instead.
    if requested in {"hudi", "apache-hudi"}:
        logger.debug(
            "Hudi maintenance requires SparkSession. Use get_hudi_maintenance_operations(spark_session=spark) directly."
        )
        return None
    if requested in {"pyspark-df", "pyspark", "spark"}:
        logger.debug(
            "PySpark maintenance requires SparkSession. "
            "Use get_pyspark_maintenance_operations(spark_session=spark) directly."
        )
        return None

    # Every accepted spelling mapped to its backend key.
    backend_by_alias = {
        "polars-df": "polars",
        "polars": "polars",
        "delta-lake": "delta",
        "delta": "delta",
        "deltalake": "delta",
        "iceberg": "iceberg",
        "apache-iceberg": "iceberg",
        "pyiceberg": "iceberg",
        "ducklake": "ducklake",
        "duck-lake": "ducklake",
        "duckdb-lake": "ducklake",
    }
    backend = backend_by_alias.get(requested)
    if backend is None:
        logger.debug(f"No maintenance operations implementation for platform: {platform_name}")
        return None

    unavailable_message = {
        "polars": "Polars maintenance not available (polars not installed)",
        "delta": "Delta Lake maintenance not available (deltalake not installed)",
        "iceberg": "Iceberg maintenance not available (pyiceberg not installed)",
        "ducklake": "DuckLake maintenance not available (duckdb not installed)",
    }[backend]

    # The import and the factory call share one guard: an ImportError from
    # either is reported as "not available".
    try:
        if backend == "polars":
            from benchbox.platforms.dataframe.polars_maintenance import (
                get_polars_maintenance_operations as build_handler,
            )
        elif backend == "delta":
            from benchbox.platforms.dataframe.delta_lake_maintenance import (
                get_delta_lake_maintenance_operations as build_handler,
            )
        elif backend == "iceberg":
            from benchbox.platforms.dataframe.iceberg_maintenance import (
                get_iceberg_maintenance_operations as build_handler,
            )
        else:
            from benchbox.platforms.dataframe.ducklake_maintenance import (
                get_ducklake_maintenance_operations as build_handler,
            )

        return build_handler()
    except ImportError:
        logger.debug(unavailable_message)
        return None
|