benchbox 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- benchbox/__init__.py +1 -1
- benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query72.tpl +1 -1
- benchbox/_binaries/tpc-ds/{darwin-x86_64/query_templates/ansi.tpl → templates/query_templates/sqlserver.tpl} +1 -1
- benchbox/_binaries/tpc-ds/templates/query_variants/README +6 -0
- benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query10.tpl → templates/query_variants/query10a.tpl} +13 -14
- benchbox/_binaries/tpc-ds/{darwin-x86_64/query_templates/query14.tpl → templates/query_variants/query14a.tpl} +30 -26
- benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query18.tpl → templates/query_variants/query18a.tpl} +40 -19
- benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query22.tpl → templates/query_variants/query22a.tpl} +31 -9
- benchbox/_binaries/tpc-ds/{darwin-x86_64/query_templates/query27.tpl → templates/query_variants/query27a.tpl} +23 -10
- benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query35.tpl → templates/query_variants/query35a.tpl} +9 -8
- benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query36.tpl → templates/query_variants/query36a.tpl} +24 -12
- benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query51.tpl → templates/query_variants/query51a.tpl} +37 -20
- benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query5.tpl → templates/query_variants/query5a.tpl} +15 -10
- benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query67.tpl → templates/query_variants/query67a.tpl} +46 -18
- benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query70.tpl → templates/query_variants/query70a.tpl} +31 -27
- benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query77.tpl → templates/query_variants/query77a.tpl} +22 -15
- benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query80.tpl → templates/query_variants/query80a.tpl} +22 -8
- benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query86.tpl → templates/query_variants/query86a.tpl} +22 -13
- benchbox/_binaries/tpc-h/templates/dists.dss +836 -0
- benchbox/_binaries/tpc-h/templates/queries/1.sql +28 -0
- benchbox/_binaries/tpc-h/templates/queries/10.sql +38 -0
- benchbox/_binaries/tpc-h/templates/queries/11.sql +34 -0
- benchbox/_binaries/tpc-h/templates/queries/12.sql +35 -0
- benchbox/_binaries/tpc-h/templates/queries/13.sql +27 -0
- benchbox/_binaries/tpc-h/templates/queries/14.sql +20 -0
- benchbox/_binaries/tpc-h/templates/queries/15.sql +40 -0
- benchbox/_binaries/tpc-h/templates/queries/16.sql +37 -0
- benchbox/_binaries/tpc-h/templates/queries/17.sql +24 -0
- benchbox/_binaries/tpc-h/templates/queries/18.sql +39 -0
- benchbox/_binaries/tpc-h/templates/queries/19.sql +42 -0
- benchbox/_binaries/tpc-h/templates/queries/2.sql +50 -0
- benchbox/_binaries/tpc-h/templates/queries/20.sql +44 -0
- benchbox/_binaries/tpc-h/templates/queries/21.sql +46 -0
- benchbox/_binaries/tpc-h/templates/queries/22.sql +44 -0
- benchbox/_binaries/tpc-h/templates/queries/3.sql +29 -0
- benchbox/_binaries/tpc-h/templates/queries/4.sql +28 -0
- benchbox/_binaries/tpc-h/templates/queries/5.sql +31 -0
- benchbox/_binaries/tpc-h/templates/queries/6.sql +16 -0
- benchbox/_binaries/tpc-h/templates/queries/7.sql +46 -0
- benchbox/_binaries/tpc-h/templates/queries/8.sql +44 -0
- benchbox/_binaries/tpc-h/templates/queries/9.sql +39 -0
- benchbox/_binaries/tpc-h/templates/variants/12a.sql +27 -0
- benchbox/_binaries/tpc-h/templates/variants/13a.sql +30 -0
- benchbox/_binaries/tpc-h/templates/variants/14a.sql +18 -0
- benchbox/_binaries/tpc-h/templates/variants/15a.sql +39 -0
- benchbox/_binaries/tpc-h/templates/variants/8a.sql +77 -0
- benchbox/base.py +88 -121
- benchbox/cli/benchmarks.py +3 -3
- benchbox/cli/commands/calculate_qphh.py +55 -14
- benchbox/cli/commands/checks.py +1 -4
- benchbox/cli/commands/convert.py +8 -3
- benchbox/cli/commands/metrics.py +55 -14
- benchbox/cli/commands/results.py +131 -3
- benchbox/cli/commands/run.py +157 -22
- benchbox/cli/commands/visualize.py +3 -3
- benchbox/cli/composite_params.py +1 -1
- benchbox/cli/config.py +13 -3
- benchbox/cli/database.py +3 -3
- benchbox/cli/dryrun.py +30 -4
- benchbox/cli/exceptions.py +2 -1
- benchbox/cli/execution_pipeline.py +2 -1
- benchbox/cli/orchestrator.py +25 -71
- benchbox/cli/tuning.py +1 -1
- benchbox/core/ai_primitives/benchmark.py +53 -0
- benchbox/core/ai_primitives/dataframe_operations.py +1217 -0
- benchbox/core/base_benchmark.py +90 -68
- benchbox/core/coffeeshop/queries.py +1 -1
- benchbox/core/coffeeshop/schema.py +1 -1
- benchbox/core/comparison/plotter.py +5 -4
- benchbox/core/dataframe/__init__.py +26 -0
- benchbox/core/dataframe/benchmark_suite.py +5 -4
- benchbox/core/dataframe/context.py +45 -0
- benchbox/core/dataframe/data_loader.py +180 -79
- benchbox/core/dataframe/maintenance_interface.py +866 -0
- benchbox/core/dryrun.py +152 -22
- benchbox/core/expected_results/registry.py +22 -5
- benchbox/core/manifest/io.py +4 -3
- benchbox/core/metadata_primitives/__init__.py +31 -0
- benchbox/core/metadata_primitives/benchmark.py +337 -0
- benchbox/core/metadata_primitives/dataframe_operations.py +1824 -0
- benchbox/core/platform_registry.py +134 -45
- benchbox/core/read_primitives/benchmark.py +56 -4
- benchbox/core/read_primitives/dataframe_queries.py +6547 -0
- benchbox/core/results/__init__.py +47 -6
- benchbox/core/results/builder.py +909 -0
- benchbox/core/results/database.py +5 -5
- benchbox/core/results/exporter.py +58 -96
- benchbox/core/results/filenames.py +102 -0
- benchbox/core/results/loader.py +10 -9
- benchbox/core/results/metrics.py +211 -0
- benchbox/core/results/models.py +3 -1
- benchbox/core/results/normalizer.py +346 -0
- benchbox/core/results/platform_info.py +235 -0
- benchbox/core/results/query_normalizer.py +200 -0
- benchbox/core/results/schema.py +368 -69
- benchbox/core/runner/conversion.py +2 -0
- benchbox/core/runner/dataframe_runner.py +135 -131
- benchbox/core/runner/runner.py +111 -18
- benchbox/core/schemas.py +145 -3
- benchbox/core/ssb/generator.py +14 -2
- benchbox/core/tpc_compliance.py +4 -4
- benchbox/core/tpc_metrics.py +9 -4
- benchbox/core/tpcdi/generator/manifest.py +15 -2
- benchbox/core/tpcds/benchmark/runner.py +3 -7
- benchbox/core/tpcds/c_tools.py +34 -28
- benchbox/core/tpcds/dataframe_queries/queries.py +44 -21
- benchbox/core/tpcds/generator/filesystem.py +23 -11
- benchbox/core/tpcds/generator/manager.py +3 -2
- benchbox/core/tpcds/maintenance_test.py +281 -0
- benchbox/core/tpcds/power_test.py +21 -11
- benchbox/core/tpcds/throughput_test.py +27 -9
- benchbox/core/tpcds_obt/etl/transformer.py +24 -5
- benchbox/core/tpch/dataframe_queries.py +46 -43
- benchbox/core/tpch/generator.py +21 -8
- benchbox/core/tpch/maintenance_test.py +87 -0
- benchbox/core/tpch/power_test.py +21 -5
- benchbox/core/tpch/queries.py +2 -7
- benchbox/core/tpch/streams.py +3 -19
- benchbox/core/transaction_primitives/benchmark.py +99 -0
- benchbox/core/transaction_primitives/dataframe_operations.py +1294 -0
- benchbox/core/transaction_primitives/generator.py +11 -4
- benchbox/core/visualization/__init__.py +2 -2
- benchbox/core/visualization/charts.py +4 -4
- benchbox/core/visualization/dependencies.py +1 -12
- benchbox/core/visualization/exporters.py +15 -26
- benchbox/core/visualization/result_plotter.py +90 -49
- benchbox/core/visualization/templates.py +6 -6
- benchbox/core/write_primitives/__init__.py +13 -0
- benchbox/core/write_primitives/benchmark.py +66 -0
- benchbox/core/write_primitives/dataframe_operations.py +912 -0
- benchbox/core/write_primitives/generator.py +11 -4
- benchbox/mcp/__init__.py +5 -1
- benchbox/mcp/errors.py +29 -0
- benchbox/mcp/resources/registry.py +12 -7
- benchbox/mcp/schemas.py +62 -0
- benchbox/mcp/server.py +17 -14
- benchbox/mcp/tools/__init__.py +3 -0
- benchbox/mcp/tools/analytics.py +550 -582
- benchbox/mcp/tools/benchmark.py +603 -611
- benchbox/mcp/tools/discovery.py +156 -205
- benchbox/mcp/tools/results.py +332 -533
- benchbox/mcp/tools/visualization.py +449 -0
- benchbox/platforms/__init__.py +740 -622
- benchbox/platforms/adapter_factory.py +6 -6
- benchbox/platforms/azure_synapse.py +3 -7
- benchbox/platforms/base/adapter.py +189 -49
- benchbox/platforms/base/cloud_spark/config.py +8 -0
- benchbox/platforms/base/cloud_spark/mixins.py +96 -0
- benchbox/platforms/base/cloud_spark/session.py +4 -2
- benchbox/platforms/base/cloud_spark/staging.py +15 -7
- benchbox/platforms/base/data_loading.py +315 -1
- benchbox/platforms/base/format_capabilities.py +37 -2
- benchbox/platforms/base/utils.py +6 -4
- benchbox/platforms/bigquery.py +5 -6
- benchbox/platforms/clickhouse_cloud.py +263 -0
- benchbox/platforms/databricks/adapter.py +16 -15
- benchbox/platforms/databricks/dataframe_adapter.py +4 -1
- benchbox/platforms/dataframe/__init__.py +31 -0
- benchbox/platforms/dataframe/benchmark_mixin.py +779 -0
- benchbox/platforms/dataframe/cudf_df.py +3 -3
- benchbox/platforms/dataframe/dask_df.py +3 -3
- benchbox/platforms/dataframe/datafusion_df.py +152 -15
- benchbox/platforms/dataframe/delta_lake_maintenance.py +341 -0
- benchbox/platforms/dataframe/ducklake_maintenance.py +402 -0
- benchbox/platforms/dataframe/expression_family.py +47 -8
- benchbox/platforms/dataframe/hudi_maintenance.py +437 -0
- benchbox/platforms/dataframe/iceberg_maintenance.py +605 -0
- benchbox/platforms/dataframe/modin_df.py +3 -3
- benchbox/platforms/dataframe/pandas_df.py +3 -3
- benchbox/platforms/dataframe/pandas_family.py +59 -8
- benchbox/platforms/dataframe/platform_checker.py +16 -49
- benchbox/platforms/dataframe/polars_df.py +14 -12
- benchbox/platforms/dataframe/polars_maintenance.py +630 -0
- benchbox/platforms/dataframe/pyspark_df.py +15 -0
- benchbox/platforms/dataframe/pyspark_maintenance.py +613 -0
- benchbox/platforms/datafusion.py +5 -6
- benchbox/platforms/duckdb.py +2 -1
- benchbox/platforms/fabric_warehouse.py +15 -15
- benchbox/platforms/firebolt.py +3 -2
- benchbox/platforms/influxdb/adapter.py +7 -3
- benchbox/platforms/motherduck.py +3 -2
- benchbox/platforms/onehouse/__init__.py +39 -0
- benchbox/platforms/onehouse/onehouse_client.py +509 -0
- benchbox/platforms/onehouse/quanton_adapter.py +646 -0
- benchbox/platforms/postgresql.py +5 -9
- benchbox/platforms/presto.py +2 -2
- benchbox/platforms/pyspark/session.py +3 -3
- benchbox/platforms/pyspark/sql_adapter.py +2 -3
- benchbox/platforms/redshift.py +7 -7
- benchbox/platforms/snowflake.py +4 -4
- benchbox/platforms/snowpark_connect.py +2 -1
- benchbox/platforms/trino.py +2 -2
- benchbox/release/__init__.py +17 -0
- benchbox/release/content_validation.py +745 -0
- benchbox/release/workflow.py +17 -0
- benchbox/utils/VERSION_MANAGEMENT.md +1 -1
- benchbox/utils/cloud_storage.py +7 -5
- benchbox/utils/compression.py +8 -8
- benchbox/utils/compression_mixin.py +2 -1
- benchbox/utils/data_validation.py +23 -14
- benchbox/utils/dependencies.py +47 -7
- benchbox/utils/file_format.py +407 -0
- benchbox/utils/format_converters/__init__.py +5 -1
- benchbox/utils/format_converters/ducklake_converter.py +227 -0
- benchbox/utils/format_converters/vortex_converter.py +168 -0
- benchbox/utils/tpc_compilation.py +43 -0
- benchbox/utils/version.py +14 -2
- {benchbox-0.1.0.dist-info → benchbox-0.1.1.dist-info}/METADATA +15 -15
- benchbox-0.1.1.dist-info/RECORD +839 -0
- {benchbox-0.1.0.dist-info → benchbox-0.1.1.dist-info}/WHEEL +1 -1
- benchbox/_binaries/tpc-ds/darwin-arm64/query_templates/sqlserver.tpl +0 -37
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/README +0 -4
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/db2.tpl +0 -38
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/netezza.tpl +0 -38
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/oracle.tpl +0 -38
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query1.tpl +0 -62
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query10.tpl +0 -98
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query11.tpl +0 -119
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query12.tpl +0 -72
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query13.tpl +0 -89
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query15.tpl +0 -56
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query16.tpl +0 -76
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query17.tpl +0 -80
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query18.tpl +0 -73
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query19.tpl +0 -64
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query2.tpl +0 -94
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query20.tpl +0 -67
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query21.tpl +0 -65
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query22.tpl +0 -54
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query23.tpl +0 -144
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query24.tpl +0 -147
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query25.tpl +0 -84
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query26.tpl +0 -61
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query28.tpl +0 -90
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query29.tpl +0 -85
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query3.tpl +0 -58
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query30.tpl +0 -66
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query31.tpl +0 -88
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query32.tpl +0 -65
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query33.tpl +0 -113
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query34.tpl +0 -77
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query35.tpl +0 -98
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query36.tpl +0 -74
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query37.tpl +0 -57
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query38.tpl +0 -58
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query39.tpl +0 -93
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query4.tpl +0 -154
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query40.tpl +0 -63
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query41.tpl +0 -90
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query42.tpl +0 -64
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query43.tpl +0 -55
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query44.tpl +0 -72
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query45.tpl +0 -56
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query46.tpl +0 -78
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query47.tpl +0 -89
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query48.tpl +0 -104
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query49.tpl +0 -164
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query5.tpl +0 -165
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query50.tpl +0 -96
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query51.tpl +0 -80
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query52.tpl +0 -59
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query53.tpl +0 -64
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query54.tpl +0 -95
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query55.tpl +0 -52
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query56.tpl +0 -108
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query57.tpl +0 -87
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query58.tpl +0 -101
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query59.tpl +0 -79
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query6.tpl +0 -62
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query60.tpl +0 -115
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query61.tpl +0 -83
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query62.tpl +0 -71
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query63.tpl +0 -64
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query64.tpl +0 -157
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query65.tpl +0 -62
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query66.tpl +0 -261
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query67.tpl +0 -81
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query68.tpl +0 -82
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query69.tpl +0 -85
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query7.tpl +0 -60
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query70.tpl +0 -73
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query71.tpl +0 -74
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query72.tpl +0 -67
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query73.tpl +0 -69
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query74.tpl +0 -99
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query75.tpl +0 -107
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query76.tpl +0 -64
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query77.tpl +0 -145
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query78.tpl +0 -94
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query79.tpl +0 -60
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query8.tpl +0 -144
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query80.tpl +0 -131
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query81.tpl +0 -68
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query82.tpl +0 -56
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query83.tpl +0 -104
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query84.tpl +0 -58
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query85.tpl +0 -121
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query86.tpl +0 -60
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query87.tpl +0 -56
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query88.tpl +0 -128
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query89.tpl +0 -75
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query9.tpl +0 -88
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query90.tpl +0 -58
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query91.tpl +0 -68
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query92.tpl +0 -68
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query93.tpl +0 -53
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query94.tpl +0 -67
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query95.tpl +0 -71
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query96.tpl +0 -52
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query97.tpl +0 -62
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query98.tpl +0 -70
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query99.tpl +0 -69
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/sqlserver.tpl +0 -37
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/templates.lst +0 -99
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/README +0 -4
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/ansi.tpl +0 -38
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/db2.tpl +0 -38
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/netezza.tpl +0 -38
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/oracle.tpl +0 -38
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query1.tpl +0 -62
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query11.tpl +0 -119
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query12.tpl +0 -72
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query13.tpl +0 -89
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query14.tpl +0 -247
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query15.tpl +0 -56
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query16.tpl +0 -76
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query17.tpl +0 -80
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query19.tpl +0 -64
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query2.tpl +0 -94
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query20.tpl +0 -67
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query21.tpl +0 -65
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query23.tpl +0 -144
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query24.tpl +0 -147
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query25.tpl +0 -84
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query26.tpl +0 -61
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query27.tpl +0 -68
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query28.tpl +0 -90
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query29.tpl +0 -85
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query3.tpl +0 -58
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query30.tpl +0 -66
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query31.tpl +0 -88
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query32.tpl +0 -65
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query33.tpl +0 -113
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query34.tpl +0 -77
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query37.tpl +0 -57
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query38.tpl +0 -58
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query39.tpl +0 -93
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query4.tpl +0 -154
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query40.tpl +0 -63
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query41.tpl +0 -90
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query42.tpl +0 -64
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query43.tpl +0 -55
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query44.tpl +0 -72
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query45.tpl +0 -56
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query46.tpl +0 -78
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query47.tpl +0 -89
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query48.tpl +0 -104
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query49.tpl +0 -164
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query50.tpl +0 -96
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query52.tpl +0 -59
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query53.tpl +0 -64
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query54.tpl +0 -95
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query55.tpl +0 -52
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query56.tpl +0 -108
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query57.tpl +0 -87
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query58.tpl +0 -101
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query59.tpl +0 -79
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query6.tpl +0 -62
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query60.tpl +0 -115
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query61.tpl +0 -83
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query62.tpl +0 -71
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query63.tpl +0 -64
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query64.tpl +0 -157
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query65.tpl +0 -62
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query66.tpl +0 -261
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query68.tpl +0 -82
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query69.tpl +0 -85
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query7.tpl +0 -60
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query71.tpl +0 -74
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query72.tpl +0 -67
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query73.tpl +0 -69
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query74.tpl +0 -99
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query75.tpl +0 -107
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query76.tpl +0 -64
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query78.tpl +0 -94
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query79.tpl +0 -60
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query8.tpl +0 -144
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query81.tpl +0 -68
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query82.tpl +0 -56
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query83.tpl +0 -104
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query84.tpl +0 -58
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query85.tpl +0 -121
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query87.tpl +0 -56
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query88.tpl +0 -128
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query89.tpl +0 -75
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query9.tpl +0 -88
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query90.tpl +0 -58
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query91.tpl +0 -68
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query92.tpl +0 -68
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query93.tpl +0 -53
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query94.tpl +0 -67
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query95.tpl +0 -71
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query96.tpl +0 -52
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query97.tpl +0 -62
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query98.tpl +0 -70
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query99.tpl +0 -69
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/sqlserver.tpl +0 -37
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/templates.lst +0 -99
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/README +0 -4
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/ansi.tpl +0 -38
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/db2.tpl +0 -38
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/netezza.tpl +0 -38
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/oracle.tpl +0 -38
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query1.tpl +0 -62
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query10.tpl +0 -98
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query11.tpl +0 -119
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query12.tpl +0 -72
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query13.tpl +0 -89
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query14.tpl +0 -247
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query15.tpl +0 -56
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query16.tpl +0 -76
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query17.tpl +0 -80
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query18.tpl +0 -73
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query19.tpl +0 -64
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query2.tpl +0 -94
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query20.tpl +0 -67
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query21.tpl +0 -65
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query22.tpl +0 -54
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query23.tpl +0 -144
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query24.tpl +0 -147
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query25.tpl +0 -84
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query26.tpl +0 -61
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query27.tpl +0 -68
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query28.tpl +0 -90
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query29.tpl +0 -85
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query3.tpl +0 -58
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query30.tpl +0 -66
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query31.tpl +0 -88
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query32.tpl +0 -65
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query33.tpl +0 -113
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query34.tpl +0 -77
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query35.tpl +0 -98
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query36.tpl +0 -74
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query37.tpl +0 -57
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query38.tpl +0 -58
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query39.tpl +0 -93
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query4.tpl +0 -154
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query40.tpl +0 -63
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query41.tpl +0 -90
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query42.tpl +0 -64
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query43.tpl +0 -55
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query44.tpl +0 -72
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query45.tpl +0 -56
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query46.tpl +0 -78
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query47.tpl +0 -89
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query48.tpl +0 -104
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query49.tpl +0 -164
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query5.tpl +0 -165
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query50.tpl +0 -96
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query51.tpl +0 -80
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query52.tpl +0 -59
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query53.tpl +0 -64
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query54.tpl +0 -95
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query55.tpl +0 -52
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query56.tpl +0 -108
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query57.tpl +0 -87
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query58.tpl +0 -101
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query59.tpl +0 -79
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query6.tpl +0 -62
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query60.tpl +0 -115
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query61.tpl +0 -83
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query62.tpl +0 -71
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query63.tpl +0 -64
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query64.tpl +0 -157
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query65.tpl +0 -62
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query66.tpl +0 -261
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query67.tpl +0 -81
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query68.tpl +0 -82
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query69.tpl +0 -85
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query7.tpl +0 -60
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query70.tpl +0 -73
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query71.tpl +0 -74
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query72.tpl +0 -67
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query73.tpl +0 -69
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query74.tpl +0 -99
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query75.tpl +0 -107
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query76.tpl +0 -64
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query77.tpl +0 -145
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query78.tpl +0 -94
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query79.tpl +0 -60
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query8.tpl +0 -144
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query80.tpl +0 -131
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query81.tpl +0 -68
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query82.tpl +0 -56
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query83.tpl +0 -104
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query84.tpl +0 -58
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query85.tpl +0 -121
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query86.tpl +0 -60
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query87.tpl +0 -56
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query88.tpl +0 -128
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query89.tpl +0 -75
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query9.tpl +0 -88
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query90.tpl +0 -58
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query91.tpl +0 -68
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query92.tpl +0 -68
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query93.tpl +0 -53
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query94.tpl +0 -67
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query95.tpl +0 -71
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query96.tpl +0 -52
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query97.tpl +0 -62
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query98.tpl +0 -70
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query99.tpl +0 -69
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/sqlserver.tpl +0 -37
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/templates.lst +0 -99
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/README +0 -4
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/ansi.tpl +0 -38
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/db2.tpl +0 -38
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/netezza.tpl +0 -38
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/oracle.tpl +0 -38
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query1.tpl +0 -62
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query10.tpl +0 -98
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query11.tpl +0 -119
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query12.tpl +0 -72
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query13.tpl +0 -89
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query14.tpl +0 -247
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query15.tpl +0 -56
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query16.tpl +0 -76
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query17.tpl +0 -80
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query18.tpl +0 -73
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query19.tpl +0 -64
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query2.tpl +0 -94
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query20.tpl +0 -67
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query21.tpl +0 -65
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query22.tpl +0 -54
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query23.tpl +0 -144
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query24.tpl +0 -147
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query25.tpl +0 -84
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query26.tpl +0 -61
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query27.tpl +0 -68
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query28.tpl +0 -90
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query29.tpl +0 -85
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query3.tpl +0 -58
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query30.tpl +0 -66
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query31.tpl +0 -88
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query32.tpl +0 -65
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query33.tpl +0 -113
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query34.tpl +0 -77
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query35.tpl +0 -98
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query36.tpl +0 -74
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query37.tpl +0 -57
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query38.tpl +0 -58
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query39.tpl +0 -93
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query4.tpl +0 -154
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query40.tpl +0 -63
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query41.tpl +0 -90
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query42.tpl +0 -64
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query43.tpl +0 -55
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query44.tpl +0 -72
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query45.tpl +0 -56
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query46.tpl +0 -78
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query47.tpl +0 -89
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query48.tpl +0 -104
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query49.tpl +0 -164
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query5.tpl +0 -165
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query50.tpl +0 -96
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query51.tpl +0 -80
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query52.tpl +0 -59
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query53.tpl +0 -64
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query54.tpl +0 -95
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query55.tpl +0 -52
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query56.tpl +0 -108
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query57.tpl +0 -87
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query58.tpl +0 -101
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query59.tpl +0 -79
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query6.tpl +0 -62
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query60.tpl +0 -115
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query61.tpl +0 -83
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query62.tpl +0 -71
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query63.tpl +0 -64
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query64.tpl +0 -157
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query65.tpl +0 -62
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query66.tpl +0 -261
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query67.tpl +0 -81
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query68.tpl +0 -82
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query69.tpl +0 -85
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query7.tpl +0 -60
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query70.tpl +0 -73
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query71.tpl +0 -74
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query72.tpl +0 -67
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query73.tpl +0 -69
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query74.tpl +0 -99
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query75.tpl +0 -107
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query76.tpl +0 -64
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query77.tpl +0 -145
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query78.tpl +0 -94
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query79.tpl +0 -60
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query8.tpl +0 -144
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query80.tpl +0 -131
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query81.tpl +0 -68
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query82.tpl +0 -56
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query83.tpl +0 -104
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query84.tpl +0 -58
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query85.tpl +0 -121
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query86.tpl +0 -60
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query87.tpl +0 -56
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query88.tpl +0 -128
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query89.tpl +0 -75
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query9.tpl +0 -88
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query90.tpl +0 -58
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query91.tpl +0 -68
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query92.tpl +0 -68
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query93.tpl +0 -53
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query94.tpl +0 -67
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query95.tpl +0 -71
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query96.tpl +0 -52
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query97.tpl +0 -62
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query98.tpl +0 -70
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query99.tpl +0 -69
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/sqlserver.tpl +0 -37
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/templates.lst +0 -99
- benchbox-0.1.0.dist-info/RECORD +0 -1192
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/README +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/ansi.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/db2.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/netezza.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/oracle.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query1.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query10.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query11.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query12.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query13.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query14.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query15.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query16.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query17.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query18.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query19.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query2.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query20.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query21.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query22.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query23.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query24.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query25.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query26.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query27.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query28.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query29.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query3.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query30.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query31.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query32.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query33.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query34.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query35.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query36.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query37.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query38.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query39.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query4.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query40.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query41.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query42.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query43.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query44.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query45.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query46.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query47.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query48.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query49.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query5.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query50.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query51.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query52.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query53.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query54.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query55.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query56.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query57.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query58.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query59.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query6.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query60.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query61.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query62.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query63.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query64.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query65.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query66.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query67.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query68.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query69.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query7.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query70.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query71.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query73.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query74.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query75.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query76.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query77.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query78.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query79.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query8.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query80.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query81.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query82.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query83.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query84.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query85.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query86.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query87.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query88.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query89.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query9.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query90.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query91.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query92.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query93.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query94.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query95.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query96.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query97.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query98.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query99.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/templates.lst +0 -0
- {benchbox-0.1.0.dist-info → benchbox-0.1.1.dist-info}/entry_points.txt +0 -0
- {benchbox-0.1.0.dist-info → benchbox-0.1.1.dist-info}/licenses/LICENSE +0 -0
- {benchbox-0.1.0.dist-info → benchbox-0.1.1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,613 @@
|
|
|
1
|
+
"""PySpark Maintenance Operations Implementation.
|
|
2
|
+
|
|
3
|
+
This module implements DataFrame maintenance operations for PySpark,
|
|
4
|
+
enabling TPC-H RF1/RF2 and TPC-DS maintenance testing.
|
|
5
|
+
|
|
6
|
+
PySpark supports different capabilities based on the table format:
|
|
7
|
+
- With Delta Lake: Full ACID support via delta-spark
|
|
8
|
+
- INSERT: df.write.mode().format("delta").save()
|
|
9
|
+
- DELETE: DeltaTable.forPath().delete()
|
|
10
|
+
- UPDATE: DeltaTable.forPath().update()
|
|
11
|
+
- MERGE: DeltaTable.forPath().merge()
|
|
12
|
+
|
|
13
|
+
- Plain Parquet: File-level operations only
|
|
14
|
+
- INSERT: df.write.mode().parquet()
|
|
15
|
+
- BULK_LOAD: spark.read().write()
|
|
16
|
+
- No row-level DELETE/UPDATE/MERGE
|
|
17
|
+
|
|
18
|
+
SCOPE: This implementation supports Delta Lake for row-level operations.
|
|
19
|
+
PySpark+Iceberg would require iceberg-spark-runtime and different API patterns,
|
|
20
|
+
which should be implemented as a separate module if needed.
|
|
21
|
+
|
|
22
|
+
Copyright 2026 Joe Harris / BenchBox Project
|
|
23
|
+
|
|
24
|
+
Licensed under the MIT License. See LICENSE file in the project root for details.
|
|
25
|
+
"""
|
|
26
|
+
|
|
27
|
+
from __future__ import annotations
|
|
28
|
+
|
|
29
|
+
import logging
|
|
30
|
+
from pathlib import Path
|
|
31
|
+
from typing import Any
|
|
32
|
+
|
|
33
|
+
# Check for PySpark availability
|
|
34
|
+
try:
|
|
35
|
+
from pyspark.sql import SparkSession
|
|
36
|
+
|
|
37
|
+
PYSPARK_AVAILABLE = True
|
|
38
|
+
except ImportError:
|
|
39
|
+
SparkSession = None # type: ignore[assignment, misc]
|
|
40
|
+
PYSPARK_AVAILABLE = False
|
|
41
|
+
|
|
42
|
+
# Check for Delta Lake (delta-spark) availability
|
|
43
|
+
try:
|
|
44
|
+
from delta.tables import DeltaTable
|
|
45
|
+
|
|
46
|
+
DELTA_SPARK_AVAILABLE = True
|
|
47
|
+
except ImportError:
|
|
48
|
+
DeltaTable = None # type: ignore[assignment, misc]
|
|
49
|
+
DELTA_SPARK_AVAILABLE = False
|
|
50
|
+
|
|
51
|
+
from benchbox.core.dataframe.maintenance_interface import (
|
|
52
|
+
BaseDataFrameMaintenanceOperations,
|
|
53
|
+
DataFrameMaintenanceCapabilities,
|
|
54
|
+
TransactionIsolation,
|
|
55
|
+
)
|
|
56
|
+
|
|
57
|
+
logger = logging.getLogger(__name__)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
# Capability profiles for PySpark based on table format
|
|
61
|
+
PYSPARK_DELTA_CAPABILITIES = DataFrameMaintenanceCapabilities(
    # Identification
    platform_name="pyspark-delta",
    notes="Full ACID compliance via Delta Lake (delta-spark)",
    # Row-level DML operations
    supports_insert=True,
    supports_update=True,
    supports_delete=True,
    supports_merge=True,
    # Delete granularity
    supports_row_level_delete=True,
    supports_partitioned_delete=True,
    # Transactional behaviour
    supports_transactions=True,
    transaction_isolation=TransactionIsolation.SNAPSHOT,
    supports_time_travel=True,
    # Upper bound on rows per batch declared for this profile
    max_batch_size=10_000_000,
)
|
|
75
|
+
|
|
76
|
+
PYSPARK_PARQUET_CAPABILITIES = DataFrameMaintenanceCapabilities(
    # Identification
    platform_name="pyspark-parquet",
    notes="File-level operations only. Use Delta Lake for row-level operations.",
    # Only appends are available: df.write.mode("append")
    supports_insert=True,
    # Plain Parquet has no row-level UPDATE / DELETE / MERGE
    supports_update=False,
    supports_delete=False,
    supports_merge=False,
    # Delete granularity: whole partitions can be overwritten, single rows cannot
    supports_row_level_delete=False,
    supports_partitioned_delete=True,
    # No transactional guarantees and no table history
    supports_transactions=False,
    transaction_isolation=TransactionIsolation.NONE,
    supports_time_travel=False,
    # Upper bound on rows per batch declared for this profile
    max_batch_size=10_000_000,
)
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
class PySparkMaintenanceOperations(BaseDataFrameMaintenanceOperations):
|
|
93
|
+
"""PySpark maintenance operations implementation.
|
|
94
|
+
|
|
95
|
+
Implements maintenance operations for PySpark DataFrames. Row-level operations
|
|
96
|
+
(UPDATE, DELETE, MERGE) require Delta Lake table format.
|
|
97
|
+
|
|
98
|
+
Table Format Detection:
|
|
99
|
+
The implementation auto-detects Delta Lake tables by checking for
|
|
100
|
+
_delta_log directory. Tables without Delta Lake support only INSERT
|
|
101
|
+
and BULK_LOAD operations.
|
|
102
|
+
|
|
103
|
+
Example:
|
|
104
|
+
from pyspark.sql import SparkSession
|
|
105
|
+
|
|
106
|
+
spark = SparkSession.builder.appName("benchbox").getOrCreate()
|
|
107
|
+
ops = PySparkMaintenanceOperations(spark_session=spark)
|
|
108
|
+
|
|
109
|
+
# Insert new rows (works with any format)
|
|
110
|
+
result = ops.insert_rows(
|
|
111
|
+
table_path="/data/orders",
|
|
112
|
+
dataframe=new_orders_df,
|
|
113
|
+
mode="append"
|
|
114
|
+
)
|
|
115
|
+
|
|
116
|
+
# Row-level operations require Delta Lake
|
|
117
|
+
if ops.is_delta_table("/data/orders"):
|
|
118
|
+
result = ops.delete_rows(
|
|
119
|
+
table_path="/data/orders",
|
|
120
|
+
condition="order_date < '2020-01-01'"
|
|
121
|
+
)
|
|
122
|
+
|
|
123
|
+
Note:
|
|
124
|
+
Requires pyspark. For row-level operations, also requires delta-spark.
|
|
125
|
+
"""
|
|
126
|
+
|
|
127
|
+
def __init__(
    self,
    spark_session: Any,
    working_dir: str | Path | None = None,
    prefer_delta: bool = True,
) -> None:
    """Set up maintenance operations bound to a Spark session.

    Args:
        spark_session: The SparkSession the operations will run against.
        working_dir: Optional directory for temporary files; stored as a
            ``Path``, or ``None`` when not given (or empty).
        prefer_delta: Request Delta Lake operations. The stored
            ``self.prefer_delta`` is only truthy when delta-spark could
            also be imported.

    Raises:
        ImportError: If PySpark could not be imported.
        ValueError: If ``spark_session`` is None.
    """
    super().__init__()

    # Fail fast on a missing dependency before touching any arguments.
    if not PYSPARK_AVAILABLE:
        install_hint = (
            "PySpark is not installed. Install with: pip install pyspark\n"
            "For Delta Lake support, also install: pip install delta-spark"
        )
        raise ImportError(install_hint)

    if spark_session is None:
        session_hint = (
            "spark_session is required. Create one with:\n"
            " spark = SparkSession.builder.appName('benchbox').getOrCreate()"
        )
        raise ValueError(session_hint)

    # Delta was requested but the delta-spark package is not importable.
    delta_requested_but_missing = prefer_delta and not DELTA_SPARK_AVAILABLE

    self.spark = spark_session
    self.working_dir = None if not working_dir else Path(working_dir)
    self.prefer_delta = prefer_delta and DELTA_SPARK_AVAILABLE
    self.logger = logging.getLogger(f"{__name__}.{self.__class__.__name__}")

    if delta_requested_but_missing:
        self.logger.warning(
            "Delta Lake (delta-spark) not available. "
            "Row-level operations (UPDATE/DELETE/MERGE) will not be supported. "
            "Install with: pip install delta-spark"
        )
|
|
169
|
+
|
|
170
|
+
def _get_capabilities(self) -> DataFrameMaintenanceCapabilities:
    """Select the capability profile for this instance.

    Returns:
        PYSPARK_DELTA_CAPABILITIES when ``self.prefer_delta`` is truthy and
        delta-spark was importable; PYSPARK_PARQUET_CAPABILITIES otherwise.
    """
    delta_enabled = self.prefer_delta and DELTA_SPARK_AVAILABLE
    return PYSPARK_DELTA_CAPABILITIES if delta_enabled else PYSPARK_PARQUET_CAPABILITIES
|
|
182
|
+
|
|
183
|
+
def is_delta_table(self, table_path: str | Path) -> bool:
    """Report whether ``table_path`` holds a Delta Lake table.

    The check looks for a ``_delta_log`` directory under ``table_path``
    using ``pathlib``. It always answers False when delta-spark is not
    importable.

    Args:
        table_path: Table directory to inspect.

    Returns:
        True if ``<table_path>/_delta_log`` exists and is a directory.
    """
    if DELTA_SPARK_AVAILABLE:
        log_dir = Path(table_path) / "_delta_log"
        if not log_dir.exists():
            return False
        return log_dir.is_dir()
    return False
|
|
198
|
+
|
|
199
|
+
def _convert_to_spark_df(self, dataframe: Any) -> Any:
    """Convert a supported DataFrame-like object to a Spark DataFrame.

    Detection is duck-typed so that pandas and Polars do not have to be
    imported here.

    Args:
        dataframe: A Spark DataFrame (returned unchanged), an object
            exposing ``to_pandas()`` (e.g. a Polars DataFrame), a
            Pandas-style DataFrame, or a list of rows accepted by
            ``spark.createDataFrame``.

    Returns:
        Spark DataFrame

    Raises:
        TypeError: If dataframe type is not supported
    """
    from pyspark.sql import DataFrame as SparkDataFrame

    # Already a Spark DataFrame
    if isinstance(dataframe, SparkDataFrame):
        return dataframe

    # Objects that can export themselves to Pandas (Polars and similar).
    # This must be tested BEFORE the Pandas duck-type check below: Polars
    # DataFrames also expose ``to_dict`` and ``columns``, so the reverse
    # order would hand them to createDataFrame unconverted.
    if hasattr(dataframe, "to_pandas"):
        return self.spark.createDataFrame(dataframe.to_pandas())

    # Pandas DataFrame - convert using spark.createDataFrame
    if hasattr(dataframe, "to_dict") and hasattr(dataframe, "columns"):
        return self.spark.createDataFrame(dataframe)

    # List of dicts or tuples
    if isinstance(dataframe, list):
        return self.spark.createDataFrame(dataframe)

    raise TypeError(
        f"Unsupported DataFrame type: {type(dataframe)}. "
        f"Expected Spark DataFrame, Pandas DataFrame, Polars DataFrame, or list."
    )
|
|
233
|
+
|
|
234
|
+
def _to_spark_column(self, value: Any) -> Any:
    """Turn ``value`` into a Spark Column for update/merge operations.

    Interpretation rules, applied in this order:
        1. Non-string values become literals.
        2. ``"col:<name>"`` becomes a column reference to ``<name>``.
        3. ``"expr:<sql>"`` becomes the SQL expression ``<sql>``.
        4. ``"lit:<text>"`` becomes the literal string ``<text>``.
        5. Strings starting with ``"source."`` or ``"target."`` become a
           column reference to the whole string (merge shorthand).
        6. Any other string is parsed as a SQL expression (kept for
           backwards compatibility), e.g. ``"amount * 1.1"``.

    Args:
        value: A prefixed string, a plain SQL expression string, or any
            non-string value.

    Returns:
        A Spark Column object

    Examples:
        >>> ops._to_spark_column("col:source.name")    # Column reference
        >>> ops._to_spark_column("expr:amount * 1.1")  # SQL expression
        >>> ops._to_spark_column("lit:completed")      # Literal string
        >>> ops._to_spark_column("source.id")          # Merge shorthand
        >>> ops._to_spark_column(42)                   # Literal int
    """
    from pyspark.sql import functions as F

    if not isinstance(value, str):
        return F.lit(value)

    # Explicit prefixes select the builder; the prefix itself is dropped.
    prefixed_builders = (
        ("col:", F.col),
        ("expr:", F.expr),
        ("lit:", F.lit),
    )
    for prefix, build in prefixed_builders:
        if value.startswith(prefix):
            return build(value[len(prefix):])

    # Merge shorthand: alias-qualified names are column references as-is.
    if value.startswith(("source.", "target.")):
        return F.col(value)

    # Fallback: unprefixed strings are SQL expressions such as "UPPER(name)".
    return F.expr(value)
|
|
284
|
+
|
|
285
|
+
def _do_insert(
|
|
286
|
+
self,
|
|
287
|
+
table_path: Path | str,
|
|
288
|
+
dataframe: Any,
|
|
289
|
+
partition_columns: list[str] | None,
|
|
290
|
+
mode: str,
|
|
291
|
+
) -> int:
|
|
292
|
+
"""Insert rows using df.write.mode().format().save().
|
|
293
|
+
|
|
294
|
+
Uses the actual Spark DataFrame write API, not SQL execution.
|
|
295
|
+
|
|
296
|
+
Args:
|
|
297
|
+
table_path: Path to the table directory
|
|
298
|
+
dataframe: DataFrame containing rows to insert
|
|
299
|
+
partition_columns: Columns to partition by
|
|
300
|
+
mode: Write mode ("append" or "overwrite")
|
|
301
|
+
|
|
302
|
+
Returns:
|
|
303
|
+
Number of rows inserted
|
|
304
|
+
"""
|
|
305
|
+
table_path = str(table_path)
|
|
306
|
+
|
|
307
|
+
# Convert to Spark DataFrame
|
|
308
|
+
spark_df = self._convert_to_spark_df(dataframe)
|
|
309
|
+
row_count = spark_df.count()
|
|
310
|
+
|
|
311
|
+
if row_count == 0:
|
|
312
|
+
self.logger.info("No rows to insert")
|
|
313
|
+
return 0
|
|
314
|
+
|
|
315
|
+
# Map mode to Spark mode
|
|
316
|
+
spark_mode = "append" if mode == "append" else "overwrite"
|
|
317
|
+
|
|
318
|
+
# Determine format
|
|
319
|
+
is_delta = self.is_delta_table(table_path)
|
|
320
|
+
write_format = "delta" if is_delta else "parquet"
|
|
321
|
+
|
|
322
|
+
# Build writer
|
|
323
|
+
writer = spark_df.write.mode(spark_mode)
|
|
324
|
+
|
|
325
|
+
if partition_columns:
|
|
326
|
+
writer = writer.partitionBy(*partition_columns)
|
|
327
|
+
|
|
328
|
+
# Write using DataFrame API
|
|
329
|
+
if write_format == "delta":
|
|
330
|
+
writer.format("delta").save(table_path)
|
|
331
|
+
else:
|
|
332
|
+
writer.parquet(table_path)
|
|
333
|
+
|
|
334
|
+
self.logger.info(f"Inserted {row_count} rows to {table_path} (format: {write_format})")
|
|
335
|
+
return row_count
|
|
336
|
+
|
|
337
|
+
def _do_delete(
    self,
    table_path: Path | str,
    condition: str | Any,
) -> int:
    """Delete rows using DeltaTable.forPath().delete().

    Only supported for Delta Lake tables. Plain Parquet tables do not
    support row-level deletes.

    Args:
        table_path: Path to the Delta table
        condition: Delete predicate, either a SQL string (e.g., "id > 100")
            or a Spark Column expression. Any other object is converted
            with str() and treated as SQL.

    Returns:
        Number of rows deleted, computed as the difference between the
        table's row count before and after the delete

    Raises:
        NotImplementedError: If delta-spark is not installed or the table
            is not Delta Lake format
    """
    table_path = str(table_path)

    if not DELTA_SPARK_AVAILABLE:
        raise NotImplementedError("DELETE requires Delta Lake (delta-spark). Install with: pip install delta-spark")

    if not self.is_delta_table(table_path):
        raise NotImplementedError(
            f"DELETE requires Delta Lake table format. "
            f"Table at {table_path} is not a Delta table. "
            f"Convert with: df.write.format('delta').save(path)"
        )

    from pyspark.sql import Column

    # DeltaTable.delete() accepts a SQL string or a Column. A Column must be
    # passed through untouched: str() on it yields its repr
    # ("Column<'...'>"), which is not valid SQL.
    predicate = condition if isinstance(condition, (str, Column)) else str(condition)

    # Get DeltaTable reference
    dt = DeltaTable.forPath(self.spark, table_path)

    # Get row count before delete
    rows_before = dt.toDF().count()

    # Execute delete using DeltaTable API
    dt.delete(condition=predicate)

    # Get row count after delete
    rows_after = dt.toDF().count()
    rows_deleted = rows_before - rows_after

    self.logger.info(f"Deleted {rows_deleted} rows from {table_path}")
    return rows_deleted
|
|
384
|
+
|
|
385
|
+
def _do_update(
    self,
    table_path: Path | str,
    condition: str | Any,
    updates: dict[str, Any],
) -> int:
    """Apply column updates to matching rows via DeltaTable.update().

    Row-level updates need Delta Lake; a plain Parquet table is rejected.

    Args:
        table_path: Location of the Delta table
        condition: Predicate selecting the rows to update
        updates: Mapping of column name to its new value/expression; each
            value is converted with _to_spark_column()

    Returns:
        Number of rows that matched the condition before the update ran
        (DeltaTable.update() itself does not report a count)

    Raises:
        NotImplementedError: If delta-spark is missing or the table is not
            in Delta Lake format
    """
    delta_path = str(table_path)

    if not DELTA_SPARK_AVAILABLE:
        raise NotImplementedError("UPDATE requires Delta Lake (delta-spark). Install with: pip install delta-spark")

    if not self.is_delta_table(delta_path):
        not_delta_message = (
            "UPDATE requires Delta Lake table format. "
            f"Table at {delta_path} is not a Delta table. "
            "Convert with: df.write.format('delta').save(path)"
        )
        raise NotImplementedError(not_delta_message)

    delta_table = DeltaTable.forPath(self.spark, delta_path)

    # The update API returns nothing, so the affected-row figure is taken
    # from a filtered count executed beforehand.
    affected_rows = delta_table.toDF().filter(condition).count()
    if affected_rows == 0:
        self.logger.info("No rows match update condition")
        return 0

    # Translate every raw value into a Spark Column expression.
    assignments = {}
    for column_name, raw_value in updates.items():
        assignments[column_name] = self._to_spark_column(raw_value)

    delta_table.update(condition=condition, set=assignments)

    self.logger.info(f"Updated {affected_rows} rows in {delta_path}")
    return affected_rows
|
|
439
|
+
|
|
440
|
+
def _do_merge(
    self,
    table_path: Path | str,
    source_dataframe: Any,
    merge_condition: str | Any,
    when_matched: dict[str, Any] | None,
    when_not_matched: dict[str, Any] | None,
) -> int:
    """Merge a source DataFrame into a Delta table via DeltaTable.merge().

    MERGE needs Delta Lake; a plain Parquet table is rejected. The table is
    aliased as "target" and the source as "source" for use in the merge
    condition and in the update/insert expressions.

    Args:
        table_path: Location of the Delta table
        source_dataframe: Rows to merge in (converted to a Spark DataFrame)
        merge_condition: Join predicate deciding which rows match
        when_matched: Column assignments applied to matched rows; skipped
            when empty or None
        when_not_matched: Column values inserted for unmatched source rows;
            skipped when empty or None

    Returns:
        Number of rows in the source DataFrame, used as an estimate of the
        affected rows

    Raises:
        NotImplementedError: If delta-spark is missing or the table is not
            in Delta Lake format
    """
    target_location = str(table_path)

    if not DELTA_SPARK_AVAILABLE:
        raise NotImplementedError("MERGE requires Delta Lake (delta-spark). Install with: pip install delta-spark")

    if not self.is_delta_table(target_location):
        not_delta_message = (
            "MERGE requires Delta Lake table format. "
            f"Table at {target_location} is not a Delta table. "
            "Convert with: df.write.format('delta').save(path)"
        )
        raise NotImplementedError(not_delta_message)

    # Normalise the incoming data and remember its size for the return value.
    incoming = self._convert_to_spark_df(source_dataframe)
    incoming_rows = incoming.count()

    delta_table = DeltaTable.forPath(self.spark, target_location)
    builder = delta_table.alias("target").merge(incoming.alias("source"), merge_condition)

    # Optional UPDATE clause for matched rows.
    if when_matched:
        matched_assignments = {}
        for column_name, raw_value in when_matched.items():
            matched_assignments[column_name] = self._to_spark_column(raw_value)
        builder = builder.whenMatchedUpdate(set=matched_assignments)

    # Optional INSERT clause for unmatched source rows.
    if when_not_matched:
        inserted_values = {}
        for column_name, raw_value in when_not_matched.items():
            inserted_values[column_name] = self._to_spark_column(raw_value)
        builder = builder.whenNotMatchedInsert(values=inserted_values)

    builder.execute()

    # No merge metrics are collected here; the source size stands in for them.
    self.logger.info(f"Merged from {incoming_rows} source rows into {target_location}")
    return incoming_rows
|
|
506
|
+
|
|
507
|
+
def execute_bulk_load(
    self,
    source_path: str | Path,
    target_path: str | Path,
    source_format: str = "parquet",
    target_format: str = "parquet",
    compression: str | None = "zstd",
    partition_columns: list[str] | None = None,
    sort_columns: list[str] | None = None,
) -> int:
    """Copy a dataset from source to target with the DataFrame read/write API.

    No SQL is involved. The steps are:
    1. Read the source with spark.read.format(source_format).load(...)
    2. Optionally order the rows by sort_columns
    3. Overwrite the target, as Delta when target_format is "delta" and as
       Parquet for any other value

    Args:
        source_path: Location of the data to read
        target_path: Location to write to (existing data is overwritten)
        source_format: Reader format; "csv" is read with a header row and
            schema inference
        target_format: "delta" for a Delta write, anything else writes Parquet
        compression: Value for the writer's "compression" option; omitted
            when None or empty
        partition_columns: Columns to partition the output by
        sort_columns: Columns to sort by before writing

    Returns:
        Number of rows read from the source (0 when the source is empty, in
        which case nothing is written)
    """
    source_location = str(source_path)
    target_location = str(target_path)

    # Configure the reader; CSV additionally gets header + schema inference.
    reader = self.spark.read.format(source_format)
    if source_format == "csv":
        reader = reader.option("header", "true")
        reader = reader.option("inferSchema", "true")

    loaded = reader.load(source_location)
    total_rows = loaded.count()
    if total_rows == 0:
        self.logger.info("No rows to load")
        return 0

    if sort_columns:
        loaded = loaded.orderBy(*sort_columns)

    # Assemble the writer step by step.
    writer = loaded.write.mode("overwrite")
    if partition_columns:
        writer = writer.partitionBy(*partition_columns)
    if compression:
        writer = writer.option("compression", compression)

    if target_format != "delta":
        writer.parquet(target_location)
    else:
        writer.format("delta").save(target_location)

    self.logger.info(f"Bulk loaded {total_rows} rows to {target_location}")
    return total_rows
|
|
573
|
+
|
|
574
|
+
|
|
575
|
+
def get_pyspark_maintenance_operations(
    spark_session: Any = None,
    working_dir: str | Path | None = None,
    prefer_delta: bool = True,
) -> PySparkMaintenanceOperations | None:
    """Build a PySparkMaintenanceOperations instance when that is possible.

    Args:
        spark_session: Active SparkSession (required)
        working_dir: Optional working directory
        prefer_delta: Whether to prefer Delta Lake operations

    Returns:
        A PySparkMaintenanceOperations built from the given arguments, or
        None when PySpark is unavailable or no spark_session was supplied
    """
    # Work out why construction is impossible, if it is; the PySpark check
    # takes precedence over the missing-session check.
    unavailable_reason = None
    if not PYSPARK_AVAILABLE:
        unavailable_reason = "PySpark not available, cannot create maintenance operations"
    elif spark_session is None:
        unavailable_reason = "No SparkSession provided, cannot create maintenance operations"

    if unavailable_reason is not None:
        logger.debug(unavailable_reason)
        return None

    return PySparkMaintenanceOperations(
        spark_session=spark_session,
        working_dir=working_dir,
        prefer_delta=prefer_delta,
    )
|
|
604
|
+
|
|
605
|
+
|
|
606
|
+
# Names exported by `from <module> import *`.
__all__ = [
    # Operations class and its factory function
    "PySparkMaintenanceOperations",
    "get_pyspark_maintenance_operations",
    # Capability constants
    "PYSPARK_DELTA_CAPABILITIES",
    "PYSPARK_PARQUET_CAPABILITIES",
    # Optional-dependency availability flags
    "PYSPARK_AVAILABLE",
    "DELTA_SPARK_AVAILABLE",
]
|
benchbox/platforms/datafusion.py
CHANGED
|
@@ -30,6 +30,7 @@ except ImportError:
|
|
|
30
30
|
RuntimeEnv = None # type: ignore[assignment, misc]
|
|
31
31
|
|
|
32
32
|
from benchbox.platforms.base import PlatformAdapter
|
|
33
|
+
from benchbox.utils.file_format import get_delimiter_for_file
|
|
33
34
|
|
|
34
35
|
logger = logging.getLogger(__name__)
|
|
35
36
|
|
|
@@ -480,12 +481,10 @@ class DataFusionAdapter(PlatformAdapter):
|
|
|
480
481
|
Tuple of (delimiter, has_trailing_delimiter)
|
|
481
482
|
"""
|
|
482
483
|
if file_paths:
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
else:
|
|
488
|
-
return ",", False
|
|
484
|
+
delimiter = get_delimiter_for_file(file_paths[0])
|
|
485
|
+
# TPC benchmark format uses pipe delimiter with trailing delimiter
|
|
486
|
+
has_trailing = delimiter == "|"
|
|
487
|
+
return delimiter, has_trailing
|
|
489
488
|
return ",", False
|
|
490
489
|
|
|
491
490
|
def _load_table_csv(self, connection: Any, table_name: str, file_paths: list[Path], data_dir: Path) -> int:
|
benchbox/platforms/duckdb.py
CHANGED
|
@@ -22,6 +22,7 @@ except ImportError:
|
|
|
22
22
|
|
|
23
23
|
from benchbox.core.errors import PlanCaptureError
|
|
24
24
|
from benchbox.utils.cloud_storage import get_cloud_path_info, is_cloud_path
|
|
25
|
+
from benchbox.utils.file_format import is_tpc_format
|
|
25
26
|
|
|
26
27
|
from .base import PlatformAdapter
|
|
27
28
|
|
|
@@ -355,7 +356,7 @@ class DuckDBAdapter(PlatformAdapter):
|
|
|
355
356
|
base_ext = FileFormatRegistry.get_base_data_extension(file_path)
|
|
356
357
|
|
|
357
358
|
# Create DuckDB native handler for supported formats
|
|
358
|
-
if
|
|
359
|
+
if is_tpc_format(file_path):
|
|
359
360
|
return DuckDBNativeHandler("|", adapter, benchmark_instance)
|
|
360
361
|
elif base_ext == ".csv":
|
|
361
362
|
return DuckDBNativeHandler(",", adapter, benchmark_instance)
|