benchbox 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- benchbox/__init__.py +1 -1
- benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query72.tpl +1 -1
- benchbox/_binaries/tpc-ds/{darwin-x86_64/query_templates/ansi.tpl → templates/query_templates/sqlserver.tpl} +1 -1
- benchbox/_binaries/tpc-ds/templates/query_variants/README +6 -0
- benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query10.tpl → templates/query_variants/query10a.tpl} +13 -14
- benchbox/_binaries/tpc-ds/{darwin-x86_64/query_templates/query14.tpl → templates/query_variants/query14a.tpl} +30 -26
- benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query18.tpl → templates/query_variants/query18a.tpl} +40 -19
- benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query22.tpl → templates/query_variants/query22a.tpl} +31 -9
- benchbox/_binaries/tpc-ds/{darwin-x86_64/query_templates/query27.tpl → templates/query_variants/query27a.tpl} +23 -10
- benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query35.tpl → templates/query_variants/query35a.tpl} +9 -8
- benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query36.tpl → templates/query_variants/query36a.tpl} +24 -12
- benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query51.tpl → templates/query_variants/query51a.tpl} +37 -20
- benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query5.tpl → templates/query_variants/query5a.tpl} +15 -10
- benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query67.tpl → templates/query_variants/query67a.tpl} +46 -18
- benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query70.tpl → templates/query_variants/query70a.tpl} +31 -27
- benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query77.tpl → templates/query_variants/query77a.tpl} +22 -15
- benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query80.tpl → templates/query_variants/query80a.tpl} +22 -8
- benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query86.tpl → templates/query_variants/query86a.tpl} +22 -13
- benchbox/_binaries/tpc-h/templates/dists.dss +836 -0
- benchbox/_binaries/tpc-h/templates/queries/1.sql +28 -0
- benchbox/_binaries/tpc-h/templates/queries/10.sql +38 -0
- benchbox/_binaries/tpc-h/templates/queries/11.sql +34 -0
- benchbox/_binaries/tpc-h/templates/queries/12.sql +35 -0
- benchbox/_binaries/tpc-h/templates/queries/13.sql +27 -0
- benchbox/_binaries/tpc-h/templates/queries/14.sql +20 -0
- benchbox/_binaries/tpc-h/templates/queries/15.sql +40 -0
- benchbox/_binaries/tpc-h/templates/queries/16.sql +37 -0
- benchbox/_binaries/tpc-h/templates/queries/17.sql +24 -0
- benchbox/_binaries/tpc-h/templates/queries/18.sql +39 -0
- benchbox/_binaries/tpc-h/templates/queries/19.sql +42 -0
- benchbox/_binaries/tpc-h/templates/queries/2.sql +50 -0
- benchbox/_binaries/tpc-h/templates/queries/20.sql +44 -0
- benchbox/_binaries/tpc-h/templates/queries/21.sql +46 -0
- benchbox/_binaries/tpc-h/templates/queries/22.sql +44 -0
- benchbox/_binaries/tpc-h/templates/queries/3.sql +29 -0
- benchbox/_binaries/tpc-h/templates/queries/4.sql +28 -0
- benchbox/_binaries/tpc-h/templates/queries/5.sql +31 -0
- benchbox/_binaries/tpc-h/templates/queries/6.sql +16 -0
- benchbox/_binaries/tpc-h/templates/queries/7.sql +46 -0
- benchbox/_binaries/tpc-h/templates/queries/8.sql +44 -0
- benchbox/_binaries/tpc-h/templates/queries/9.sql +39 -0
- benchbox/_binaries/tpc-h/templates/variants/12a.sql +27 -0
- benchbox/_binaries/tpc-h/templates/variants/13a.sql +30 -0
- benchbox/_binaries/tpc-h/templates/variants/14a.sql +18 -0
- benchbox/_binaries/tpc-h/templates/variants/15a.sql +39 -0
- benchbox/_binaries/tpc-h/templates/variants/8a.sql +77 -0
- benchbox/base.py +88 -121
- benchbox/cli/benchmarks.py +3 -3
- benchbox/cli/commands/calculate_qphh.py +55 -14
- benchbox/cli/commands/checks.py +1 -4
- benchbox/cli/commands/convert.py +8 -3
- benchbox/cli/commands/metrics.py +55 -14
- benchbox/cli/commands/results.py +131 -3
- benchbox/cli/commands/run.py +157 -22
- benchbox/cli/commands/visualize.py +3 -3
- benchbox/cli/composite_params.py +1 -1
- benchbox/cli/config.py +13 -3
- benchbox/cli/database.py +3 -3
- benchbox/cli/dryrun.py +30 -4
- benchbox/cli/exceptions.py +2 -1
- benchbox/cli/execution_pipeline.py +2 -1
- benchbox/cli/orchestrator.py +25 -71
- benchbox/cli/tuning.py +1 -1
- benchbox/core/ai_primitives/benchmark.py +53 -0
- benchbox/core/ai_primitives/dataframe_operations.py +1217 -0
- benchbox/core/base_benchmark.py +90 -68
- benchbox/core/coffeeshop/queries.py +1 -1
- benchbox/core/coffeeshop/schema.py +1 -1
- benchbox/core/comparison/plotter.py +5 -4
- benchbox/core/dataframe/__init__.py +26 -0
- benchbox/core/dataframe/benchmark_suite.py +5 -4
- benchbox/core/dataframe/context.py +45 -0
- benchbox/core/dataframe/data_loader.py +180 -79
- benchbox/core/dataframe/maintenance_interface.py +866 -0
- benchbox/core/dryrun.py +152 -22
- benchbox/core/expected_results/registry.py +22 -5
- benchbox/core/manifest/io.py +4 -3
- benchbox/core/metadata_primitives/__init__.py +31 -0
- benchbox/core/metadata_primitives/benchmark.py +337 -0
- benchbox/core/metadata_primitives/dataframe_operations.py +1824 -0
- benchbox/core/platform_registry.py +134 -45
- benchbox/core/read_primitives/benchmark.py +56 -4
- benchbox/core/read_primitives/dataframe_queries.py +6547 -0
- benchbox/core/results/__init__.py +47 -6
- benchbox/core/results/builder.py +909 -0
- benchbox/core/results/database.py +5 -5
- benchbox/core/results/exporter.py +58 -96
- benchbox/core/results/filenames.py +102 -0
- benchbox/core/results/loader.py +10 -9
- benchbox/core/results/metrics.py +211 -0
- benchbox/core/results/models.py +3 -1
- benchbox/core/results/normalizer.py +346 -0
- benchbox/core/results/platform_info.py +235 -0
- benchbox/core/results/query_normalizer.py +200 -0
- benchbox/core/results/schema.py +368 -69
- benchbox/core/runner/conversion.py +2 -0
- benchbox/core/runner/dataframe_runner.py +135 -131
- benchbox/core/runner/runner.py +111 -18
- benchbox/core/schemas.py +145 -3
- benchbox/core/ssb/generator.py +14 -2
- benchbox/core/tpc_compliance.py +4 -4
- benchbox/core/tpc_metrics.py +9 -4
- benchbox/core/tpcdi/generator/manifest.py +15 -2
- benchbox/core/tpcds/benchmark/runner.py +3 -7
- benchbox/core/tpcds/c_tools.py +34 -28
- benchbox/core/tpcds/dataframe_queries/queries.py +44 -21
- benchbox/core/tpcds/generator/filesystem.py +23 -11
- benchbox/core/tpcds/generator/manager.py +3 -2
- benchbox/core/tpcds/maintenance_test.py +281 -0
- benchbox/core/tpcds/power_test.py +21 -11
- benchbox/core/tpcds/throughput_test.py +27 -9
- benchbox/core/tpcds_obt/etl/transformer.py +24 -5
- benchbox/core/tpch/dataframe_queries.py +46 -43
- benchbox/core/tpch/generator.py +21 -8
- benchbox/core/tpch/maintenance_test.py +87 -0
- benchbox/core/tpch/power_test.py +21 -5
- benchbox/core/tpch/queries.py +2 -7
- benchbox/core/tpch/streams.py +3 -19
- benchbox/core/transaction_primitives/benchmark.py +99 -0
- benchbox/core/transaction_primitives/dataframe_operations.py +1294 -0
- benchbox/core/transaction_primitives/generator.py +11 -4
- benchbox/core/visualization/__init__.py +2 -2
- benchbox/core/visualization/charts.py +4 -4
- benchbox/core/visualization/dependencies.py +1 -12
- benchbox/core/visualization/exporters.py +15 -26
- benchbox/core/visualization/result_plotter.py +90 -49
- benchbox/core/visualization/templates.py +6 -6
- benchbox/core/write_primitives/__init__.py +13 -0
- benchbox/core/write_primitives/benchmark.py +66 -0
- benchbox/core/write_primitives/dataframe_operations.py +912 -0
- benchbox/core/write_primitives/generator.py +11 -4
- benchbox/mcp/__init__.py +5 -1
- benchbox/mcp/errors.py +29 -0
- benchbox/mcp/resources/registry.py +12 -7
- benchbox/mcp/schemas.py +62 -0
- benchbox/mcp/server.py +17 -14
- benchbox/mcp/tools/__init__.py +3 -0
- benchbox/mcp/tools/analytics.py +550 -582
- benchbox/mcp/tools/benchmark.py +603 -611
- benchbox/mcp/tools/discovery.py +156 -205
- benchbox/mcp/tools/results.py +332 -533
- benchbox/mcp/tools/visualization.py +449 -0
- benchbox/platforms/__init__.py +740 -622
- benchbox/platforms/adapter_factory.py +6 -6
- benchbox/platforms/azure_synapse.py +3 -7
- benchbox/platforms/base/adapter.py +189 -49
- benchbox/platforms/base/cloud_spark/config.py +8 -0
- benchbox/platforms/base/cloud_spark/mixins.py +96 -0
- benchbox/platforms/base/cloud_spark/session.py +4 -2
- benchbox/platforms/base/cloud_spark/staging.py +15 -7
- benchbox/platforms/base/data_loading.py +315 -1
- benchbox/platforms/base/format_capabilities.py +37 -2
- benchbox/platforms/base/utils.py +6 -4
- benchbox/platforms/bigquery.py +5 -6
- benchbox/platforms/clickhouse_cloud.py +263 -0
- benchbox/platforms/databricks/adapter.py +16 -15
- benchbox/platforms/databricks/dataframe_adapter.py +4 -1
- benchbox/platforms/dataframe/__init__.py +31 -0
- benchbox/platforms/dataframe/benchmark_mixin.py +779 -0
- benchbox/platforms/dataframe/cudf_df.py +3 -3
- benchbox/platforms/dataframe/dask_df.py +3 -3
- benchbox/platforms/dataframe/datafusion_df.py +152 -15
- benchbox/platforms/dataframe/delta_lake_maintenance.py +341 -0
- benchbox/platforms/dataframe/ducklake_maintenance.py +402 -0
- benchbox/platforms/dataframe/expression_family.py +47 -8
- benchbox/platforms/dataframe/hudi_maintenance.py +437 -0
- benchbox/platforms/dataframe/iceberg_maintenance.py +605 -0
- benchbox/platforms/dataframe/modin_df.py +3 -3
- benchbox/platforms/dataframe/pandas_df.py +3 -3
- benchbox/platforms/dataframe/pandas_family.py +59 -8
- benchbox/platforms/dataframe/platform_checker.py +16 -49
- benchbox/platforms/dataframe/polars_df.py +14 -12
- benchbox/platforms/dataframe/polars_maintenance.py +630 -0
- benchbox/platforms/dataframe/pyspark_df.py +15 -0
- benchbox/platforms/dataframe/pyspark_maintenance.py +613 -0
- benchbox/platforms/datafusion.py +5 -6
- benchbox/platforms/duckdb.py +2 -1
- benchbox/platforms/fabric_warehouse.py +15 -15
- benchbox/platforms/firebolt.py +3 -2
- benchbox/platforms/influxdb/adapter.py +7 -3
- benchbox/platforms/motherduck.py +3 -2
- benchbox/platforms/onehouse/__init__.py +39 -0
- benchbox/platforms/onehouse/onehouse_client.py +509 -0
- benchbox/platforms/onehouse/quanton_adapter.py +646 -0
- benchbox/platforms/postgresql.py +5 -9
- benchbox/platforms/presto.py +2 -2
- benchbox/platforms/pyspark/session.py +3 -3
- benchbox/platforms/pyspark/sql_adapter.py +2 -3
- benchbox/platforms/redshift.py +7 -7
- benchbox/platforms/snowflake.py +4 -4
- benchbox/platforms/snowpark_connect.py +2 -1
- benchbox/platforms/trino.py +2 -2
- benchbox/release/__init__.py +17 -0
- benchbox/release/content_validation.py +745 -0
- benchbox/release/workflow.py +17 -0
- benchbox/utils/VERSION_MANAGEMENT.md +1 -1
- benchbox/utils/cloud_storage.py +7 -5
- benchbox/utils/compression.py +8 -8
- benchbox/utils/compression_mixin.py +2 -1
- benchbox/utils/data_validation.py +23 -14
- benchbox/utils/dependencies.py +47 -7
- benchbox/utils/file_format.py +407 -0
- benchbox/utils/format_converters/__init__.py +5 -1
- benchbox/utils/format_converters/ducklake_converter.py +227 -0
- benchbox/utils/format_converters/vortex_converter.py +168 -0
- benchbox/utils/tpc_compilation.py +43 -0
- benchbox/utils/version.py +14 -2
- {benchbox-0.1.0.dist-info → benchbox-0.1.1.dist-info}/METADATA +15 -15
- benchbox-0.1.1.dist-info/RECORD +839 -0
- {benchbox-0.1.0.dist-info → benchbox-0.1.1.dist-info}/WHEEL +1 -1
- benchbox/_binaries/tpc-ds/darwin-arm64/query_templates/sqlserver.tpl +0 -37
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/README +0 -4
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/db2.tpl +0 -38
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/netezza.tpl +0 -38
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/oracle.tpl +0 -38
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query1.tpl +0 -62
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query10.tpl +0 -98
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query11.tpl +0 -119
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query12.tpl +0 -72
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query13.tpl +0 -89
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query15.tpl +0 -56
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query16.tpl +0 -76
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query17.tpl +0 -80
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query18.tpl +0 -73
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query19.tpl +0 -64
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query2.tpl +0 -94
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query20.tpl +0 -67
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query21.tpl +0 -65
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query22.tpl +0 -54
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query23.tpl +0 -144
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query24.tpl +0 -147
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query25.tpl +0 -84
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query26.tpl +0 -61
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query28.tpl +0 -90
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query29.tpl +0 -85
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query3.tpl +0 -58
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query30.tpl +0 -66
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query31.tpl +0 -88
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query32.tpl +0 -65
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query33.tpl +0 -113
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query34.tpl +0 -77
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query35.tpl +0 -98
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query36.tpl +0 -74
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query37.tpl +0 -57
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query38.tpl +0 -58
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query39.tpl +0 -93
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query4.tpl +0 -154
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query40.tpl +0 -63
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query41.tpl +0 -90
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query42.tpl +0 -64
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query43.tpl +0 -55
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query44.tpl +0 -72
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query45.tpl +0 -56
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query46.tpl +0 -78
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query47.tpl +0 -89
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query48.tpl +0 -104
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query49.tpl +0 -164
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query5.tpl +0 -165
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query50.tpl +0 -96
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query51.tpl +0 -80
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query52.tpl +0 -59
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query53.tpl +0 -64
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query54.tpl +0 -95
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query55.tpl +0 -52
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query56.tpl +0 -108
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query57.tpl +0 -87
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query58.tpl +0 -101
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query59.tpl +0 -79
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query6.tpl +0 -62
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query60.tpl +0 -115
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query61.tpl +0 -83
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query62.tpl +0 -71
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query63.tpl +0 -64
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query64.tpl +0 -157
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query65.tpl +0 -62
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query66.tpl +0 -261
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query67.tpl +0 -81
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query68.tpl +0 -82
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query69.tpl +0 -85
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query7.tpl +0 -60
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query70.tpl +0 -73
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query71.tpl +0 -74
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query72.tpl +0 -67
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query73.tpl +0 -69
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query74.tpl +0 -99
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query75.tpl +0 -107
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query76.tpl +0 -64
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query77.tpl +0 -145
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query78.tpl +0 -94
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query79.tpl +0 -60
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query8.tpl +0 -144
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query80.tpl +0 -131
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query81.tpl +0 -68
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query82.tpl +0 -56
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query83.tpl +0 -104
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query84.tpl +0 -58
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query85.tpl +0 -121
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query86.tpl +0 -60
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query87.tpl +0 -56
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query88.tpl +0 -128
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query89.tpl +0 -75
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query9.tpl +0 -88
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query90.tpl +0 -58
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query91.tpl +0 -68
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query92.tpl +0 -68
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query93.tpl +0 -53
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query94.tpl +0 -67
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query95.tpl +0 -71
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query96.tpl +0 -52
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query97.tpl +0 -62
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query98.tpl +0 -70
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query99.tpl +0 -69
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/sqlserver.tpl +0 -37
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/templates.lst +0 -99
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/README +0 -4
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/ansi.tpl +0 -38
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/db2.tpl +0 -38
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/netezza.tpl +0 -38
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/oracle.tpl +0 -38
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query1.tpl +0 -62
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query11.tpl +0 -119
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query12.tpl +0 -72
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query13.tpl +0 -89
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query14.tpl +0 -247
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query15.tpl +0 -56
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query16.tpl +0 -76
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query17.tpl +0 -80
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query19.tpl +0 -64
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query2.tpl +0 -94
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query20.tpl +0 -67
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query21.tpl +0 -65
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query23.tpl +0 -144
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query24.tpl +0 -147
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query25.tpl +0 -84
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query26.tpl +0 -61
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query27.tpl +0 -68
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query28.tpl +0 -90
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query29.tpl +0 -85
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query3.tpl +0 -58
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query30.tpl +0 -66
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query31.tpl +0 -88
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query32.tpl +0 -65
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query33.tpl +0 -113
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query34.tpl +0 -77
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query37.tpl +0 -57
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query38.tpl +0 -58
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query39.tpl +0 -93
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query4.tpl +0 -154
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query40.tpl +0 -63
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query41.tpl +0 -90
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query42.tpl +0 -64
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query43.tpl +0 -55
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query44.tpl +0 -72
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query45.tpl +0 -56
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query46.tpl +0 -78
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query47.tpl +0 -89
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query48.tpl +0 -104
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query49.tpl +0 -164
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query50.tpl +0 -96
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query52.tpl +0 -59
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query53.tpl +0 -64
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query54.tpl +0 -95
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query55.tpl +0 -52
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query56.tpl +0 -108
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query57.tpl +0 -87
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query58.tpl +0 -101
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query59.tpl +0 -79
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query6.tpl +0 -62
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query60.tpl +0 -115
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query61.tpl +0 -83
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query62.tpl +0 -71
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query63.tpl +0 -64
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query64.tpl +0 -157
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query65.tpl +0 -62
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query66.tpl +0 -261
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query68.tpl +0 -82
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query69.tpl +0 -85
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query7.tpl +0 -60
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query71.tpl +0 -74
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query72.tpl +0 -67
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query73.tpl +0 -69
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query74.tpl +0 -99
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query75.tpl +0 -107
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query76.tpl +0 -64
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query78.tpl +0 -94
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query79.tpl +0 -60
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query8.tpl +0 -144
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query81.tpl +0 -68
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query82.tpl +0 -56
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query83.tpl +0 -104
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query84.tpl +0 -58
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query85.tpl +0 -121
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query87.tpl +0 -56
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query88.tpl +0 -128
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query89.tpl +0 -75
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query9.tpl +0 -88
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query90.tpl +0 -58
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query91.tpl +0 -68
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query92.tpl +0 -68
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query93.tpl +0 -53
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query94.tpl +0 -67
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query95.tpl +0 -71
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query96.tpl +0 -52
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query97.tpl +0 -62
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query98.tpl +0 -70
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query99.tpl +0 -69
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/sqlserver.tpl +0 -37
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/templates.lst +0 -99
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/README +0 -4
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/ansi.tpl +0 -38
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/db2.tpl +0 -38
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/netezza.tpl +0 -38
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/oracle.tpl +0 -38
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query1.tpl +0 -62
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query10.tpl +0 -98
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query11.tpl +0 -119
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query12.tpl +0 -72
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query13.tpl +0 -89
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query14.tpl +0 -247
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query15.tpl +0 -56
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query16.tpl +0 -76
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query17.tpl +0 -80
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query18.tpl +0 -73
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query19.tpl +0 -64
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query2.tpl +0 -94
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query20.tpl +0 -67
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query21.tpl +0 -65
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query22.tpl +0 -54
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query23.tpl +0 -144
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query24.tpl +0 -147
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query25.tpl +0 -84
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query26.tpl +0 -61
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query27.tpl +0 -68
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query28.tpl +0 -90
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query29.tpl +0 -85
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query3.tpl +0 -58
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query30.tpl +0 -66
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query31.tpl +0 -88
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query32.tpl +0 -65
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query33.tpl +0 -113
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query34.tpl +0 -77
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query35.tpl +0 -98
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query36.tpl +0 -74
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query37.tpl +0 -57
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query38.tpl +0 -58
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query39.tpl +0 -93
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query4.tpl +0 -154
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query40.tpl +0 -63
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query41.tpl +0 -90
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query42.tpl +0 -64
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query43.tpl +0 -55
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query44.tpl +0 -72
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query45.tpl +0 -56
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query46.tpl +0 -78
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query47.tpl +0 -89
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query48.tpl +0 -104
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query49.tpl +0 -164
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query5.tpl +0 -165
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query50.tpl +0 -96
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query51.tpl +0 -80
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query52.tpl +0 -59
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query53.tpl +0 -64
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query54.tpl +0 -95
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query55.tpl +0 -52
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query56.tpl +0 -108
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query57.tpl +0 -87
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query58.tpl +0 -101
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query59.tpl +0 -79
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query6.tpl +0 -62
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query60.tpl +0 -115
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query61.tpl +0 -83
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query62.tpl +0 -71
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query63.tpl +0 -64
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query64.tpl +0 -157
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query65.tpl +0 -62
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query66.tpl +0 -261
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query67.tpl +0 -81
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query68.tpl +0 -82
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query69.tpl +0 -85
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query7.tpl +0 -60
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query70.tpl +0 -73
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query71.tpl +0 -74
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query72.tpl +0 -67
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query73.tpl +0 -69
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query74.tpl +0 -99
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query75.tpl +0 -107
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query76.tpl +0 -64
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query77.tpl +0 -145
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query78.tpl +0 -94
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query79.tpl +0 -60
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query8.tpl +0 -144
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query80.tpl +0 -131
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query81.tpl +0 -68
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query82.tpl +0 -56
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query83.tpl +0 -104
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query84.tpl +0 -58
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query85.tpl +0 -121
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query86.tpl +0 -60
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query87.tpl +0 -56
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query88.tpl +0 -128
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query89.tpl +0 -75
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query9.tpl +0 -88
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query90.tpl +0 -58
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query91.tpl +0 -68
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query92.tpl +0 -68
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query93.tpl +0 -53
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query94.tpl +0 -67
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query95.tpl +0 -71
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query96.tpl +0 -52
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query97.tpl +0 -62
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query98.tpl +0 -70
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query99.tpl +0 -69
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/sqlserver.tpl +0 -37
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/templates.lst +0 -99
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/README +0 -4
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/ansi.tpl +0 -38
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/db2.tpl +0 -38
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/netezza.tpl +0 -38
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/oracle.tpl +0 -38
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query1.tpl +0 -62
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query10.tpl +0 -98
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query11.tpl +0 -119
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query12.tpl +0 -72
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query13.tpl +0 -89
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query14.tpl +0 -247
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query15.tpl +0 -56
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query16.tpl +0 -76
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query17.tpl +0 -80
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query18.tpl +0 -73
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query19.tpl +0 -64
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query2.tpl +0 -94
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query20.tpl +0 -67
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query21.tpl +0 -65
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query22.tpl +0 -54
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query23.tpl +0 -144
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query24.tpl +0 -147
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query25.tpl +0 -84
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query26.tpl +0 -61
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query27.tpl +0 -68
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query28.tpl +0 -90
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query29.tpl +0 -85
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query3.tpl +0 -58
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query30.tpl +0 -66
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query31.tpl +0 -88
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query32.tpl +0 -65
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query33.tpl +0 -113
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query34.tpl +0 -77
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query35.tpl +0 -98
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query36.tpl +0 -74
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query37.tpl +0 -57
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query38.tpl +0 -58
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query39.tpl +0 -93
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query4.tpl +0 -154
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query40.tpl +0 -63
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query41.tpl +0 -90
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query42.tpl +0 -64
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query43.tpl +0 -55
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query44.tpl +0 -72
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query45.tpl +0 -56
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query46.tpl +0 -78
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query47.tpl +0 -89
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query48.tpl +0 -104
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query49.tpl +0 -164
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query5.tpl +0 -165
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query50.tpl +0 -96
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query51.tpl +0 -80
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query52.tpl +0 -59
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query53.tpl +0 -64
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query54.tpl +0 -95
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query55.tpl +0 -52
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query56.tpl +0 -108
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query57.tpl +0 -87
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query58.tpl +0 -101
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query59.tpl +0 -79
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query6.tpl +0 -62
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query60.tpl +0 -115
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query61.tpl +0 -83
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query62.tpl +0 -71
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query63.tpl +0 -64
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query64.tpl +0 -157
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query65.tpl +0 -62
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query66.tpl +0 -261
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query67.tpl +0 -81
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query68.tpl +0 -82
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query69.tpl +0 -85
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query7.tpl +0 -60
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query70.tpl +0 -73
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query71.tpl +0 -74
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query72.tpl +0 -67
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query73.tpl +0 -69
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query74.tpl +0 -99
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query75.tpl +0 -107
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query76.tpl +0 -64
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query77.tpl +0 -145
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query78.tpl +0 -94
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query79.tpl +0 -60
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query8.tpl +0 -144
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query80.tpl +0 -131
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query81.tpl +0 -68
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query82.tpl +0 -56
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query83.tpl +0 -104
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query84.tpl +0 -58
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query85.tpl +0 -121
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query86.tpl +0 -60
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query87.tpl +0 -56
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query88.tpl +0 -128
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query89.tpl +0 -75
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query9.tpl +0 -88
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query90.tpl +0 -58
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query91.tpl +0 -68
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query92.tpl +0 -68
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query93.tpl +0 -53
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query94.tpl +0 -67
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query95.tpl +0 -71
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query96.tpl +0 -52
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query97.tpl +0 -62
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query98.tpl +0 -70
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query99.tpl +0 -69
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/sqlserver.tpl +0 -37
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/templates.lst +0 -99
- benchbox-0.1.0.dist-info/RECORD +0 -1192
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/README +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/ansi.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/db2.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/netezza.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/oracle.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query1.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query10.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query11.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query12.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query13.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query14.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query15.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query16.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query17.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query18.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query19.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query2.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query20.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query21.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query22.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query23.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query24.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query25.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query26.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query27.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query28.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query29.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query3.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query30.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query31.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query32.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query33.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query34.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query35.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query36.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query37.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query38.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query39.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query4.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query40.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query41.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query42.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query43.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query44.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query45.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query46.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query47.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query48.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query49.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query5.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query50.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query51.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query52.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query53.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query54.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query55.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query56.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query57.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query58.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query59.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query6.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query60.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query61.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query62.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query63.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query64.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query65.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query66.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query67.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query68.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query69.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query7.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query70.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query71.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query73.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query74.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query75.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query76.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query77.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query78.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query79.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query8.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query80.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query81.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query82.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query83.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query84.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query85.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query86.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query87.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query88.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query89.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query9.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query90.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query91.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query92.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query93.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query94.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query95.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query96.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query97.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query98.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query99.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/templates.lst +0 -0
- {benchbox-0.1.0.dist-info → benchbox-0.1.1.dist-info}/entry_points.txt +0 -0
- {benchbox-0.1.0.dist-info → benchbox-0.1.1.dist-info}/licenses/LICENSE +0 -0
- {benchbox-0.1.0.dist-info → benchbox-0.1.1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,912 @@
|
|
|
1
|
+
"""DataFrame operations for Write Primitives benchmark.
|
|
2
|
+
|
|
3
|
+
This module provides DataFrame implementations of Write Primitives operations,
|
|
4
|
+
enabling benchmarking of write operations (INSERT, UPDATE, DELETE, MERGE, BULK_LOAD)
|
|
5
|
+
on DataFrame platforms like Polars, Delta Lake, and Iceberg.
|
|
6
|
+
|
|
7
|
+
The module leverages the existing maintenance interface infrastructure from
|
|
8
|
+
benchbox.core.dataframe.maintenance_interface for row-level operations.
|
|
9
|
+
|
|
10
|
+
Platform Support:
|
|
11
|
+
- Polars: Full support via read-modify-write pattern
|
|
12
|
+
- Delta Lake: Native ACID support via deltalake
|
|
13
|
+
- Iceberg: Native ACID support via pyiceberg
|
|
14
|
+
- PySpark: Via Delta Lake or file-based operations
|
|
15
|
+
- Pandas: File-level operations only (INSERT, BULK_LOAD)
|
|
16
|
+
|
|
17
|
+
Copyright 2026 Joe Harris / BenchBox Project
|
|
18
|
+
|
|
19
|
+
Licensed under the MIT License. See LICENSE file in the project root for details.
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
from __future__ import annotations
|
|
23
|
+
|
|
24
|
+
import logging
|
|
25
|
+
import time
|
|
26
|
+
from dataclasses import dataclass, field
|
|
27
|
+
from enum import Enum
|
|
28
|
+
from pathlib import Path
|
|
29
|
+
from typing import TYPE_CHECKING, Any
|
|
30
|
+
|
|
31
|
+
from benchbox.core.dataframe.maintenance_interface import (
|
|
32
|
+
DataFrameMaintenanceCapabilities,
|
|
33
|
+
MaintenanceResult,
|
|
34
|
+
get_maintenance_operations_for_platform,
|
|
35
|
+
)
|
|
36
|
+
|
|
37
|
+
if TYPE_CHECKING:
|
|
38
|
+
pass
|
|
39
|
+
|
|
40
|
+
logger = logging.getLogger(__name__)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class WriteOperationType(Enum):
    """Enumerate the write operation categories the benchmark can run.

    Every member's value is the lowercase string name of the operation.
    The members correspond to Write Primitives benchmark categories and to
    the maintenance operations that implement them; they are grouped below
    by the level at which the operation acts.
    """

    # --- File-level: available on all platforms ---
    INSERT = "insert"  # rows are appended
    BULK_LOAD = "bulk_load"  # files are loaded, with format options

    # --- Row-level: Polars rewrites the data, Delta Lake/Iceberg act natively ---
    UPDATE = "update"  # rows already present are modified
    DELETE = "delete"  # rows are removed
    MERGE = "merge"  # upsert

    # --- Transactional: ACID platforms only ---
    TRANSACTION = "transaction"
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
@dataclass
class DataFrameWriteCapabilities:
    """Describe which write features a DataFrame platform offers.

    Combines an optional DataFrameMaintenanceCapabilities object with flags
    that are specific to Write Primitives (bulk loading, compression,
    partitioned and sorted writes).

    Attributes:
        platform_name: Name of the platform.
        maintenance_caps: Underlying maintenance capabilities, or None when
            they have not been supplied.
        supports_bulk_load: Whether data can be loaded from files.
        supports_compression: Whether compression options are available.
        supported_compressions: Names of the compression codecs available.
        supports_partitioning: Whether partitioned writes are available.
        supports_sorting: Whether sorted writes are available.
        notes: Free-form, platform-specific remarks.
    """

    platform_name: str
    maintenance_caps: DataFrameMaintenanceCapabilities | None = None
    # Every DataFrame platform is able to read files.
    supports_bulk_load: bool = True
    # Compression is available on most platforms.
    supports_compression: bool = True
    # A factory is used so each instance owns its own list.
    supported_compressions: list[str] = field(default_factory=lambda: ["zstd", "snappy", "gzip", "lz4"])
    # Off by default; PySpark and Polars offer partitioning.
    supports_partitioning: bool = False
    # Most platforms are able to sort ahead of a write.
    supports_sorting: bool = True
    notes: str = ""

    def supports_operation(self, operation: WriteOperationType) -> bool:
        """Report whether this platform supports ``operation``.

        Args:
            operation: The operation type being queried.

        Returns:
            The matching capability flag. BULK_LOAD is answered from
            ``supports_bulk_load``; every other operation is answered from
            ``maintenance_caps`` and is unsupported when that is missing.
            Operation values that are not recognised yield False.
        """
        caps = self.maintenance_caps

        # Bulk loading is the only operation answered without maintenance_caps.
        if operation == WriteOperationType.BULK_LOAD:
            return self.supports_bulk_load

        if operation == WriteOperationType.TRANSACTION:
            if caps:
                return caps.supports_transactions
            return False

        # Everything that remains needs the maintenance capabilities.
        if caps is None:
            return False

        row_level_flags = {
            WriteOperationType.INSERT: caps.supports_insert,
            WriteOperationType.UPDATE: caps.supports_update,
            # Either flavour of delete support counts.
            WriteOperationType.DELETE: caps.supports_delete or caps.supports_partitioned_delete,
            WriteOperationType.MERGE: caps.supports_merge,
        }
        try:
            return row_level_flags[operation]
        except KeyError:
            return False

    def get_unsupported_operations(self) -> list[WriteOperationType]:
        """Collect every operation type this platform cannot perform.

        Returns:
            The WriteOperationType members, in enum definition order, for
            which ``supports_operation`` is falsy.
        """
        return [candidate for candidate in WriteOperationType if not self.supports_operation(candidate)]
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
# Ready-made capability descriptions for the common DataFrame platforms.
# maintenance_caps is left as None in all three.

# Polars: maintenance_caps is set at runtime via
# get_maintenance_operations_for_platform.
POLARS_WRITE_CAPABILITIES = DataFrameWriteCapabilities(
    platform_name="polars-df",
    notes="Full operation support via read-modify-write. RAM-limited for large datasets.",
    maintenance_caps=None,
    supports_bulk_load=True,
    supports_sorting=True,
    supports_partitioning=True,  # write_parquet offers partition_by
    supports_compression=True,
    supported_compressions=["zstd", "snappy", "gzip", "lz4"],
)

# Pandas: no partitioned writes, and a different codec list.
PANDAS_WRITE_CAPABILITIES = DataFrameWriteCapabilities(
    platform_name="pandas-df",
    notes="File-level operations only. Use Polars or PySpark for row-level operations.",
    maintenance_caps=None,
    supports_bulk_load=True,
    supports_sorting=True,
    supports_partitioning=False,
    supports_compression=True,
    supported_compressions=["snappy", "gzip", "brotli"],
)

# PySpark: maintenance capabilities depend on the underlying table format
# (Delta/Iceberg).
PYSPARK_WRITE_CAPABILITIES = DataFrameWriteCapabilities(
    platform_name="pyspark-df",
    notes="Row-level operations require Delta Lake or Iceberg table format.",
    maintenance_caps=None,
    supports_bulk_load=True,
    supports_sorting=True,  # via orderBy
    supports_partitioning=True,  # via partitionBy
    supports_compression=True,
    supported_compressions=["zstd", "snappy", "gzip", "lz4"],
)
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
@dataclass
class DataFrameWriteResult:
    """Result of a DataFrame write operation.

    Carries the fields copied from a MaintenanceResult plus Write
    Primitives-specific metrics (bytes, compression, file count, validation).

    Attributes:
        operation_type: Type of write operation.
        success: Whether the operation completed successfully.
        start_time: Operation start timestamp (Unix time).
        end_time: Operation end timestamp (Unix time).
        duration_ms: Operation duration in milliseconds.
        rows_affected: Number of rows written/modified.
        bytes_written: Bytes written (if available).
        compression: Compression codec used.
        file_count: Number of files written.
        error_message: Error description if the operation failed.
        validation_passed: Whether validation checks passed.
        validation_results: Details of validation checks.
        metrics: Additional platform-specific metrics.
    """

    operation_type: WriteOperationType
    success: bool
    start_time: float
    end_time: float
    duration_ms: float
    rows_affected: int
    bytes_written: int | None = None
    compression: str | None = None
    file_count: int | None = None
    error_message: str | None = None
    validation_passed: bool = True
    validation_results: list[dict[str, Any]] = field(default_factory=list)
    metrics: dict[str, Any] = field(default_factory=dict)

    @classmethod
    def from_maintenance_result(
        cls,
        maintenance_result: MaintenanceResult,
        operation_type: WriteOperationType,
        **extra_fields: Any,
    ) -> DataFrameWriteResult:
        """Create a write result from a MaintenanceResult.

        Args:
            maintenance_result: Underlying maintenance result. Its
                ``success``, ``start_time``, ``end_time``, ``duration``,
                ``rows_affected``, ``error_message`` and ``metrics``
                attributes are read.
            operation_type: Write operation type to record.
            **extra_fields: Additional dataclass fields to set (for example
                ``compression`` or ``file_count``). Names already populated
                from ``maintenance_result`` must not be repeated here.

        Returns:
            DataFrameWriteResult instance.

        Raises:
            TypeError: If ``extra_fields`` repeats a field that is populated
                from ``maintenance_result`` or names an unknown field.
        """
        return cls(
            operation_type=operation_type,
            success=maintenance_result.success,
            start_time=maintenance_result.start_time,
            end_time=maintenance_result.end_time,
            # Convert to ms (presumably ``duration`` is in seconds - confirm
            # against MaintenanceResult).
            duration_ms=maintenance_result.duration * 1000,
            rows_affected=maintenance_result.rows_affected,
            error_message=maintenance_result.error_message,
            # Shallow-copy so later edits to this result's metrics do not
            # leak back into the maintenance result (and vice versa).
            metrics=dict(maintenance_result.metrics or {}),
            **extra_fields,
        )

    @classmethod
    def failure(
        cls,
        operation_type: WriteOperationType,
        error_message: str,
        start_time: float | None = None,
    ) -> DataFrameWriteResult:
        """Create a failure result.

        Args:
            operation_type: The operation that failed.
            error_message: Description of the failure.
            start_time: Optional start time; when omitted the current time
                is used and the duration is reported as zero.

        Returns:
            DataFrameWriteResult with ``success`` and ``validation_passed``
            set to False and ``rows_affected`` set to 0.
        """
        now = time.time()
        # Decide on ``is None`` only: a truthiness test would discard a
        # legitimate 0.0 start time while still measuring the duration from it.
        if start_time is None:
            start_time = now
            duration_ms = 0.0
        else:
            duration_ms = (now - start_time) * 1000
        return cls(
            operation_type=operation_type,
            success=False,
            start_time=start_time,
            end_time=now,
            duration_ms=duration_ms,
            rows_affected=0,
            error_message=error_message,
            validation_passed=False,
        )
|
|
255
|
+
|
|
256
|
+
|
|
257
|
+
class DataFrameWriteOperationsManager:
|
|
258
|
+
"""Manager for DataFrame write operations.
|
|
259
|
+
|
|
260
|
+
Wraps the maintenance operations interface with Write Primitives-specific
|
|
261
|
+
functionality including BULK_LOAD, validation, and result formatting.
|
|
262
|
+
|
|
263
|
+
Example:
|
|
264
|
+
manager = DataFrameWriteOperationsManager("polars-df")
|
|
265
|
+
|
|
266
|
+
# Check capabilities
|
|
267
|
+
if manager.supports_operation(WriteOperationType.UPDATE):
|
|
268
|
+
result = manager.execute_update(
|
|
269
|
+
table_path="/data/orders",
|
|
270
|
+
condition="status = 'pending'",
|
|
271
|
+
updates={"status": "'cancelled'"}
|
|
272
|
+
)
|
|
273
|
+
|
|
274
|
+
# Bulk load with options
|
|
275
|
+
result = manager.execute_bulk_load(
|
|
276
|
+
source_path="/data/raw/orders.csv",
|
|
277
|
+
target_path="/data/orders",
|
|
278
|
+
source_format="csv",
|
|
279
|
+
compression="zstd"
|
|
280
|
+
)
|
|
281
|
+
"""
|
|
282
|
+
|
|
283
|
+
def __init__(self, platform_name: str, spark_session: Any = None) -> None:
|
|
284
|
+
"""Initialize the write operations manager.
|
|
285
|
+
|
|
286
|
+
Args:
|
|
287
|
+
platform_name: Platform name (e.g., "polars-df", "pyspark-df")
|
|
288
|
+
spark_session: SparkSession instance (required for pyspark-df)
|
|
289
|
+
|
|
290
|
+
Raises:
|
|
291
|
+
ValueError: If platform is not supported for DataFrame operations
|
|
292
|
+
"""
|
|
293
|
+
self.platform_name = platform_name.lower()
|
|
294
|
+
self.spark_session = spark_session
|
|
295
|
+
self.logger = logging.getLogger(f"{__name__}.{self.__class__.__name__}")
|
|
296
|
+
|
|
297
|
+
# Get maintenance operations handler
|
|
298
|
+
self._maintenance_ops = self._get_maintenance_ops()
|
|
299
|
+
|
|
300
|
+
# Build capabilities
|
|
301
|
+
self._capabilities = self._build_capabilities()
|
|
302
|
+
|
|
303
|
+
def _get_maintenance_ops(self) -> Any:
|
|
304
|
+
"""Get the appropriate maintenance operations handler.
|
|
305
|
+
|
|
306
|
+
For PySpark, uses the dedicated PySpark maintenance module.
|
|
307
|
+
For other platforms, uses the generic maintenance interface.
|
|
308
|
+
|
|
309
|
+
Returns:
|
|
310
|
+
Maintenance operations handler or None
|
|
311
|
+
"""
|
|
312
|
+
if "pyspark" in self.platform_name or "spark" in self.platform_name:
|
|
313
|
+
if self.spark_session is not None:
|
|
314
|
+
try:
|
|
315
|
+
from benchbox.platforms.dataframe.pyspark_maintenance import (
|
|
316
|
+
get_pyspark_maintenance_operations,
|
|
317
|
+
)
|
|
318
|
+
|
|
319
|
+
return get_pyspark_maintenance_operations(
|
|
320
|
+
spark_session=self.spark_session,
|
|
321
|
+
prefer_delta=True,
|
|
322
|
+
)
|
|
323
|
+
except ImportError:
|
|
324
|
+
self.logger.debug("PySpark maintenance module not available")
|
|
325
|
+
return None
|
|
326
|
+
else:
|
|
327
|
+
self.logger.debug("No SparkSession provided for PySpark platform")
|
|
328
|
+
return None
|
|
329
|
+
|
|
330
|
+
return get_maintenance_operations_for_platform(self.platform_name)
|
|
331
|
+
|
|
332
|
+
def _build_capabilities(self) -> DataFrameWriteCapabilities:
    """Assemble the write capabilities advertised for this platform.

    Every platform reports bulk-load and compression support. Polars, Pandas
    and Spark platforms additionally set their codec list, partitioning and
    sorting flags and a note; any other platform keeps the defaults of
    DataFrameWriteCapabilities for those fields.

    Returns:
        DataFrameWriteCapabilities for this platform
    """
    handler = self._maintenance_ops
    maintenance_caps = handler.get_capabilities() if handler is not None else None

    name = self.platform_name

    # Fields shared by every platform, including the generic fallback.
    settings: dict[str, Any] = {
        "platform_name": name,
        "maintenance_caps": maintenance_caps,
        "supports_bulk_load": True,
        "supports_compression": True,
    }

    # Platform-specific additions; first matching substring wins.
    if "polars" in name:
        settings.update(
            supported_compressions=["zstd", "snappy", "gzip", "lz4"],
            supports_partitioning=True,
            supports_sorting=True,
            notes="Full operation support via read-modify-write.",
        )
    elif "pandas" in name:
        settings.update(
            supported_compressions=["snappy", "gzip", "brotli"],
            supports_partitioning=False,
            supports_sorting=True,
            notes="File-level operations only.",
        )
    elif "spark" in name:  # also matches "pyspark"
        settings.update(
            supported_compressions=["zstd", "snappy", "gzip", "lz4"],
            supports_partitioning=True,
            supports_sorting=True,
            notes="Row-level operations require Delta Lake table format.",
        )

    return DataFrameWriteCapabilities(**settings)
|
|
386
|
+
|
|
387
|
+
def get_capabilities(self) -> DataFrameWriteCapabilities:
|
|
388
|
+
"""Get platform write capabilities.
|
|
389
|
+
|
|
390
|
+
Returns:
|
|
391
|
+
DataFrameWriteCapabilities for this platform
|
|
392
|
+
"""
|
|
393
|
+
return self._capabilities
|
|
394
|
+
|
|
395
|
+
def supports_operation(self, operation: WriteOperationType) -> bool:
|
|
396
|
+
"""Check if an operation type is supported.
|
|
397
|
+
|
|
398
|
+
Args:
|
|
399
|
+
operation: The operation to check
|
|
400
|
+
|
|
401
|
+
Returns:
|
|
402
|
+
True if supported
|
|
403
|
+
"""
|
|
404
|
+
return self._capabilities.supports_operation(operation)
|
|
405
|
+
|
|
406
|
+
def get_unsupported_message(self, operation: WriteOperationType) -> str:
    """Build the explanation shown when an operation is unavailable.

    Row-level operations (UPDATE, DELETE, MERGE) and TRANSACTION get a
    message that lists alternatives; anything else gets a one-line message.

    Args:
        operation: The unsupported operation

    Returns:
        Helpful error message with alternatives
    """
    platform = self.platform_name
    row_level_operations = (
        WriteOperationType.UPDATE,
        WriteOperationType.DELETE,
        WriteOperationType.MERGE,
    )

    if operation in row_level_operations:
        headline = f"{platform} does not support {operation.value} operations in the current configuration.\n"
        alternatives = (
            "Alternatives:\n"
            " - Use polars-df (supports row-level operations via read-modify-write)\n"
            " - Use pyspark-df with Delta Lake table format\n"
            " - Use file-level INSERT/BULK_LOAD operations instead"
        )
        return headline + alternatives

    if operation == WriteOperationType.TRANSACTION:
        return (
            f"{platform} does not support explicit transactions.\n"
            "Use Delta Lake or Iceberg for ACID transaction support."
        )

    return f"{platform} does not support {operation.value} operations."
|
|
429
|
+
|
|
430
|
+
def execute_insert(
    self,
    table_path: Path | str,
    dataframe: Any,
    partition_columns: list[str] | None = None,
    mode: str = "append",
) -> DataFrameWriteResult:
    """Insert rows into a table through the maintenance handler.

    Returns a failure result, without touching the table, when INSERT is
    not supported or no maintenance handler is available.

    Args:
        table_path: Path to the table directory
        dataframe: DataFrame containing rows to insert
        partition_columns: Columns to partition by
        mode: Write mode ("append" or "overwrite")

    Returns:
        DataFrameWriteResult with operation outcome
    """
    op = WriteOperationType.INSERT

    if not self.supports_operation(op):
        return DataFrameWriteResult.failure(op, self.get_unsupported_message(op))

    handler = self._maintenance_ops
    if handler is None:
        return DataFrameWriteResult.failure(
            op,
            f"Maintenance operations not available for {self.platform_name}",
        )

    outcome = handler.insert_rows(
        table_path=table_path,
        dataframe=dataframe,
        partition_columns=partition_columns,
        mode=mode,
    )
    # Convert the handler's result into the write-result shape.
    return DataFrameWriteResult.from_maintenance_result(outcome, op)
|
|
471
|
+
|
|
472
|
+
def execute_update(
    self,
    table_path: Path | str,
    condition: str,
    updates: dict[str, Any],
) -> DataFrameWriteResult:
    """Update matching rows through the maintenance handler.

    Returns a failure result, without touching the table, when UPDATE is
    not supported or no maintenance handler is available.

    Args:
        table_path: Path to the table directory
        condition: SQL-like condition string
        updates: Column name to new value mapping

    Returns:
        DataFrameWriteResult with operation outcome
    """
    op = WriteOperationType.UPDATE

    if not self.supports_operation(op):
        return DataFrameWriteResult.failure(op, self.get_unsupported_message(op))

    handler = self._maintenance_ops
    if handler is None:
        return DataFrameWriteResult.failure(
            op,
            f"Maintenance operations not available for {self.platform_name}",
        )

    outcome = handler.update_rows(
        table_path=table_path,
        condition=condition,
        updates=updates,
    )
    # Convert the handler's result into the write-result shape.
    return DataFrameWriteResult.from_maintenance_result(outcome, op)
|
|
510
|
+
|
|
511
|
+
def execute_delete(
    self,
    table_path: Path | str,
    condition: str,
) -> DataFrameWriteResult:
    """Delete matching rows through the maintenance handler.

    Returns a failure result, without touching the table, when DELETE is
    not supported or no maintenance handler is available.

    Args:
        table_path: Path to the table directory
        condition: SQL-like condition string

    Returns:
        DataFrameWriteResult with operation outcome
    """
    op = WriteOperationType.DELETE

    if not self.supports_operation(op):
        return DataFrameWriteResult.failure(op, self.get_unsupported_message(op))

    handler = self._maintenance_ops
    if handler is None:
        return DataFrameWriteResult.failure(
            op,
            f"Maintenance operations not available for {self.platform_name}",
        )

    outcome = handler.delete_rows(table_path=table_path, condition=condition)
    # Convert the handler's result into the write-result shape.
    return DataFrameWriteResult.from_maintenance_result(outcome, op)
|
|
546
|
+
|
|
547
|
+
def execute_merge(
    self,
    table_path: Path | str,
    source_dataframe: Any,
    merge_condition: str,
    when_matched: dict[str, Any] | None = None,
    when_not_matched: dict[str, Any] | None = None,
) -> DataFrameWriteResult:
    """Merge (upsert) source rows into a table through the maintenance handler.

    Returns a failure result, without touching the table, when MERGE is not
    supported or no maintenance handler is available.

    Args:
        table_path: Path to the target table
        source_dataframe: DataFrame containing source rows
        merge_condition: Join condition for matching rows
        when_matched: Updates to apply when matched
        when_not_matched: Values for insert when not matched

    Returns:
        DataFrameWriteResult with operation outcome
    """
    op = WriteOperationType.MERGE

    if not self.supports_operation(op):
        return DataFrameWriteResult.failure(op, self.get_unsupported_message(op))

    handler = self._maintenance_ops
    if handler is None:
        return DataFrameWriteResult.failure(
            op,
            f"Maintenance operations not available for {self.platform_name}",
        )

    outcome = handler.merge_rows(
        table_path=table_path,
        source_dataframe=source_dataframe,
        merge_condition=merge_condition,
        when_matched=when_matched,
        when_not_matched=when_not_matched,
    )
    # Convert the handler's result into the write-result shape.
    return DataFrameWriteResult.from_maintenance_result(outcome, op)
|
|
591
|
+
|
|
592
|
+
def execute_bulk_load(
    self,
    source_path: Path | str,
    target_path: Path | str,
    source_format: str = "parquet",
    target_format: str = "parquet",
    compression: str | None = "zstd",
    partition_columns: list[str] | None = None,
    sort_columns: list[str] | None = None,
) -> DataFrameWriteResult:
    """Load source files into a target location and report the outcome.

    The platform name selects the Polars, Pandas or PySpark loader. Any
    exception raised while loading is logged and converted into a failure
    result rather than propagated.

    Args:
        source_path: Path to source data files
        target_path: Path to write target data
        source_format: Source file format ("parquet", "csv", "json")
        target_format: Target file format ("parquet")
        compression: Compression codec (None, "zstd", "snappy", "gzip", "lz4")
        partition_columns: Columns to partition by (not forwarded to the
            Pandas loader)
        sort_columns: Columns to sort by before writing

    Returns:
        DataFrameWriteResult with operation outcome
    """
    op = WriteOperationType.BULK_LOAD

    if not self.supports_operation(op):
        return DataFrameWriteResult.failure(op, self.get_unsupported_message(op))

    # Timing starts before path normalisation, as in the reported duration.
    start_time = time.time()
    source_path = Path(source_path)
    target_path = Path(target_path)
    platform = self.platform_name

    try:
        if "polars" in platform:
            loaded = self._bulk_load_polars(
                source_path,
                target_path,
                source_format,
                target_format,
                compression,
                partition_columns,
                sort_columns,
            )
        elif "pandas" in platform:
            # The Pandas loader takes no partition columns.
            loaded = self._bulk_load_pandas(
                source_path,
                target_path,
                source_format,
                target_format,
                compression,
                sort_columns,
            )
        elif "spark" in platform:  # also matches "pyspark"
            loaded = self._bulk_load_pyspark(
                source_path,
                target_path,
                source_format,
                target_format,
                compression,
                partition_columns,
                sort_columns,
            )
        else:
            return DataFrameWriteResult.failure(
                op,
                f"BULK_LOAD not implemented for {self.platform_name}",
                start_time,
            )

        rows, bytes_written, file_count = loaded
        end_time = time.time()
        return DataFrameWriteResult(
            operation_type=op,
            success=True,
            start_time=start_time,
            end_time=end_time,
            duration_ms=(end_time - start_time) * 1000,
            rows_affected=rows,
            bytes_written=bytes_written,
            compression=compression,
            file_count=file_count,
        )
    except Exception as e:
        # Boundary handler: callers receive a failure result, not an exception.
        self.logger.error(f"BULK_LOAD failed: {e}")
        return DataFrameWriteResult.failure(op, str(e), start_time)
|
|
686
|
+
|
|
687
|
+
def _bulk_load_polars(
|
|
688
|
+
self,
|
|
689
|
+
source_path: Path,
|
|
690
|
+
target_path: Path,
|
|
691
|
+
source_format: str,
|
|
692
|
+
target_format: str,
|
|
693
|
+
compression: str | None,
|
|
694
|
+
partition_columns: list[str] | None,
|
|
695
|
+
sort_columns: list[str] | None,
|
|
696
|
+
) -> tuple[int, int | None, int]:
|
|
697
|
+
"""Polars-specific bulk load implementation.
|
|
698
|
+
|
|
699
|
+
Returns:
|
|
700
|
+
Tuple of (rows_written, bytes_written, file_count)
|
|
701
|
+
"""
|
|
702
|
+
try:
|
|
703
|
+
import polars as pl
|
|
704
|
+
except ImportError as e:
|
|
705
|
+
raise ImportError("Polars is required for polars-df bulk load") from e
|
|
706
|
+
|
|
707
|
+
# Read source data
|
|
708
|
+
if source_format == "parquet":
|
|
709
|
+
df = pl.scan_parquet(source_path).collect()
|
|
710
|
+
elif source_format == "csv":
|
|
711
|
+
df = pl.scan_csv(source_path).collect()
|
|
712
|
+
elif source_format == "json":
|
|
713
|
+
df = pl.read_json(source_path)
|
|
714
|
+
else:
|
|
715
|
+
raise ValueError(f"Unsupported source format: {source_format}")
|
|
716
|
+
|
|
717
|
+
row_count = df.height
|
|
718
|
+
|
|
719
|
+
# Apply sorting
|
|
720
|
+
if sort_columns:
|
|
721
|
+
df = df.sort(sort_columns)
|
|
722
|
+
|
|
723
|
+
# Write to target
|
|
724
|
+
target_path.mkdir(parents=True, exist_ok=True)
|
|
725
|
+
|
|
726
|
+
if partition_columns:
|
|
727
|
+
# Partitioned write
|
|
728
|
+
for partition_vals, partition_df in df.group_by(partition_columns):
|
|
729
|
+
if isinstance(partition_vals, tuple):
|
|
730
|
+
parts = zip(partition_columns, partition_vals)
|
|
731
|
+
else:
|
|
732
|
+
parts = [(partition_columns[0], partition_vals)]
|
|
733
|
+
|
|
734
|
+
partition_path = target_path
|
|
735
|
+
for col, val in parts:
|
|
736
|
+
partition_path = partition_path / f"{col}={val}"
|
|
737
|
+
|
|
738
|
+
partition_path.mkdir(parents=True, exist_ok=True)
|
|
739
|
+
partition_df.write_parquet(
|
|
740
|
+
partition_path / "part-00000.parquet",
|
|
741
|
+
compression=compression or "uncompressed",
|
|
742
|
+
)
|
|
743
|
+
file_count = len(list(target_path.rglob("*.parquet")))
|
|
744
|
+
else:
|
|
745
|
+
# Single file write
|
|
746
|
+
output_file = target_path / "part-00000.parquet"
|
|
747
|
+
df.write_parquet(
|
|
748
|
+
output_file,
|
|
749
|
+
compression=compression or "uncompressed",
|
|
750
|
+
)
|
|
751
|
+
file_count = 1
|
|
752
|
+
|
|
753
|
+
# Estimate bytes written
|
|
754
|
+
bytes_written = sum(f.stat().st_size for f in target_path.rglob("*.parquet"))
|
|
755
|
+
|
|
756
|
+
return row_count, bytes_written, file_count
|
|
757
|
+
|
|
758
|
+
def _bulk_load_pandas(
|
|
759
|
+
self,
|
|
760
|
+
source_path: Path,
|
|
761
|
+
target_path: Path,
|
|
762
|
+
source_format: str,
|
|
763
|
+
target_format: str,
|
|
764
|
+
compression: str | None,
|
|
765
|
+
sort_columns: list[str] | None,
|
|
766
|
+
) -> tuple[int, int | None, int]:
|
|
767
|
+
"""Pandas-specific bulk load implementation.
|
|
768
|
+
|
|
769
|
+
Returns:
|
|
770
|
+
Tuple of (rows_written, bytes_written, file_count)
|
|
771
|
+
"""
|
|
772
|
+
try:
|
|
773
|
+
import pandas as pd
|
|
774
|
+
except ImportError as e:
|
|
775
|
+
raise ImportError("Pandas is required for pandas-df bulk load") from e
|
|
776
|
+
|
|
777
|
+
# Read source data
|
|
778
|
+
if source_format == "parquet":
|
|
779
|
+
df = pd.read_parquet(source_path)
|
|
780
|
+
elif source_format == "csv":
|
|
781
|
+
df = pd.read_csv(source_path)
|
|
782
|
+
elif source_format == "json":
|
|
783
|
+
df = pd.read_json(source_path)
|
|
784
|
+
else:
|
|
785
|
+
raise ValueError(f"Unsupported source format: {source_format}")
|
|
786
|
+
|
|
787
|
+
row_count = len(df)
|
|
788
|
+
|
|
789
|
+
# Apply sorting
|
|
790
|
+
if sort_columns:
|
|
791
|
+
df = df.sort_values(sort_columns)
|
|
792
|
+
|
|
793
|
+
# Write to target
|
|
794
|
+
target_path.mkdir(parents=True, exist_ok=True)
|
|
795
|
+
output_file = target_path / "part-00000.parquet"
|
|
796
|
+
df.to_parquet(
|
|
797
|
+
output_file,
|
|
798
|
+
compression=compression or "snappy",
|
|
799
|
+
index=False,
|
|
800
|
+
)
|
|
801
|
+
|
|
802
|
+
bytes_written = output_file.stat().st_size
|
|
803
|
+
|
|
804
|
+
return row_count, bytes_written, 1
|
|
805
|
+
|
|
806
|
+
def _bulk_load_pyspark(
|
|
807
|
+
self,
|
|
808
|
+
source_path: Path,
|
|
809
|
+
target_path: Path,
|
|
810
|
+
source_format: str,
|
|
811
|
+
target_format: str,
|
|
812
|
+
compression: str | None,
|
|
813
|
+
partition_columns: list[str] | None,
|
|
814
|
+
sort_columns: list[str] | None,
|
|
815
|
+
) -> tuple[int, int | None, int]:
|
|
816
|
+
"""PySpark-specific bulk load implementation using DataFrame API.
|
|
817
|
+
|
|
818
|
+
Uses spark.read.format().load() and df.write.format().save() pattern.
|
|
819
|
+
|
|
820
|
+
Returns:
|
|
821
|
+
Tuple of (rows_written, bytes_written, file_count)
|
|
822
|
+
"""
|
|
823
|
+
if self.spark_session is None:
|
|
824
|
+
raise ValueError(
|
|
825
|
+
"SparkSession is required for PySpark bulk load. "
|
|
826
|
+
"Pass spark_session to DataFrameWriteOperationsManager or use get_pyspark_write_manager()."
|
|
827
|
+
)
|
|
828
|
+
|
|
829
|
+
spark = self.spark_session
|
|
830
|
+
source_str = str(source_path)
|
|
831
|
+
target_str = str(target_path)
|
|
832
|
+
|
|
833
|
+
# Read source data using DataFrame API
|
|
834
|
+
reader = spark.read.format(source_format)
|
|
835
|
+
|
|
836
|
+
if source_format == "csv":
|
|
837
|
+
reader = reader.option("header", "true").option("inferSchema", "true")
|
|
838
|
+
|
|
839
|
+
df = reader.load(source_str)
|
|
840
|
+
row_count = df.count()
|
|
841
|
+
|
|
842
|
+
if row_count == 0:
|
|
843
|
+
self.logger.info("No rows to load")
|
|
844
|
+
return 0, 0, 0
|
|
845
|
+
|
|
846
|
+
# Apply sorting
|
|
847
|
+
if sort_columns:
|
|
848
|
+
df = df.orderBy(*sort_columns)
|
|
849
|
+
|
|
850
|
+
# Build writer
|
|
851
|
+
writer = df.write.mode("overwrite")
|
|
852
|
+
|
|
853
|
+
if partition_columns:
|
|
854
|
+
writer = writer.partitionBy(*partition_columns)
|
|
855
|
+
|
|
856
|
+
if compression:
|
|
857
|
+
writer = writer.option("compression", compression)
|
|
858
|
+
|
|
859
|
+
# Write using DataFrame API
|
|
860
|
+
if target_format == "delta":
|
|
861
|
+
writer.format("delta").save(target_str)
|
|
862
|
+
else:
|
|
863
|
+
writer.parquet(target_str)
|
|
864
|
+
|
|
865
|
+
# Estimate bytes written and file count
|
|
866
|
+
target_path.mkdir(parents=True, exist_ok=True)
|
|
867
|
+
parquet_files = list(target_path.rglob("*.parquet"))
|
|
868
|
+
file_count = len(parquet_files)
|
|
869
|
+
bytes_written = sum(f.stat().st_size for f in parquet_files)
|
|
870
|
+
|
|
871
|
+
return row_count, bytes_written, file_count
|
|
872
|
+
|
|
873
|
+
|
|
874
|
+
def get_dataframe_write_manager(
    platform_name: str,
    spark_session: Any = None,
) -> DataFrameWriteOperationsManager | None:
    """Build a write operations manager for a DataFrame platform.

    A platform qualifies when its lower-cased name contains "polars",
    "pandas" or "pyspark". Construction errors are logged as warnings and
    reported as None rather than raised.

    Args:
        platform_name: Platform name (e.g., "polars-df", "pandas-df", "pyspark-df")
        spark_session: SparkSession instance (required for pyspark-df)

    Returns:
        DataFrameWriteOperationsManager if platform supports DataFrame writes,
        None if platform is not a DataFrame platform or the manager could
        not be created.
    """
    lowered = platform_name.lower()

    # Substring match, so the "-df" spellings need no separate entries.
    is_dataframe_platform = any(marker in lowered for marker in ("polars", "pandas", "pyspark"))
    if not is_dataframe_platform:
        logger.debug(f"Platform {platform_name} is not a DataFrame platform")
        return None

    try:
        manager = DataFrameWriteOperationsManager(platform_name, spark_session=spark_session)
    except Exception as e:
        logger.warning(f"Failed to create write manager for {platform_name}: {e}")
        return None
    return manager
|
|
901
|
+
|
|
902
|
+
|
|
903
|
+
# Public API of this module: the names exported by a star-import.
# NOTE(review): the three *_WRITE_CAPABILITIES constants are defined outside
# this excerpt — confirm they exist at module level.
__all__ = [
    "WriteOperationType",
    "DataFrameWriteCapabilities",
    "DataFrameWriteResult",
    "DataFrameWriteOperationsManager",
    "get_dataframe_write_manager",
    "POLARS_WRITE_CAPABILITIES",
    "PANDAS_WRITE_CAPABILITIES",
    "PYSPARK_WRITE_CAPABILITIES",
]
|