benchbox 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- benchbox/__init__.py +1 -1
- benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query72.tpl +1 -1
- benchbox/_binaries/tpc-ds/{darwin-x86_64/query_templates/ansi.tpl → templates/query_templates/sqlserver.tpl} +1 -1
- benchbox/_binaries/tpc-ds/templates/query_variants/README +6 -0
- benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query10.tpl → templates/query_variants/query10a.tpl} +13 -14
- benchbox/_binaries/tpc-ds/{darwin-x86_64/query_templates/query14.tpl → templates/query_variants/query14a.tpl} +30 -26
- benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query18.tpl → templates/query_variants/query18a.tpl} +40 -19
- benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query22.tpl → templates/query_variants/query22a.tpl} +31 -9
- benchbox/_binaries/tpc-ds/{darwin-x86_64/query_templates/query27.tpl → templates/query_variants/query27a.tpl} +23 -10
- benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query35.tpl → templates/query_variants/query35a.tpl} +9 -8
- benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query36.tpl → templates/query_variants/query36a.tpl} +24 -12
- benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query51.tpl → templates/query_variants/query51a.tpl} +37 -20
- benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query5.tpl → templates/query_variants/query5a.tpl} +15 -10
- benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query67.tpl → templates/query_variants/query67a.tpl} +46 -18
- benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query70.tpl → templates/query_variants/query70a.tpl} +31 -27
- benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query77.tpl → templates/query_variants/query77a.tpl} +22 -15
- benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query80.tpl → templates/query_variants/query80a.tpl} +22 -8
- benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query86.tpl → templates/query_variants/query86a.tpl} +22 -13
- benchbox/_binaries/tpc-h/templates/dists.dss +836 -0
- benchbox/_binaries/tpc-h/templates/queries/1.sql +28 -0
- benchbox/_binaries/tpc-h/templates/queries/10.sql +38 -0
- benchbox/_binaries/tpc-h/templates/queries/11.sql +34 -0
- benchbox/_binaries/tpc-h/templates/queries/12.sql +35 -0
- benchbox/_binaries/tpc-h/templates/queries/13.sql +27 -0
- benchbox/_binaries/tpc-h/templates/queries/14.sql +20 -0
- benchbox/_binaries/tpc-h/templates/queries/15.sql +40 -0
- benchbox/_binaries/tpc-h/templates/queries/16.sql +37 -0
- benchbox/_binaries/tpc-h/templates/queries/17.sql +24 -0
- benchbox/_binaries/tpc-h/templates/queries/18.sql +39 -0
- benchbox/_binaries/tpc-h/templates/queries/19.sql +42 -0
- benchbox/_binaries/tpc-h/templates/queries/2.sql +50 -0
- benchbox/_binaries/tpc-h/templates/queries/20.sql +44 -0
- benchbox/_binaries/tpc-h/templates/queries/21.sql +46 -0
- benchbox/_binaries/tpc-h/templates/queries/22.sql +44 -0
- benchbox/_binaries/tpc-h/templates/queries/3.sql +29 -0
- benchbox/_binaries/tpc-h/templates/queries/4.sql +28 -0
- benchbox/_binaries/tpc-h/templates/queries/5.sql +31 -0
- benchbox/_binaries/tpc-h/templates/queries/6.sql +16 -0
- benchbox/_binaries/tpc-h/templates/queries/7.sql +46 -0
- benchbox/_binaries/tpc-h/templates/queries/8.sql +44 -0
- benchbox/_binaries/tpc-h/templates/queries/9.sql +39 -0
- benchbox/_binaries/tpc-h/templates/variants/12a.sql +27 -0
- benchbox/_binaries/tpc-h/templates/variants/13a.sql +30 -0
- benchbox/_binaries/tpc-h/templates/variants/14a.sql +18 -0
- benchbox/_binaries/tpc-h/templates/variants/15a.sql +39 -0
- benchbox/_binaries/tpc-h/templates/variants/8a.sql +77 -0
- benchbox/base.py +88 -121
- benchbox/cli/benchmarks.py +3 -3
- benchbox/cli/commands/calculate_qphh.py +55 -14
- benchbox/cli/commands/checks.py +1 -4
- benchbox/cli/commands/convert.py +8 -3
- benchbox/cli/commands/metrics.py +55 -14
- benchbox/cli/commands/results.py +131 -3
- benchbox/cli/commands/run.py +157 -22
- benchbox/cli/commands/visualize.py +3 -3
- benchbox/cli/composite_params.py +1 -1
- benchbox/cli/config.py +13 -3
- benchbox/cli/database.py +3 -3
- benchbox/cli/dryrun.py +30 -4
- benchbox/cli/exceptions.py +2 -1
- benchbox/cli/execution_pipeline.py +2 -1
- benchbox/cli/orchestrator.py +25 -71
- benchbox/cli/tuning.py +1 -1
- benchbox/core/ai_primitives/benchmark.py +53 -0
- benchbox/core/ai_primitives/dataframe_operations.py +1217 -0
- benchbox/core/base_benchmark.py +90 -68
- benchbox/core/coffeeshop/queries.py +1 -1
- benchbox/core/coffeeshop/schema.py +1 -1
- benchbox/core/comparison/plotter.py +5 -4
- benchbox/core/dataframe/__init__.py +26 -0
- benchbox/core/dataframe/benchmark_suite.py +5 -4
- benchbox/core/dataframe/context.py +45 -0
- benchbox/core/dataframe/data_loader.py +180 -79
- benchbox/core/dataframe/maintenance_interface.py +866 -0
- benchbox/core/dryrun.py +152 -22
- benchbox/core/expected_results/registry.py +22 -5
- benchbox/core/manifest/io.py +4 -3
- benchbox/core/metadata_primitives/__init__.py +31 -0
- benchbox/core/metadata_primitives/benchmark.py +337 -0
- benchbox/core/metadata_primitives/dataframe_operations.py +1824 -0
- benchbox/core/platform_registry.py +134 -45
- benchbox/core/read_primitives/benchmark.py +56 -4
- benchbox/core/read_primitives/dataframe_queries.py +6547 -0
- benchbox/core/results/__init__.py +47 -6
- benchbox/core/results/builder.py +909 -0
- benchbox/core/results/database.py +5 -5
- benchbox/core/results/exporter.py +58 -96
- benchbox/core/results/filenames.py +102 -0
- benchbox/core/results/loader.py +10 -9
- benchbox/core/results/metrics.py +211 -0
- benchbox/core/results/models.py +3 -1
- benchbox/core/results/normalizer.py +346 -0
- benchbox/core/results/platform_info.py +235 -0
- benchbox/core/results/query_normalizer.py +200 -0
- benchbox/core/results/schema.py +368 -69
- benchbox/core/runner/conversion.py +2 -0
- benchbox/core/runner/dataframe_runner.py +135 -131
- benchbox/core/runner/runner.py +111 -18
- benchbox/core/schemas.py +145 -3
- benchbox/core/ssb/generator.py +14 -2
- benchbox/core/tpc_compliance.py +4 -4
- benchbox/core/tpc_metrics.py +9 -4
- benchbox/core/tpcdi/generator/manifest.py +15 -2
- benchbox/core/tpcds/benchmark/runner.py +3 -7
- benchbox/core/tpcds/c_tools.py +34 -28
- benchbox/core/tpcds/dataframe_queries/queries.py +44 -21
- benchbox/core/tpcds/generator/filesystem.py +23 -11
- benchbox/core/tpcds/generator/manager.py +3 -2
- benchbox/core/tpcds/maintenance_test.py +281 -0
- benchbox/core/tpcds/power_test.py +21 -11
- benchbox/core/tpcds/throughput_test.py +27 -9
- benchbox/core/tpcds_obt/etl/transformer.py +24 -5
- benchbox/core/tpch/dataframe_queries.py +46 -43
- benchbox/core/tpch/generator.py +21 -8
- benchbox/core/tpch/maintenance_test.py +87 -0
- benchbox/core/tpch/power_test.py +21 -5
- benchbox/core/tpch/queries.py +2 -7
- benchbox/core/tpch/streams.py +3 -19
- benchbox/core/transaction_primitives/benchmark.py +99 -0
- benchbox/core/transaction_primitives/dataframe_operations.py +1294 -0
- benchbox/core/transaction_primitives/generator.py +11 -4
- benchbox/core/visualization/__init__.py +2 -2
- benchbox/core/visualization/charts.py +4 -4
- benchbox/core/visualization/dependencies.py +1 -12
- benchbox/core/visualization/exporters.py +15 -26
- benchbox/core/visualization/result_plotter.py +90 -49
- benchbox/core/visualization/templates.py +6 -6
- benchbox/core/write_primitives/__init__.py +13 -0
- benchbox/core/write_primitives/benchmark.py +66 -0
- benchbox/core/write_primitives/dataframe_operations.py +912 -0
- benchbox/core/write_primitives/generator.py +11 -4
- benchbox/mcp/__init__.py +5 -1
- benchbox/mcp/errors.py +29 -0
- benchbox/mcp/resources/registry.py +12 -7
- benchbox/mcp/schemas.py +62 -0
- benchbox/mcp/server.py +17 -14
- benchbox/mcp/tools/__init__.py +3 -0
- benchbox/mcp/tools/analytics.py +550 -582
- benchbox/mcp/tools/benchmark.py +603 -611
- benchbox/mcp/tools/discovery.py +156 -205
- benchbox/mcp/tools/results.py +332 -533
- benchbox/mcp/tools/visualization.py +449 -0
- benchbox/platforms/__init__.py +740 -622
- benchbox/platforms/adapter_factory.py +6 -6
- benchbox/platforms/azure_synapse.py +3 -7
- benchbox/platforms/base/adapter.py +189 -49
- benchbox/platforms/base/cloud_spark/config.py +8 -0
- benchbox/platforms/base/cloud_spark/mixins.py +96 -0
- benchbox/platforms/base/cloud_spark/session.py +4 -2
- benchbox/platforms/base/cloud_spark/staging.py +15 -7
- benchbox/platforms/base/data_loading.py +315 -1
- benchbox/platforms/base/format_capabilities.py +37 -2
- benchbox/platforms/base/utils.py +6 -4
- benchbox/platforms/bigquery.py +5 -6
- benchbox/platforms/clickhouse_cloud.py +263 -0
- benchbox/platforms/databricks/adapter.py +16 -15
- benchbox/platforms/databricks/dataframe_adapter.py +4 -1
- benchbox/platforms/dataframe/__init__.py +31 -0
- benchbox/platforms/dataframe/benchmark_mixin.py +779 -0
- benchbox/platforms/dataframe/cudf_df.py +3 -3
- benchbox/platforms/dataframe/dask_df.py +3 -3
- benchbox/platforms/dataframe/datafusion_df.py +152 -15
- benchbox/platforms/dataframe/delta_lake_maintenance.py +341 -0
- benchbox/platforms/dataframe/ducklake_maintenance.py +402 -0
- benchbox/platforms/dataframe/expression_family.py +47 -8
- benchbox/platforms/dataframe/hudi_maintenance.py +437 -0
- benchbox/platforms/dataframe/iceberg_maintenance.py +605 -0
- benchbox/platforms/dataframe/modin_df.py +3 -3
- benchbox/platforms/dataframe/pandas_df.py +3 -3
- benchbox/platforms/dataframe/pandas_family.py +59 -8
- benchbox/platforms/dataframe/platform_checker.py +16 -49
- benchbox/platforms/dataframe/polars_df.py +14 -12
- benchbox/platforms/dataframe/polars_maintenance.py +630 -0
- benchbox/platforms/dataframe/pyspark_df.py +15 -0
- benchbox/platforms/dataframe/pyspark_maintenance.py +613 -0
- benchbox/platforms/datafusion.py +5 -6
- benchbox/platforms/duckdb.py +2 -1
- benchbox/platforms/fabric_warehouse.py +15 -15
- benchbox/platforms/firebolt.py +3 -2
- benchbox/platforms/influxdb/adapter.py +7 -3
- benchbox/platforms/motherduck.py +3 -2
- benchbox/platforms/onehouse/__init__.py +39 -0
- benchbox/platforms/onehouse/onehouse_client.py +509 -0
- benchbox/platforms/onehouse/quanton_adapter.py +646 -0
- benchbox/platforms/postgresql.py +5 -9
- benchbox/platforms/presto.py +2 -2
- benchbox/platforms/pyspark/session.py +3 -3
- benchbox/platforms/pyspark/sql_adapter.py +2 -3
- benchbox/platforms/redshift.py +7 -7
- benchbox/platforms/snowflake.py +4 -4
- benchbox/platforms/snowpark_connect.py +2 -1
- benchbox/platforms/trino.py +2 -2
- benchbox/release/__init__.py +17 -0
- benchbox/release/content_validation.py +745 -0
- benchbox/release/workflow.py +17 -0
- benchbox/utils/VERSION_MANAGEMENT.md +1 -1
- benchbox/utils/cloud_storage.py +7 -5
- benchbox/utils/compression.py +8 -8
- benchbox/utils/compression_mixin.py +2 -1
- benchbox/utils/data_validation.py +23 -14
- benchbox/utils/dependencies.py +47 -7
- benchbox/utils/file_format.py +407 -0
- benchbox/utils/format_converters/__init__.py +5 -1
- benchbox/utils/format_converters/ducklake_converter.py +227 -0
- benchbox/utils/format_converters/vortex_converter.py +168 -0
- benchbox/utils/tpc_compilation.py +43 -0
- benchbox/utils/version.py +14 -2
- {benchbox-0.1.0.dist-info → benchbox-0.1.1.dist-info}/METADATA +15 -15
- benchbox-0.1.1.dist-info/RECORD +839 -0
- {benchbox-0.1.0.dist-info → benchbox-0.1.1.dist-info}/WHEEL +1 -1
- benchbox/_binaries/tpc-ds/darwin-arm64/query_templates/sqlserver.tpl +0 -37
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/README +0 -4
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/db2.tpl +0 -38
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/netezza.tpl +0 -38
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/oracle.tpl +0 -38
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query1.tpl +0 -62
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query10.tpl +0 -98
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query11.tpl +0 -119
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query12.tpl +0 -72
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query13.tpl +0 -89
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query15.tpl +0 -56
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query16.tpl +0 -76
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query17.tpl +0 -80
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query18.tpl +0 -73
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query19.tpl +0 -64
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query2.tpl +0 -94
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query20.tpl +0 -67
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query21.tpl +0 -65
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query22.tpl +0 -54
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query23.tpl +0 -144
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query24.tpl +0 -147
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query25.tpl +0 -84
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query26.tpl +0 -61
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query28.tpl +0 -90
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query29.tpl +0 -85
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query3.tpl +0 -58
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query30.tpl +0 -66
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query31.tpl +0 -88
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query32.tpl +0 -65
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query33.tpl +0 -113
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query34.tpl +0 -77
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query35.tpl +0 -98
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query36.tpl +0 -74
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query37.tpl +0 -57
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query38.tpl +0 -58
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query39.tpl +0 -93
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query4.tpl +0 -154
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query40.tpl +0 -63
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query41.tpl +0 -90
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query42.tpl +0 -64
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query43.tpl +0 -55
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query44.tpl +0 -72
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query45.tpl +0 -56
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query46.tpl +0 -78
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query47.tpl +0 -89
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query48.tpl +0 -104
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query49.tpl +0 -164
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query5.tpl +0 -165
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query50.tpl +0 -96
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query51.tpl +0 -80
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query52.tpl +0 -59
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query53.tpl +0 -64
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query54.tpl +0 -95
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query55.tpl +0 -52
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query56.tpl +0 -108
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query57.tpl +0 -87
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query58.tpl +0 -101
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query59.tpl +0 -79
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query6.tpl +0 -62
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query60.tpl +0 -115
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query61.tpl +0 -83
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query62.tpl +0 -71
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query63.tpl +0 -64
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query64.tpl +0 -157
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query65.tpl +0 -62
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query66.tpl +0 -261
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query67.tpl +0 -81
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query68.tpl +0 -82
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query69.tpl +0 -85
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query7.tpl +0 -60
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query70.tpl +0 -73
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query71.tpl +0 -74
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query72.tpl +0 -67
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query73.tpl +0 -69
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query74.tpl +0 -99
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query75.tpl +0 -107
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query76.tpl +0 -64
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query77.tpl +0 -145
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query78.tpl +0 -94
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query79.tpl +0 -60
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query8.tpl +0 -144
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query80.tpl +0 -131
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query81.tpl +0 -68
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query82.tpl +0 -56
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query83.tpl +0 -104
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query84.tpl +0 -58
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query85.tpl +0 -121
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query86.tpl +0 -60
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query87.tpl +0 -56
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query88.tpl +0 -128
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query89.tpl +0 -75
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query9.tpl +0 -88
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query90.tpl +0 -58
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query91.tpl +0 -68
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query92.tpl +0 -68
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query93.tpl +0 -53
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query94.tpl +0 -67
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query95.tpl +0 -71
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query96.tpl +0 -52
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query97.tpl +0 -62
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query98.tpl +0 -70
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query99.tpl +0 -69
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/sqlserver.tpl +0 -37
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/templates.lst +0 -99
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/README +0 -4
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/ansi.tpl +0 -38
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/db2.tpl +0 -38
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/netezza.tpl +0 -38
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/oracle.tpl +0 -38
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query1.tpl +0 -62
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query11.tpl +0 -119
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query12.tpl +0 -72
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query13.tpl +0 -89
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query14.tpl +0 -247
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query15.tpl +0 -56
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query16.tpl +0 -76
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query17.tpl +0 -80
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query19.tpl +0 -64
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query2.tpl +0 -94
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query20.tpl +0 -67
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query21.tpl +0 -65
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query23.tpl +0 -144
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query24.tpl +0 -147
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query25.tpl +0 -84
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query26.tpl +0 -61
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query27.tpl +0 -68
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query28.tpl +0 -90
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query29.tpl +0 -85
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query3.tpl +0 -58
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query30.tpl +0 -66
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query31.tpl +0 -88
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query32.tpl +0 -65
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query33.tpl +0 -113
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query34.tpl +0 -77
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query37.tpl +0 -57
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query38.tpl +0 -58
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query39.tpl +0 -93
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query4.tpl +0 -154
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query40.tpl +0 -63
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query41.tpl +0 -90
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query42.tpl +0 -64
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query43.tpl +0 -55
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query44.tpl +0 -72
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query45.tpl +0 -56
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query46.tpl +0 -78
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query47.tpl +0 -89
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query48.tpl +0 -104
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query49.tpl +0 -164
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query50.tpl +0 -96
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query52.tpl +0 -59
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query53.tpl +0 -64
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query54.tpl +0 -95
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query55.tpl +0 -52
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query56.tpl +0 -108
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query57.tpl +0 -87
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query58.tpl +0 -101
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query59.tpl +0 -79
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query6.tpl +0 -62
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query60.tpl +0 -115
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query61.tpl +0 -83
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query62.tpl +0 -71
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query63.tpl +0 -64
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query64.tpl +0 -157
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query65.tpl +0 -62
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query66.tpl +0 -261
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query68.tpl +0 -82
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query69.tpl +0 -85
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query7.tpl +0 -60
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query71.tpl +0 -74
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query72.tpl +0 -67
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query73.tpl +0 -69
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query74.tpl +0 -99
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query75.tpl +0 -107
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query76.tpl +0 -64
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query78.tpl +0 -94
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query79.tpl +0 -60
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query8.tpl +0 -144
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query81.tpl +0 -68
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query82.tpl +0 -56
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query83.tpl +0 -104
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query84.tpl +0 -58
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query85.tpl +0 -121
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query87.tpl +0 -56
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query88.tpl +0 -128
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query89.tpl +0 -75
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query9.tpl +0 -88
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query90.tpl +0 -58
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query91.tpl +0 -68
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query92.tpl +0 -68
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query93.tpl +0 -53
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query94.tpl +0 -67
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query95.tpl +0 -71
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query96.tpl +0 -52
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query97.tpl +0 -62
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query98.tpl +0 -70
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query99.tpl +0 -69
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/sqlserver.tpl +0 -37
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/templates.lst +0 -99
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/README +0 -4
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/ansi.tpl +0 -38
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/db2.tpl +0 -38
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/netezza.tpl +0 -38
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/oracle.tpl +0 -38
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query1.tpl +0 -62
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query10.tpl +0 -98
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query11.tpl +0 -119
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query12.tpl +0 -72
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query13.tpl +0 -89
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query14.tpl +0 -247
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query15.tpl +0 -56
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query16.tpl +0 -76
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query17.tpl +0 -80
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query18.tpl +0 -73
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query19.tpl +0 -64
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query2.tpl +0 -94
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query20.tpl +0 -67
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query21.tpl +0 -65
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query22.tpl +0 -54
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query23.tpl +0 -144
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query24.tpl +0 -147
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query25.tpl +0 -84
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query26.tpl +0 -61
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query27.tpl +0 -68
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query28.tpl +0 -90
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query29.tpl +0 -85
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query3.tpl +0 -58
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query30.tpl +0 -66
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query31.tpl +0 -88
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query32.tpl +0 -65
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query33.tpl +0 -113
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query34.tpl +0 -77
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query35.tpl +0 -98
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query36.tpl +0 -74
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query37.tpl +0 -57
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query38.tpl +0 -58
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query39.tpl +0 -93
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query4.tpl +0 -154
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query40.tpl +0 -63
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query41.tpl +0 -90
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query42.tpl +0 -64
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query43.tpl +0 -55
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query44.tpl +0 -72
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query45.tpl +0 -56
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query46.tpl +0 -78
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query47.tpl +0 -89
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query48.tpl +0 -104
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query49.tpl +0 -164
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query5.tpl +0 -165
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query50.tpl +0 -96
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query51.tpl +0 -80
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query52.tpl +0 -59
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query53.tpl +0 -64
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query54.tpl +0 -95
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query55.tpl +0 -52
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query56.tpl +0 -108
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query57.tpl +0 -87
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query58.tpl +0 -101
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query59.tpl +0 -79
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query6.tpl +0 -62
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query60.tpl +0 -115
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query61.tpl +0 -83
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query62.tpl +0 -71
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query63.tpl +0 -64
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query64.tpl +0 -157
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query65.tpl +0 -62
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query66.tpl +0 -261
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query67.tpl +0 -81
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query68.tpl +0 -82
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query69.tpl +0 -85
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query7.tpl +0 -60
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query70.tpl +0 -73
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query71.tpl +0 -74
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query72.tpl +0 -67
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query73.tpl +0 -69
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query74.tpl +0 -99
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query75.tpl +0 -107
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query76.tpl +0 -64
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query77.tpl +0 -145
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query78.tpl +0 -94
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query79.tpl +0 -60
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query8.tpl +0 -144
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query80.tpl +0 -131
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query81.tpl +0 -68
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query82.tpl +0 -56
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query83.tpl +0 -104
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query84.tpl +0 -58
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query85.tpl +0 -121
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query86.tpl +0 -60
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query87.tpl +0 -56
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query88.tpl +0 -128
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query89.tpl +0 -75
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query9.tpl +0 -88
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query90.tpl +0 -58
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query91.tpl +0 -68
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query92.tpl +0 -68
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query93.tpl +0 -53
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query94.tpl +0 -67
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query95.tpl +0 -71
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query96.tpl +0 -52
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query97.tpl +0 -62
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query98.tpl +0 -70
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query99.tpl +0 -69
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/sqlserver.tpl +0 -37
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/templates.lst +0 -99
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/README +0 -4
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/ansi.tpl +0 -38
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/db2.tpl +0 -38
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/netezza.tpl +0 -38
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/oracle.tpl +0 -38
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query1.tpl +0 -62
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query10.tpl +0 -98
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query11.tpl +0 -119
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query12.tpl +0 -72
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query13.tpl +0 -89
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query14.tpl +0 -247
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query15.tpl +0 -56
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query16.tpl +0 -76
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query17.tpl +0 -80
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query18.tpl +0 -73
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query19.tpl +0 -64
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query2.tpl +0 -94
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query20.tpl +0 -67
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query21.tpl +0 -65
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query22.tpl +0 -54
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query23.tpl +0 -144
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query24.tpl +0 -147
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query25.tpl +0 -84
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query26.tpl +0 -61
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query27.tpl +0 -68
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query28.tpl +0 -90
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query29.tpl +0 -85
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query3.tpl +0 -58
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query30.tpl +0 -66
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query31.tpl +0 -88
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query32.tpl +0 -65
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query33.tpl +0 -113
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query34.tpl +0 -77
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query35.tpl +0 -98
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query36.tpl +0 -74
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query37.tpl +0 -57
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query38.tpl +0 -58
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query39.tpl +0 -93
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query4.tpl +0 -154
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query40.tpl +0 -63
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query41.tpl +0 -90
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query42.tpl +0 -64
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query43.tpl +0 -55
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query44.tpl +0 -72
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query45.tpl +0 -56
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query46.tpl +0 -78
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query47.tpl +0 -89
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query48.tpl +0 -104
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query49.tpl +0 -164
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query5.tpl +0 -165
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query50.tpl +0 -96
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query51.tpl +0 -80
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query52.tpl +0 -59
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query53.tpl +0 -64
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query54.tpl +0 -95
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query55.tpl +0 -52
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query56.tpl +0 -108
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query57.tpl +0 -87
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query58.tpl +0 -101
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query59.tpl +0 -79
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query6.tpl +0 -62
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query60.tpl +0 -115
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query61.tpl +0 -83
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query62.tpl +0 -71
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query63.tpl +0 -64
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query64.tpl +0 -157
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query65.tpl +0 -62
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query66.tpl +0 -261
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query67.tpl +0 -81
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query68.tpl +0 -82
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query69.tpl +0 -85
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query7.tpl +0 -60
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query70.tpl +0 -73
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query71.tpl +0 -74
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query72.tpl +0 -67
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query73.tpl +0 -69
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query74.tpl +0 -99
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query75.tpl +0 -107
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query76.tpl +0 -64
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query77.tpl +0 -145
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query78.tpl +0 -94
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query79.tpl +0 -60
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query8.tpl +0 -144
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query80.tpl +0 -131
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query81.tpl +0 -68
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query82.tpl +0 -56
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query83.tpl +0 -104
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query84.tpl +0 -58
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query85.tpl +0 -121
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query86.tpl +0 -60
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query87.tpl +0 -56
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query88.tpl +0 -128
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query89.tpl +0 -75
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query9.tpl +0 -88
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query90.tpl +0 -58
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query91.tpl +0 -68
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query92.tpl +0 -68
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query93.tpl +0 -53
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query94.tpl +0 -67
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query95.tpl +0 -71
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query96.tpl +0 -52
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query97.tpl +0 -62
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query98.tpl +0 -70
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query99.tpl +0 -69
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/sqlserver.tpl +0 -37
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/templates.lst +0 -99
- benchbox-0.1.0.dist-info/RECORD +0 -1192
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/README +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/ansi.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/db2.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/netezza.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/oracle.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query1.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query10.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query11.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query12.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query13.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query14.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query15.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query16.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query17.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query18.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query19.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query2.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query20.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query21.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query22.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query23.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query24.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query25.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query26.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query27.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query28.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query29.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query3.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query30.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query31.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query32.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query33.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query34.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query35.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query36.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query37.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query38.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query39.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query4.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query40.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query41.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query42.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query43.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query44.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query45.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query46.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query47.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query48.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query49.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query5.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query50.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query51.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query52.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query53.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query54.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query55.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query56.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query57.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query58.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query59.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query6.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query60.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query61.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query62.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query63.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query64.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query65.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query66.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query67.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query68.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query69.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query7.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query70.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query71.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query73.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query74.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query75.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query76.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query77.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query78.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query79.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query8.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query80.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query81.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query82.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query83.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query84.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query85.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query86.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query87.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query88.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query89.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query9.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query90.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query91.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query92.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query93.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query94.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query95.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query96.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query97.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query98.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query99.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/templates.lst +0 -0
- {benchbox-0.1.0.dist-info → benchbox-0.1.1.dist-info}/entry_points.txt +0 -0
- {benchbox-0.1.0.dist-info → benchbox-0.1.1.dist-info}/licenses/LICENSE +0 -0
- {benchbox-0.1.0.dist-info → benchbox-0.1.1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,407 @@
|
|
|
1
|
+
"""File format and compression detection utilities for BenchBox.
|
|
2
|
+
|
|
3
|
+
This module provides centralized utilities for detecting file formats and
|
|
4
|
+
compression types from file paths. It eliminates duplication across the
|
|
5
|
+
codebase where compression extension sets were hardcoded in multiple locations.
|
|
6
|
+
|
|
7
|
+
Usage:
|
|
8
|
+
from benchbox.utils.file_format import (
|
|
9
|
+
detect_data_format,
|
|
10
|
+
detect_compression,
|
|
11
|
+
strip_compression_suffix,
|
|
12
|
+
is_compression_extension,
|
|
13
|
+
is_tpc_format,
|
|
14
|
+
is_parquet_format,
|
|
15
|
+
is_csv_format,
|
|
16
|
+
get_delimiter_for_file,
|
|
17
|
+
COMPRESSION_EXTENSIONS,
|
|
18
|
+
DATA_FORMAT_EXTENSIONS,
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
# Detect format from compressed file
|
|
22
|
+
detect_data_format(Path("data.tbl.zst")) # Returns "tbl"
|
|
23
|
+
|
|
24
|
+
# Detect compression type
|
|
25
|
+
detect_compression(Path("data.csv.gz")) # Returns "gzip"
|
|
26
|
+
|
|
27
|
+
# Strip compression suffix
|
|
28
|
+
strip_compression_suffix(Path("data.tbl.zst")) # Returns Path("data.tbl")
|
|
29
|
+
|
|
30
|
+
Copyright 2026 Joe Harris / BenchBox Project
|
|
31
|
+
|
|
32
|
+
Licensed under the MIT License. See LICENSE file in the project root for details.
|
|
33
|
+
"""
|
|
34
|
+
|
|
35
|
+
from __future__ import annotations
|
|
36
|
+
|
|
37
|
+
import logging
|
|
38
|
+
from pathlib import Path
|
|
39
|
+
from typing import Union
|
|
40
|
+
|
|
41
|
+
logger = logging.getLogger(__name__)
|
|
42
|
+
|
|
43
|
+
# Mapping from compression extension to canonical name.
# This dict is the single source of truth for compression extensions:
# COMPRESSION_EXTENSIONS is derived from its keys so the two cannot drift apart.
_COMPRESSION_NAMES: dict[str, str] = {
    ".zst": "zstd",  # Zstandard
    ".gz": "gzip",  # Gzip
    ".bz2": "bzip2",  # Bzip2
    ".xz": "xz",  # XZ/LZMA
    ".lz4": "lz4",  # LZ4
    ".snappy": "snappy",  # Snappy
}

# Recognized compression extensions
# This is the canonical set - all modules should import from here
COMPRESSION_EXTENSIONS: frozenset[str] = frozenset(_COMPRESSION_NAMES)

# Mapping from data format extension to format name.
# This dict is the single source of truth for data format extensions:
# DATA_FORMAT_EXTENSIONS is derived from its keys.
_FORMAT_NAMES: dict[str, str] = {
    ".parquet": "parquet",  # Apache Parquet
    ".vortex": "vortex",  # Vortex columnar format
    ".tbl": "tbl",  # TPC pipe-delimited
    ".csv": "csv",  # Comma-separated values
    ".dat": "tbl",  # TPC-DS pipe-delimited; .dat files use same format as .tbl
}

# Recognized data format extensions
DATA_FORMAT_EXTENSIONS: frozenset[str] = frozenset(_FORMAT_NAMES)
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def is_compression_extension(suffix: str) -> bool:
    """Report whether ``suffix`` names a recognized compression extension.

    The leading dot is optional and the comparison ignores case.

    Args:
        suffix: Extension to test, e.g. ``".zst"`` or ``"gz"``.

    Returns:
        True when the normalized suffix is a member of COMPRESSION_EXTENSIONS.

    Examples:
        >>> is_compression_extension(".zst")
        True
        >>> is_compression_extension("gz")
        True
        >>> is_compression_extension(".parquet")
        False
    """
    # Prepend the dot when the caller passed a bare extension such as "gz".
    dotted = suffix if suffix.startswith(".") else "." + suffix
    return dotted.lower() in COMPRESSION_EXTENSIONS
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def is_data_format_extension(suffix: str) -> bool:
    """Report whether ``suffix`` names a recognized data format extension.

    The leading dot is optional and the comparison ignores case.

    Args:
        suffix: Extension to test, e.g. ``".parquet"`` or ``"tbl"``.

    Returns:
        True when the normalized suffix is a member of DATA_FORMAT_EXTENSIONS.

    Examples:
        >>> is_data_format_extension(".parquet")
        True
        >>> is_data_format_extension("tbl")
        True
        >>> is_data_format_extension(".zst")
        False
    """
    # Prepend the dot when the caller passed a bare extension such as "tbl".
    dotted = suffix if suffix.startswith(".") else "." + suffix
    return dotted.lower() in DATA_FORMAT_EXTENSIONS
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def detect_compression(path: Union[str, Path]) -> str | None:
    """Return the canonical compression name implied by a path's last suffix.

    Only the final extension is inspected (case-insensitively); it is looked
    up in the module's extension-to-name mapping.

    Args:
        path: File path (string or Path) to inspect.

    Returns:
        One of 'zstd', 'gzip', 'bzip2', 'xz', 'lz4', 'snappy', or None when
        the final suffix is not a recognized compression extension.

    Examples:
        >>> detect_compression(Path("data.tbl.zst"))
        'zstd'
        >>> detect_compression(Path("data.csv.gz"))
        'gzip'
        >>> detect_compression(Path("data.parquet")) is None
        True
        >>> detect_compression("archive.tar.bz2")
        'bzip2'
    """
    final_suffix = Path(path).suffix
    return _COMPRESSION_NAMES.get(final_suffix.lower())
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def detect_data_format(path: Union[str, Path]) -> str:
    """Detect data format from path, handling compressed files.

    The file name's suffixes are scanned left to right (case-insensitively).
    Compression extensions are skipped, and the first suffix that maps to a
    known data format decides the result. Suffixes that are neither
    compression nor a known format are ignored.

    Args:
        path: File path (string or Path) to analyze.

    Returns:
        Format name: 'parquet', 'vortex', 'tbl' (also used for ``.dat``) or
        'csv'. 'csv' is also the fallback when no suffix is recognized.

    Examples:
        >>> detect_data_format(Path("data.tbl.zst"))
        'tbl'
        >>> detect_data_format(Path("data.parquet"))
        'parquet'
        >>> detect_data_format(Path("data.csv.gz"))
        'csv'
        >>> detect_data_format(Path("customer.dat"))
        'tbl'
        >>> detect_data_format(Path("unknown.txt"))
        'csv'
    """
    path = Path(path)

    # Single pass: remember unrecognized (non-compression) suffixes so they
    # can be reported if we end up falling back to csv.
    unrecognized: list[str] = []
    for raw_suffix in path.suffixes:
        suffix = raw_suffix.lower()
        if suffix in COMPRESSION_EXTENSIONS:
            continue
        if suffix in _FORMAT_NAMES:
            return _FORMAT_NAMES[suffix]
        unrecognized.append(suffix)

    # Fallback to csv for unknown formats.
    # Log at debug level to help diagnose unexpected file types; lazy %-args
    # avoid building the message when debug logging is disabled.
    if unrecognized:
        logger.debug(
            "Unknown format extension(s) %s in '%s', defaulting to csv",
            unrecognized,
            path,
        )
    return "csv"
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
def strip_compression_suffix(path: Union[str, Path]) -> Path:
    """Drop a trailing compression extension from a path, if there is one.

    Only the final suffix is considered, compared case-insensitively against
    COMPRESSION_EXTENSIONS. Paths without such a suffix come back unchanged
    (converted to a Path).

    Args:
        path: File path (string or Path) that may end in a compression suffix.

    Returns:
        A Path with the trailing compression suffix removed when present.

    Examples:
        >>> strip_compression_suffix(Path("data.tbl.zst")).name
        'data.tbl'
        >>> strip_compression_suffix(Path("data.csv.gz")).name
        'data.csv'
        >>> strip_compression_suffix(Path("data.parquet")).name
        'data.parquet'
        >>> strip_compression_suffix("archive.tar.bz2").name
        'archive.tar'
    """
    candidate = Path(path)
    # Guard clause: nothing to strip unless the last suffix is a compression one.
    if candidate.suffix.lower() not in COMPRESSION_EXTENSIONS:
        return candidate
    return candidate.with_suffix("")
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
def get_base_name_without_compression(path: Union[str, Path]) -> str:
    """Return the file name of ``path`` with any compression suffix removed.

    This is the string counterpart of strip_compression_suffix: directories
    are discarded and only the final path component is returned.

    Args:
        path: File path (string or Path) that may end in a compression suffix.

    Returns:
        The file name without its trailing compression extension.

    Examples:
        >>> get_base_name_without_compression(Path("/data/file.tbl.zst"))
        'file.tbl'
        >>> get_base_name_without_compression("file.csv.gz")
        'file.csv'
    """
    return strip_compression_suffix(Path(path)).name
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
def normalize_format_extension(suffix: str) -> str:
    """Map a file extension to its canonical format name.

    Several extensions can denote the same format (for instance ``.dat`` and
    ``.tbl`` both map to 'tbl'). The leading dot is optional and case is
    ignored.

    Args:
        suffix: File extension, with or without the leading dot.

    Returns:
        The canonical format name, or 'csv' when the extension is unknown.

    Examples:
        >>> normalize_format_extension(".dat")
        'tbl'
        >>> normalize_format_extension("parquet")
        'parquet'
        >>> normalize_format_extension(".unknown")
        'csv'
    """
    key = (suffix if suffix.startswith(".") else "." + suffix).lower()
    if key in _FORMAT_NAMES:
        return _FORMAT_NAMES[key]
    # Unknown extensions are treated as csv.
    return "csv"
|
|
285
|
+
|
|
286
|
+
|
|
287
|
+
# Extensions used by TPC benchmark data files (pipe-delimited format)
TPC_FORMAT_EXTENSIONS: frozenset[str] = frozenset((".tbl", ".dat"))
|
|
289
|
+
|
|
290
|
+
|
|
291
|
+
def is_tpc_format(path: Union[str, Path]) -> bool:
    """Check if a file is in TPC benchmark format (.tbl or .dat).

    TPC-H uses .tbl files and TPC-DS uses .dat files. Both are pipe-delimited
    with a trailing delimiter on each line.

    The file name's suffixes are scanned left to right (case-insensitively).
    Suffixes that are not a known data format extension are skipped. This
    covers compression extensions as well as incidental dotted parts of the
    name (e.g. the ``.01`` in ``lineitem_sf0.01.tbl``). The first recognized
    data format extension decides the result, which keeps this function in
    agreement with detect_data_format.

    Args:
        path: File path or string to check

    Returns:
        True if the first recognized data format extension is a TPC one

    Examples:
        >>> is_tpc_format("lineitem.tbl")
        True
        >>> is_tpc_format("store_sales.dat")
        True
        >>> is_tpc_format("data.csv")
        False
        >>> is_tpc_format(Path("lineitem.tbl.zst"))
        True
        >>> is_tpc_format("lineitem_sf0.01.tbl")
        True
    """
    for raw_suffix in Path(path).suffixes:
        suffix = raw_suffix.lower()
        # Only a recognized data format extension may decide the answer;
        # anything else (compression, version fragments, ...) is skipped.
        if suffix in DATA_FORMAT_EXTENSIONS:
            return suffix in TPC_FORMAT_EXTENSIONS
    return False
|
|
321
|
+
|
|
322
|
+
|
|
323
|
+
def get_delimiter_for_file(path: Union[str, Path]) -> str:
    """Choose the field delimiter for a delimited data file.

    Files recognised by ``is_tpc_format`` (.tbl / .dat) are pipe-delimited;
    every other path falls back to a comma.

    Args:
        path: File path (``str`` or ``Path``) to analyze.

    Returns:
        ``"|"`` for TPC formats, ``","`` otherwise.

    Examples:
        >>> get_delimiter_for_file("lineitem.tbl")
        '|'
        >>> get_delimiter_for_file("store_sales.dat.zst")
        '|'
        >>> get_delimiter_for_file("data.csv")
        ','
        >>> get_delimiter_for_file("unknown.txt")
        ','
    """
    if is_tpc_format(path):
        return "|"
    return ","
|
|
346
|
+
|
|
347
|
+
|
|
348
|
+
def is_parquet_format(path: Union[str, Path]) -> bool:
    """Report whether a path names a Parquet file.

    Suffixes are lower-cased and scanned left to right; entries found in
    ``COMPRESSION_EXTENSIONS`` are dropped, and the first suffix that remains
    is compared with ``.parquet``. A path with no remaining suffix is not
    Parquet.

    Args:
        path: File path (``str`` or ``Path``) to classify.

    Returns:
        True if the first non-compression suffix is ``.parquet``.

    Examples:
        >>> is_parquet_format("data.parquet")
        True
        >>> is_parquet_format(Path("data.parquet.zst"))
        True
        >>> is_parquet_format("data.csv")
        False
        >>> is_parquet_format("data.tbl")
        False
    """
    format_suffixes = [
        ext
        for ext in (raw.lower() for raw in Path(path).suffixes)
        if ext not in COMPRESSION_EXTENSIONS
    ]
    # Only the leading non-compression suffix is considered.
    return bool(format_suffixes) and format_suffixes[0] == ".parquet"
|
|
377
|
+
|
|
378
|
+
|
|
379
|
+
def is_csv_format(path: Union[str, Path]) -> bool:
    """Report whether a path names a CSV file.

    Walks the path's suffixes left to right (case-insensitively), ignoring
    any that appear in ``COMPRESSION_EXTENSIONS``. The first suffix that is
    not a compression extension decides the result; if none is found the
    path is not CSV.

    Args:
        path: File path (``str`` or ``Path``) to classify.

    Returns:
        True if the first non-compression suffix is ``.csv``.

    Examples:
        >>> is_csv_format("data.csv")
        True
        >>> is_csv_format(Path("data.csv.gz"))
        True
        >>> is_csv_format("data.parquet")
        False
        >>> is_csv_format("data.tbl")
        False
    """
    for raw_suffix in Path(path).suffixes:
        ext = raw_suffix.lower()
        if ext not in COMPRESSION_EXTENSIONS:
            # First real (non-compression) suffix settles the question.
            return ext == ".csv"
    return False
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
"""Format conversion utilities for BenchBox.
|
|
2
2
|
|
|
3
3
|
This module provides converters for transforming benchmark data between different
|
|
4
|
-
table formats (TBL → Parquet → Delta Lake/Iceberg).
|
|
4
|
+
table formats (TBL → Parquet → Delta Lake/Iceberg/Vortex).
|
|
5
5
|
"""
|
|
6
6
|
|
|
7
7
|
from benchbox.utils.format_converters.base import (
|
|
@@ -11,8 +11,10 @@ from benchbox.utils.format_converters.base import (
|
|
|
11
11
|
FormatConverter,
|
|
12
12
|
)
|
|
13
13
|
from benchbox.utils.format_converters.delta_converter import DeltaConverter
|
|
14
|
+
from benchbox.utils.format_converters.ducklake_converter import DuckLakeConverter
|
|
14
15
|
from benchbox.utils.format_converters.iceberg_converter import IcebergConverter
|
|
15
16
|
from benchbox.utils.format_converters.parquet_converter import ParquetConverter
|
|
17
|
+
from benchbox.utils.format_converters.vortex_converter import VortexConverter
|
|
16
18
|
|
|
17
19
|
__all__ = [
|
|
18
20
|
"ArrowTypeMapper",
|
|
@@ -21,5 +23,7 @@ __all__ = [
|
|
|
21
23
|
"ConversionResult",
|
|
22
24
|
"ParquetConverter",
|
|
23
25
|
"DeltaConverter",
|
|
26
|
+
"DuckLakeConverter",
|
|
24
27
|
"IcebergConverter",
|
|
28
|
+
"VortexConverter",
|
|
25
29
|
]
|
|
@@ -0,0 +1,227 @@
|
|
|
1
|
+
"""DuckLake format converter for BenchBox.
|
|
2
|
+
|
|
3
|
+
Converts TPC benchmark data from TBL (pipe-delimited) format to DuckLake format.
|
|
4
|
+
|
|
5
|
+
DuckLake is DuckDB's native open table format that provides:
|
|
6
|
+
- ACID transactions
|
|
7
|
+
- Time travel and snapshot isolation
|
|
8
|
+
- Schema evolution
|
|
9
|
+
- Native DuckDB integration with optimal performance
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import shutil
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
from typing import Any, Callable
|
|
17
|
+
|
|
18
|
+
from benchbox.utils.format_converters.base import (
|
|
19
|
+
BaseFormatConverter,
|
|
20
|
+
ConversionError,
|
|
21
|
+
ConversionOptions,
|
|
22
|
+
ConversionResult,
|
|
23
|
+
SchemaError,
|
|
24
|
+
)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class DuckLakeConverter(BaseFormatConverter):
    """Converter for TBL → DuckLake format.

    DuckLake is DuckDB's native open table format that provides ACID transactions
    on top of Parquet files with metadata stored in a DuckDB database. This converter:
    1. Reads the source files into a single in-memory table via the inherited
       ``read_tbl_files`` helper
    2. Registers that table with an in-memory DuckDB connection
    3. Writes it to DuckLake using DuckDB with the ducklake extension

    DuckLake structure (per table, under ``<output_dir>/<table_name>/``):
    - ``metadata.ducklake``: the catalog file attached via ``ducklake:``
    - ``data/``: directory passed to DuckLake as ``DATA_PATH``
    """

    def get_file_extension(self) -> str:
        """Get file extension for DuckLake format (directory-based)."""
        return ""  # DuckLake uses directories, not file extensions

    def get_format_name(self) -> str:
        """Get human-readable format name."""
        return "DuckLake"

    @staticmethod
    def _quote_sql_identifier(name: str) -> str:
        """Return ``name`` as a double-quoted SQL identifier (embedded quotes doubled)."""
        return '"' + name.replace('"', '""') + '"'

    @staticmethod
    def _quote_sql_string(value: object) -> str:
        """Return ``str(value)`` as a single-quoted SQL string literal (embedded quotes doubled)."""
        return "'" + str(value).replace("'", "''") + "'"

    @staticmethod
    def _discard_partial_output(table_path: Path) -> None:
        """Best-effort removal of a table directory; missing paths and errors are ignored."""
        if table_path.exists():
            shutil.rmtree(table_path, ignore_errors=True)

    @staticmethod
    def _close_quietly(conn: Any) -> None:
        """Close a DuckDB connection if one is open, ignoring errors raised by ``close``."""
        if conn is None:
            return
        try:
            conn.close()
        except Exception:
            # Deliberate best-effort: a failing close must not mask the real error.
            pass

    def convert(
        self,
        source_files: list[Path],
        table_name: str,
        schema: dict[str, Any],
        options: ConversionOptions | None = None,
        progress_callback: Callable[[str, float], None] | None = None,
    ) -> ConversionResult:
        """Convert TBL files to DuckLake format.

        The table is written to ``<output_dir>/<table_name>/``, where
        ``output_dir`` is ``options.output_dir`` when set and otherwise the
        directory of the first source file. Any existing directory at that
        location is removed first. If the conversion or the row-count
        validation fails, the partially written directory is removed before
        the error is raised.

        Note:
            ``options.compression`` and ``options.partition_cols`` are recorded
            in the result metadata but are not passed to DuckDB by this method.

        Args:
            source_files: List of source TBL file paths (may be sharded)
            table_name: Name of the table being converted
            schema: Table schema definition; ``schema["columns"]`` must be a
                list of mappings each having a ``"name"`` key
            options: Conversion options (uses defaults if None)
            progress_callback: Optional callback receiving a message and a
                progress fraction between 0.0 and 1.0

        Returns:
            ConversionResult with details about the conversion

        Raises:
            ConversionError: If DuckDB or the ducklake extension is unavailable,
                if writing the table fails, or if row-count validation fails
            SchemaError: If schema is invalid
        """
        try:
            import duckdb
        except ImportError as e:
            raise ConversionError("DuckLake support requires DuckDB. Install it with: uv add duckdb") from e

        opts = options or ConversionOptions()

        # Validate inputs
        self.validate_source_files(source_files)
        self.validate_schema(schema)

        if progress_callback:
            progress_callback(f"Starting DuckLake conversion for {table_name}", 0.0)

        # Build PyArrow schema purely as a validation step; the result is not used.
        try:
            self._build_arrow_schema(schema)
        except SchemaError:
            raise
        except Exception as e:
            raise SchemaError(f"Failed to build Arrow schema: {e}") from e

        # Read TBL files using shared method
        combined_table = self.read_tbl_files(
            source_files, schema, progress_callback, progress_start=0.0, progress_end=0.6
        )

        # Get column names for metadata
        column_names = [col["name"] for col in schema["columns"]]

        # Determine output path - DuckLake uses a directory
        source_dir = source_files[0].parent
        output_dir = opts.output_dir if opts.output_dir else source_dir
        ducklake_table_path = output_dir / table_name

        # Start from a clean table directory
        self._discard_partial_output(ducklake_table_path)
        ducklake_table_path.mkdir(parents=True, exist_ok=True)

        # DuckLake stores metadata in a metadata file and data in a data directory
        metadata_path = ducklake_table_path / "metadata.ducklake"
        data_path = ducklake_table_path / "data"

        conn = None
        try:
            if progress_callback:
                progress_callback("Installing DuckLake extension", 0.65)

            # Create an in-memory connection to work with the data
            conn = duckdb.connect(":memory:")

            # Install and load the ducklake extension
            try:
                conn.execute("INSTALL ducklake")
                conn.execute("LOAD ducklake")
            except Exception as e:
                raise ConversionError(
                    f"Failed to load DuckLake extension. Ensure DuckDB >= 1.3.0 is installed. Error: {e}"
                ) from e

            if progress_callback:
                progress_callback("Creating DuckLake catalog", 0.7)

            data_path.mkdir(parents=True, exist_ok=True)

            # Attach DuckLake database with DATA_PATH option. Paths are escaped so
            # that a quote character in a directory name cannot break the statement.
            catalog_literal = self._quote_sql_string(f"ducklake:{metadata_path}")
            data_literal = self._quote_sql_string(data_path)
            conn.execute(f"ATTACH {catalog_literal} AS ducklake_db (DATA_PATH {data_literal})")

            if progress_callback:
                progress_callback("Writing DuckLake table", 0.8)

            # Register the Arrow table as a view
            conn.register("source_data", combined_table)

            # Create the schema (namespace) if it doesn't exist
            conn.execute("CREATE SCHEMA IF NOT EXISTS ducklake_db.main")

            # Create table in DuckLake catalog; the identifier is quoted so names
            # containing keywords or special characters are handled safely.
            quoted_table = self._quote_sql_identifier(table_name)
            conn.execute(f"CREATE OR REPLACE TABLE ducklake_db.main.{quoted_table} AS SELECT * FROM source_data")

            if progress_callback:
                progress_callback("Finalizing DuckLake table", 0.9)

            # Close connection to ensure all data is flushed
            conn.close()
            conn = None

        except Exception as e:
            # Release DuckDB's handles first so the directory can actually be removed,
            # then clean up partial output on every failure path (including a failed
            # extension load, which previously left the new directory behind).
            self._close_quietly(conn)
            conn = None
            self._discard_partial_output(ducklake_table_path)
            if isinstance(e, ConversionError):
                raise
            raise ConversionError(f"Failed to write DuckLake table: {e}") from e
        finally:
            self._close_quietly(conn)

        # Calculate metrics
        source_size = self.calculate_file_size(source_files)
        row_count = combined_table.num_rows

        # Validate row count integrity (critical for TPC compliance)
        # If validation fails, clean up output before raising
        if opts.validate_row_count:
            try:
                self.validate_row_count(source_files, row_count, table_name)
            except ConversionError:
                # Clean up output directory since validation failed
                self._discard_partial_output(ducklake_table_path)
                raise

        # Calculate DuckLake table size (data files + catalog)
        output_size = sum(f.stat().st_size for f in ducklake_table_path.rglob("*") if f.is_file())

        # Build metadata
        metadata = {
            "format": "ducklake",
            "partition_cols": opts.partition_cols if opts.partition_cols else [],
            "num_columns": len(column_names),
            "table_path": str(ducklake_table_path),
            "metadata_path": str(metadata_path),
            "data_path": str(data_path),
            "compression": opts.compression,
        }

        if progress_callback:
            progress_callback(f"Conversion complete: {row_count:,} rows", 1.0)

        # Detect source format from file extensions
        source_format = self._detect_source_format(source_files)

        return ConversionResult(
            output_files=[ducklake_table_path],
            row_count=row_count,
            source_size_bytes=source_size,
            output_size_bytes=output_size,
            metadata=metadata,
            source_format=source_format,
            converted_at=self.get_current_timestamp(),
            conversion_options={
                "compression": opts.compression,
                "merge_shards": opts.merge_shards,
                "partition_cols": opts.partition_cols,
                "validate_row_count": opts.validate_row_count,
            },
        )
|