benchbox 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- benchbox/__init__.py +1 -1
- benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query72.tpl +1 -1
- benchbox/_binaries/tpc-ds/{darwin-x86_64/query_templates/ansi.tpl → templates/query_templates/sqlserver.tpl} +1 -1
- benchbox/_binaries/tpc-ds/templates/query_variants/README +6 -0
- benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query10.tpl → templates/query_variants/query10a.tpl} +13 -14
- benchbox/_binaries/tpc-ds/{darwin-x86_64/query_templates/query14.tpl → templates/query_variants/query14a.tpl} +30 -26
- benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query18.tpl → templates/query_variants/query18a.tpl} +40 -19
- benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query22.tpl → templates/query_variants/query22a.tpl} +31 -9
- benchbox/_binaries/tpc-ds/{darwin-x86_64/query_templates/query27.tpl → templates/query_variants/query27a.tpl} +23 -10
- benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query35.tpl → templates/query_variants/query35a.tpl} +9 -8
- benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query36.tpl → templates/query_variants/query36a.tpl} +24 -12
- benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query51.tpl → templates/query_variants/query51a.tpl} +37 -20
- benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query5.tpl → templates/query_variants/query5a.tpl} +15 -10
- benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query67.tpl → templates/query_variants/query67a.tpl} +46 -18
- benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query70.tpl → templates/query_variants/query70a.tpl} +31 -27
- benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query77.tpl → templates/query_variants/query77a.tpl} +22 -15
- benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query80.tpl → templates/query_variants/query80a.tpl} +22 -8
- benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query86.tpl → templates/query_variants/query86a.tpl} +22 -13
- benchbox/_binaries/tpc-h/templates/dists.dss +836 -0
- benchbox/_binaries/tpc-h/templates/queries/1.sql +28 -0
- benchbox/_binaries/tpc-h/templates/queries/10.sql +38 -0
- benchbox/_binaries/tpc-h/templates/queries/11.sql +34 -0
- benchbox/_binaries/tpc-h/templates/queries/12.sql +35 -0
- benchbox/_binaries/tpc-h/templates/queries/13.sql +27 -0
- benchbox/_binaries/tpc-h/templates/queries/14.sql +20 -0
- benchbox/_binaries/tpc-h/templates/queries/15.sql +40 -0
- benchbox/_binaries/tpc-h/templates/queries/16.sql +37 -0
- benchbox/_binaries/tpc-h/templates/queries/17.sql +24 -0
- benchbox/_binaries/tpc-h/templates/queries/18.sql +39 -0
- benchbox/_binaries/tpc-h/templates/queries/19.sql +42 -0
- benchbox/_binaries/tpc-h/templates/queries/2.sql +50 -0
- benchbox/_binaries/tpc-h/templates/queries/20.sql +44 -0
- benchbox/_binaries/tpc-h/templates/queries/21.sql +46 -0
- benchbox/_binaries/tpc-h/templates/queries/22.sql +44 -0
- benchbox/_binaries/tpc-h/templates/queries/3.sql +29 -0
- benchbox/_binaries/tpc-h/templates/queries/4.sql +28 -0
- benchbox/_binaries/tpc-h/templates/queries/5.sql +31 -0
- benchbox/_binaries/tpc-h/templates/queries/6.sql +16 -0
- benchbox/_binaries/tpc-h/templates/queries/7.sql +46 -0
- benchbox/_binaries/tpc-h/templates/queries/8.sql +44 -0
- benchbox/_binaries/tpc-h/templates/queries/9.sql +39 -0
- benchbox/_binaries/tpc-h/templates/variants/12a.sql +27 -0
- benchbox/_binaries/tpc-h/templates/variants/13a.sql +30 -0
- benchbox/_binaries/tpc-h/templates/variants/14a.sql +18 -0
- benchbox/_binaries/tpc-h/templates/variants/15a.sql +39 -0
- benchbox/_binaries/tpc-h/templates/variants/8a.sql +77 -0
- benchbox/base.py +88 -121
- benchbox/cli/benchmarks.py +3 -3
- benchbox/cli/commands/calculate_qphh.py +55 -14
- benchbox/cli/commands/checks.py +1 -4
- benchbox/cli/commands/convert.py +8 -3
- benchbox/cli/commands/metrics.py +55 -14
- benchbox/cli/commands/results.py +131 -3
- benchbox/cli/commands/run.py +157 -22
- benchbox/cli/commands/visualize.py +3 -3
- benchbox/cli/composite_params.py +1 -1
- benchbox/cli/config.py +13 -3
- benchbox/cli/database.py +3 -3
- benchbox/cli/dryrun.py +30 -4
- benchbox/cli/exceptions.py +2 -1
- benchbox/cli/execution_pipeline.py +2 -1
- benchbox/cli/orchestrator.py +25 -71
- benchbox/cli/tuning.py +1 -1
- benchbox/core/ai_primitives/benchmark.py +53 -0
- benchbox/core/ai_primitives/dataframe_operations.py +1217 -0
- benchbox/core/base_benchmark.py +90 -68
- benchbox/core/coffeeshop/queries.py +1 -1
- benchbox/core/coffeeshop/schema.py +1 -1
- benchbox/core/comparison/plotter.py +5 -4
- benchbox/core/dataframe/__init__.py +26 -0
- benchbox/core/dataframe/benchmark_suite.py +5 -4
- benchbox/core/dataframe/context.py +45 -0
- benchbox/core/dataframe/data_loader.py +180 -79
- benchbox/core/dataframe/maintenance_interface.py +866 -0
- benchbox/core/dryrun.py +152 -22
- benchbox/core/expected_results/registry.py +22 -5
- benchbox/core/manifest/io.py +4 -3
- benchbox/core/metadata_primitives/__init__.py +31 -0
- benchbox/core/metadata_primitives/benchmark.py +337 -0
- benchbox/core/metadata_primitives/dataframe_operations.py +1824 -0
- benchbox/core/platform_registry.py +134 -45
- benchbox/core/read_primitives/benchmark.py +56 -4
- benchbox/core/read_primitives/dataframe_queries.py +6547 -0
- benchbox/core/results/__init__.py +47 -6
- benchbox/core/results/builder.py +909 -0
- benchbox/core/results/database.py +5 -5
- benchbox/core/results/exporter.py +58 -96
- benchbox/core/results/filenames.py +102 -0
- benchbox/core/results/loader.py +10 -9
- benchbox/core/results/metrics.py +211 -0
- benchbox/core/results/models.py +3 -1
- benchbox/core/results/normalizer.py +346 -0
- benchbox/core/results/platform_info.py +235 -0
- benchbox/core/results/query_normalizer.py +200 -0
- benchbox/core/results/schema.py +368 -69
- benchbox/core/runner/conversion.py +2 -0
- benchbox/core/runner/dataframe_runner.py +135 -131
- benchbox/core/runner/runner.py +111 -18
- benchbox/core/schemas.py +145 -3
- benchbox/core/ssb/generator.py +14 -2
- benchbox/core/tpc_compliance.py +4 -4
- benchbox/core/tpc_metrics.py +9 -4
- benchbox/core/tpcdi/generator/manifest.py +15 -2
- benchbox/core/tpcds/benchmark/runner.py +3 -7
- benchbox/core/tpcds/c_tools.py +34 -28
- benchbox/core/tpcds/dataframe_queries/queries.py +44 -21
- benchbox/core/tpcds/generator/filesystem.py +23 -11
- benchbox/core/tpcds/generator/manager.py +3 -2
- benchbox/core/tpcds/maintenance_test.py +281 -0
- benchbox/core/tpcds/power_test.py +21 -11
- benchbox/core/tpcds/throughput_test.py +27 -9
- benchbox/core/tpcds_obt/etl/transformer.py +24 -5
- benchbox/core/tpch/dataframe_queries.py +46 -43
- benchbox/core/tpch/generator.py +21 -8
- benchbox/core/tpch/maintenance_test.py +87 -0
- benchbox/core/tpch/power_test.py +21 -5
- benchbox/core/tpch/queries.py +2 -7
- benchbox/core/tpch/streams.py +3 -19
- benchbox/core/transaction_primitives/benchmark.py +99 -0
- benchbox/core/transaction_primitives/dataframe_operations.py +1294 -0
- benchbox/core/transaction_primitives/generator.py +11 -4
- benchbox/core/visualization/__init__.py +2 -2
- benchbox/core/visualization/charts.py +4 -4
- benchbox/core/visualization/dependencies.py +1 -12
- benchbox/core/visualization/exporters.py +15 -26
- benchbox/core/visualization/result_plotter.py +90 -49
- benchbox/core/visualization/templates.py +6 -6
- benchbox/core/write_primitives/__init__.py +13 -0
- benchbox/core/write_primitives/benchmark.py +66 -0
- benchbox/core/write_primitives/dataframe_operations.py +912 -0
- benchbox/core/write_primitives/generator.py +11 -4
- benchbox/mcp/__init__.py +5 -1
- benchbox/mcp/errors.py +29 -0
- benchbox/mcp/resources/registry.py +12 -7
- benchbox/mcp/schemas.py +62 -0
- benchbox/mcp/server.py +17 -14
- benchbox/mcp/tools/__init__.py +3 -0
- benchbox/mcp/tools/analytics.py +550 -582
- benchbox/mcp/tools/benchmark.py +603 -611
- benchbox/mcp/tools/discovery.py +156 -205
- benchbox/mcp/tools/results.py +332 -533
- benchbox/mcp/tools/visualization.py +449 -0
- benchbox/platforms/__init__.py +740 -622
- benchbox/platforms/adapter_factory.py +6 -6
- benchbox/platforms/azure_synapse.py +3 -7
- benchbox/platforms/base/adapter.py +189 -49
- benchbox/platforms/base/cloud_spark/config.py +8 -0
- benchbox/platforms/base/cloud_spark/mixins.py +96 -0
- benchbox/platforms/base/cloud_spark/session.py +4 -2
- benchbox/platforms/base/cloud_spark/staging.py +15 -7
- benchbox/platforms/base/data_loading.py +315 -1
- benchbox/platforms/base/format_capabilities.py +37 -2
- benchbox/platforms/base/utils.py +6 -4
- benchbox/platforms/bigquery.py +5 -6
- benchbox/platforms/clickhouse_cloud.py +263 -0
- benchbox/platforms/databricks/adapter.py +16 -15
- benchbox/platforms/databricks/dataframe_adapter.py +4 -1
- benchbox/platforms/dataframe/__init__.py +31 -0
- benchbox/platforms/dataframe/benchmark_mixin.py +779 -0
- benchbox/platforms/dataframe/cudf_df.py +3 -3
- benchbox/platforms/dataframe/dask_df.py +3 -3
- benchbox/platforms/dataframe/datafusion_df.py +152 -15
- benchbox/platforms/dataframe/delta_lake_maintenance.py +341 -0
- benchbox/platforms/dataframe/ducklake_maintenance.py +402 -0
- benchbox/platforms/dataframe/expression_family.py +47 -8
- benchbox/platforms/dataframe/hudi_maintenance.py +437 -0
- benchbox/platforms/dataframe/iceberg_maintenance.py +605 -0
- benchbox/platforms/dataframe/modin_df.py +3 -3
- benchbox/platforms/dataframe/pandas_df.py +3 -3
- benchbox/platforms/dataframe/pandas_family.py +59 -8
- benchbox/platforms/dataframe/platform_checker.py +16 -49
- benchbox/platforms/dataframe/polars_df.py +14 -12
- benchbox/platforms/dataframe/polars_maintenance.py +630 -0
- benchbox/platforms/dataframe/pyspark_df.py +15 -0
- benchbox/platforms/dataframe/pyspark_maintenance.py +613 -0
- benchbox/platforms/datafusion.py +5 -6
- benchbox/platforms/duckdb.py +2 -1
- benchbox/platforms/fabric_warehouse.py +15 -15
- benchbox/platforms/firebolt.py +3 -2
- benchbox/platforms/influxdb/adapter.py +7 -3
- benchbox/platforms/motherduck.py +3 -2
- benchbox/platforms/onehouse/__init__.py +39 -0
- benchbox/platforms/onehouse/onehouse_client.py +509 -0
- benchbox/platforms/onehouse/quanton_adapter.py +646 -0
- benchbox/platforms/postgresql.py +5 -9
- benchbox/platforms/presto.py +2 -2
- benchbox/platforms/pyspark/session.py +3 -3
- benchbox/platforms/pyspark/sql_adapter.py +2 -3
- benchbox/platforms/redshift.py +7 -7
- benchbox/platforms/snowflake.py +4 -4
- benchbox/platforms/snowpark_connect.py +2 -1
- benchbox/platforms/trino.py +2 -2
- benchbox/release/__init__.py +17 -0
- benchbox/release/content_validation.py +745 -0
- benchbox/release/workflow.py +17 -0
- benchbox/utils/VERSION_MANAGEMENT.md +1 -1
- benchbox/utils/cloud_storage.py +7 -5
- benchbox/utils/compression.py +8 -8
- benchbox/utils/compression_mixin.py +2 -1
- benchbox/utils/data_validation.py +23 -14
- benchbox/utils/dependencies.py +47 -7
- benchbox/utils/file_format.py +407 -0
- benchbox/utils/format_converters/__init__.py +5 -1
- benchbox/utils/format_converters/ducklake_converter.py +227 -0
- benchbox/utils/format_converters/vortex_converter.py +168 -0
- benchbox/utils/tpc_compilation.py +43 -0
- benchbox/utils/version.py +14 -2
- {benchbox-0.1.0.dist-info → benchbox-0.1.1.dist-info}/METADATA +15 -15
- benchbox-0.1.1.dist-info/RECORD +839 -0
- {benchbox-0.1.0.dist-info → benchbox-0.1.1.dist-info}/WHEEL +1 -1
- benchbox/_binaries/tpc-ds/darwin-arm64/query_templates/sqlserver.tpl +0 -37
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/README +0 -4
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/db2.tpl +0 -38
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/netezza.tpl +0 -38
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/oracle.tpl +0 -38
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query1.tpl +0 -62
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query10.tpl +0 -98
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query11.tpl +0 -119
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query12.tpl +0 -72
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query13.tpl +0 -89
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query15.tpl +0 -56
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query16.tpl +0 -76
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query17.tpl +0 -80
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query18.tpl +0 -73
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query19.tpl +0 -64
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query2.tpl +0 -94
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query20.tpl +0 -67
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query21.tpl +0 -65
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query22.tpl +0 -54
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query23.tpl +0 -144
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query24.tpl +0 -147
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query25.tpl +0 -84
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query26.tpl +0 -61
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query28.tpl +0 -90
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query29.tpl +0 -85
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query3.tpl +0 -58
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query30.tpl +0 -66
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query31.tpl +0 -88
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query32.tpl +0 -65
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query33.tpl +0 -113
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query34.tpl +0 -77
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query35.tpl +0 -98
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query36.tpl +0 -74
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query37.tpl +0 -57
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query38.tpl +0 -58
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query39.tpl +0 -93
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query4.tpl +0 -154
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query40.tpl +0 -63
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query41.tpl +0 -90
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query42.tpl +0 -64
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query43.tpl +0 -55
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query44.tpl +0 -72
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query45.tpl +0 -56
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query46.tpl +0 -78
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query47.tpl +0 -89
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query48.tpl +0 -104
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query49.tpl +0 -164
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query5.tpl +0 -165
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query50.tpl +0 -96
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query51.tpl +0 -80
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query52.tpl +0 -59
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query53.tpl +0 -64
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query54.tpl +0 -95
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query55.tpl +0 -52
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query56.tpl +0 -108
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query57.tpl +0 -87
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query58.tpl +0 -101
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query59.tpl +0 -79
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query6.tpl +0 -62
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query60.tpl +0 -115
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query61.tpl +0 -83
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query62.tpl +0 -71
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query63.tpl +0 -64
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query64.tpl +0 -157
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query65.tpl +0 -62
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query66.tpl +0 -261
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query67.tpl +0 -81
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query68.tpl +0 -82
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query69.tpl +0 -85
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query7.tpl +0 -60
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query70.tpl +0 -73
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query71.tpl +0 -74
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query72.tpl +0 -67
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query73.tpl +0 -69
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query74.tpl +0 -99
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query75.tpl +0 -107
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query76.tpl +0 -64
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query77.tpl +0 -145
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query78.tpl +0 -94
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query79.tpl +0 -60
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query8.tpl +0 -144
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query80.tpl +0 -131
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query81.tpl +0 -68
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query82.tpl +0 -56
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query83.tpl +0 -104
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query84.tpl +0 -58
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query85.tpl +0 -121
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query86.tpl +0 -60
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query87.tpl +0 -56
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query88.tpl +0 -128
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query89.tpl +0 -75
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query9.tpl +0 -88
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query90.tpl +0 -58
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query91.tpl +0 -68
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query92.tpl +0 -68
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query93.tpl +0 -53
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query94.tpl +0 -67
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query95.tpl +0 -71
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query96.tpl +0 -52
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query97.tpl +0 -62
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query98.tpl +0 -70
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query99.tpl +0 -69
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/sqlserver.tpl +0 -37
- benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/templates.lst +0 -99
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/README +0 -4
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/ansi.tpl +0 -38
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/db2.tpl +0 -38
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/netezza.tpl +0 -38
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/oracle.tpl +0 -38
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query1.tpl +0 -62
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query11.tpl +0 -119
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query12.tpl +0 -72
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query13.tpl +0 -89
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query14.tpl +0 -247
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query15.tpl +0 -56
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query16.tpl +0 -76
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query17.tpl +0 -80
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query19.tpl +0 -64
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query2.tpl +0 -94
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query20.tpl +0 -67
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query21.tpl +0 -65
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query23.tpl +0 -144
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query24.tpl +0 -147
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query25.tpl +0 -84
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query26.tpl +0 -61
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query27.tpl +0 -68
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query28.tpl +0 -90
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query29.tpl +0 -85
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query3.tpl +0 -58
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query30.tpl +0 -66
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query31.tpl +0 -88
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query32.tpl +0 -65
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query33.tpl +0 -113
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query34.tpl +0 -77
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query37.tpl +0 -57
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query38.tpl +0 -58
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query39.tpl +0 -93
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query4.tpl +0 -154
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query40.tpl +0 -63
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query41.tpl +0 -90
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query42.tpl +0 -64
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query43.tpl +0 -55
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query44.tpl +0 -72
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query45.tpl +0 -56
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query46.tpl +0 -78
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query47.tpl +0 -89
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query48.tpl +0 -104
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query49.tpl +0 -164
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query50.tpl +0 -96
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query52.tpl +0 -59
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query53.tpl +0 -64
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query54.tpl +0 -95
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query55.tpl +0 -52
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query56.tpl +0 -108
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query57.tpl +0 -87
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query58.tpl +0 -101
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query59.tpl +0 -79
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query6.tpl +0 -62
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query60.tpl +0 -115
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query61.tpl +0 -83
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query62.tpl +0 -71
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query63.tpl +0 -64
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query64.tpl +0 -157
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query65.tpl +0 -62
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query66.tpl +0 -261
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query68.tpl +0 -82
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query69.tpl +0 -85
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query7.tpl +0 -60
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query71.tpl +0 -74
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query72.tpl +0 -67
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query73.tpl +0 -69
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query74.tpl +0 -99
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query75.tpl +0 -107
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query76.tpl +0 -64
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query78.tpl +0 -94
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query79.tpl +0 -60
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query8.tpl +0 -144
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query81.tpl +0 -68
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query82.tpl +0 -56
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query83.tpl +0 -104
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query84.tpl +0 -58
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query85.tpl +0 -121
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query87.tpl +0 -56
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query88.tpl +0 -128
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query89.tpl +0 -75
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query9.tpl +0 -88
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query90.tpl +0 -58
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query91.tpl +0 -68
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query92.tpl +0 -68
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query93.tpl +0 -53
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query94.tpl +0 -67
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query95.tpl +0 -71
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query96.tpl +0 -52
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query97.tpl +0 -62
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query98.tpl +0 -70
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query99.tpl +0 -69
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/sqlserver.tpl +0 -37
- benchbox/_binaries/tpc-ds/linux-arm64/query_templates/templates.lst +0 -99
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/README +0 -4
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/ansi.tpl +0 -38
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/db2.tpl +0 -38
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/netezza.tpl +0 -38
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/oracle.tpl +0 -38
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query1.tpl +0 -62
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query10.tpl +0 -98
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query11.tpl +0 -119
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query12.tpl +0 -72
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query13.tpl +0 -89
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query14.tpl +0 -247
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query15.tpl +0 -56
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query16.tpl +0 -76
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query17.tpl +0 -80
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query18.tpl +0 -73
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query19.tpl +0 -64
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query2.tpl +0 -94
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query20.tpl +0 -67
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query21.tpl +0 -65
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query22.tpl +0 -54
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query23.tpl +0 -144
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query24.tpl +0 -147
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query25.tpl +0 -84
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query26.tpl +0 -61
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query27.tpl +0 -68
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query28.tpl +0 -90
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query29.tpl +0 -85
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query3.tpl +0 -58
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query30.tpl +0 -66
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query31.tpl +0 -88
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query32.tpl +0 -65
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query33.tpl +0 -113
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query34.tpl +0 -77
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query35.tpl +0 -98
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query36.tpl +0 -74
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query37.tpl +0 -57
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query38.tpl +0 -58
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query39.tpl +0 -93
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query4.tpl +0 -154
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query40.tpl +0 -63
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query41.tpl +0 -90
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query42.tpl +0 -64
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query43.tpl +0 -55
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query44.tpl +0 -72
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query45.tpl +0 -56
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query46.tpl +0 -78
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query47.tpl +0 -89
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query48.tpl +0 -104
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query49.tpl +0 -164
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query5.tpl +0 -165
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query50.tpl +0 -96
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query51.tpl +0 -80
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query52.tpl +0 -59
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query53.tpl +0 -64
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query54.tpl +0 -95
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query55.tpl +0 -52
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query56.tpl +0 -108
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query57.tpl +0 -87
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query58.tpl +0 -101
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query59.tpl +0 -79
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query6.tpl +0 -62
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query60.tpl +0 -115
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query61.tpl +0 -83
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query62.tpl +0 -71
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query63.tpl +0 -64
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query64.tpl +0 -157
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query65.tpl +0 -62
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query66.tpl +0 -261
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query67.tpl +0 -81
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query68.tpl +0 -82
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query69.tpl +0 -85
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query7.tpl +0 -60
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query70.tpl +0 -73
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query71.tpl +0 -74
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query72.tpl +0 -67
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query73.tpl +0 -69
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query74.tpl +0 -99
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query75.tpl +0 -107
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query76.tpl +0 -64
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query77.tpl +0 -145
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query78.tpl +0 -94
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query79.tpl +0 -60
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query8.tpl +0 -144
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query80.tpl +0 -131
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query81.tpl +0 -68
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query82.tpl +0 -56
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query83.tpl +0 -104
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query84.tpl +0 -58
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query85.tpl +0 -121
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query86.tpl +0 -60
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query87.tpl +0 -56
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query88.tpl +0 -128
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query89.tpl +0 -75
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query9.tpl +0 -88
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query90.tpl +0 -58
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query91.tpl +0 -68
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query92.tpl +0 -68
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query93.tpl +0 -53
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query94.tpl +0 -67
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query95.tpl +0 -71
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query96.tpl +0 -52
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query97.tpl +0 -62
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query98.tpl +0 -70
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query99.tpl +0 -69
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/sqlserver.tpl +0 -37
- benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/templates.lst +0 -99
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/README +0 -4
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/ansi.tpl +0 -38
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/db2.tpl +0 -38
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/netezza.tpl +0 -38
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/oracle.tpl +0 -38
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query1.tpl +0 -62
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query10.tpl +0 -98
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query11.tpl +0 -119
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query12.tpl +0 -72
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query13.tpl +0 -89
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query14.tpl +0 -247
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query15.tpl +0 -56
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query16.tpl +0 -76
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query17.tpl +0 -80
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query18.tpl +0 -73
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query19.tpl +0 -64
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query2.tpl +0 -94
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query20.tpl +0 -67
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query21.tpl +0 -65
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query22.tpl +0 -54
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query23.tpl +0 -144
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query24.tpl +0 -147
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query25.tpl +0 -84
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query26.tpl +0 -61
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query27.tpl +0 -68
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query28.tpl +0 -90
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query29.tpl +0 -85
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query3.tpl +0 -58
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query30.tpl +0 -66
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query31.tpl +0 -88
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query32.tpl +0 -65
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query33.tpl +0 -113
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query34.tpl +0 -77
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query35.tpl +0 -98
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query36.tpl +0 -74
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query37.tpl +0 -57
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query38.tpl +0 -58
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query39.tpl +0 -93
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query4.tpl +0 -154
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query40.tpl +0 -63
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query41.tpl +0 -90
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query42.tpl +0 -64
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query43.tpl +0 -55
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query44.tpl +0 -72
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query45.tpl +0 -56
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query46.tpl +0 -78
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query47.tpl +0 -89
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query48.tpl +0 -104
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query49.tpl +0 -164
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query5.tpl +0 -165
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query50.tpl +0 -96
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query51.tpl +0 -80
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query52.tpl +0 -59
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query53.tpl +0 -64
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query54.tpl +0 -95
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query55.tpl +0 -52
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query56.tpl +0 -108
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query57.tpl +0 -87
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query58.tpl +0 -101
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query59.tpl +0 -79
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query6.tpl +0 -62
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query60.tpl +0 -115
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query61.tpl +0 -83
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query62.tpl +0 -71
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query63.tpl +0 -64
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query64.tpl +0 -157
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query65.tpl +0 -62
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query66.tpl +0 -261
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query67.tpl +0 -81
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query68.tpl +0 -82
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query69.tpl +0 -85
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query7.tpl +0 -60
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query70.tpl +0 -73
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query71.tpl +0 -74
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query72.tpl +0 -67
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query73.tpl +0 -69
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query74.tpl +0 -99
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query75.tpl +0 -107
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query76.tpl +0 -64
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query77.tpl +0 -145
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query78.tpl +0 -94
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query79.tpl +0 -60
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query8.tpl +0 -144
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query80.tpl +0 -131
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query81.tpl +0 -68
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query82.tpl +0 -56
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query83.tpl +0 -104
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query84.tpl +0 -58
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query85.tpl +0 -121
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query86.tpl +0 -60
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query87.tpl +0 -56
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query88.tpl +0 -128
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query89.tpl +0 -75
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query9.tpl +0 -88
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query90.tpl +0 -58
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query91.tpl +0 -68
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query92.tpl +0 -68
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query93.tpl +0 -53
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query94.tpl +0 -67
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query95.tpl +0 -71
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query96.tpl +0 -52
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query97.tpl +0 -62
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query98.tpl +0 -70
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query99.tpl +0 -69
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/sqlserver.tpl +0 -37
- benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/templates.lst +0 -99
- benchbox-0.1.0.dist-info/RECORD +0 -1192
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/README +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/ansi.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/db2.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/netezza.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/oracle.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query1.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query10.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query11.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query12.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query13.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query14.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query15.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query16.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query17.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query18.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query19.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query2.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query20.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query21.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query22.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query23.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query24.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query25.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query26.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query27.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query28.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query29.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query3.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query30.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query31.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query32.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query33.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query34.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query35.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query36.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query37.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query38.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query39.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query4.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query40.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query41.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query42.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query43.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query44.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query45.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query46.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query47.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query48.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query49.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query5.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query50.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query51.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query52.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query53.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query54.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query55.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query56.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query57.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query58.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query59.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query6.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query60.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query61.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query62.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query63.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query64.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query65.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query66.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query67.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query68.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query69.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query7.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query70.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query71.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query73.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query74.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query75.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query76.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query77.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query78.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query79.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query8.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query80.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query81.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query82.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query83.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query84.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query85.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query86.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query87.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query88.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query89.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query9.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query90.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query91.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query92.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query93.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query94.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query95.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query96.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query97.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query98.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query99.tpl +0 -0
- /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/templates.lst +0 -0
- {benchbox-0.1.0.dist-info → benchbox-0.1.1.dist-info}/entry_points.txt +0 -0
- {benchbox-0.1.0.dist-info → benchbox-0.1.1.dist-info}/licenses/LICENSE +0 -0
- {benchbox-0.1.0.dist-info → benchbox-0.1.1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,1294 @@
|
|
|
1
|
+
"""DataFrame operations for Transaction Primitives benchmark.
|
|
2
|
+
|
|
3
|
+
This module provides DataFrame implementations of Transaction Primitives operations,
|
|
4
|
+
enabling benchmarking of ACID transaction semantics on DataFrame platforms that
|
|
5
|
+
support Delta Lake, Iceberg, or other table formats with transaction support.
|
|
6
|
+
|
|
7
|
+
Transaction Primitives tests fundamental database transaction semantics:
|
|
8
|
+
- COMMIT: Atomic commit of changes
|
|
9
|
+
- ROLLBACK: Rollback to previous state (via RESTORE for Delta Lake)
|
|
10
|
+
- Isolation: Snapshot isolation verification
|
|
11
|
+
- Concurrency: Parallel write conflict handling
|
|
12
|
+
- Time Travel: Query and restore historical versions
|
|
13
|
+
|
|
14
|
+
Platform Support:
|
|
15
|
+
- PySpark + Delta Lake: Full ACID support
|
|
16
|
+
- Atomic writes (each operation is a transaction)
|
|
17
|
+
- RESTORE TO VERSION/TIMESTAMP for rollback
|
|
18
|
+
- Snapshot isolation
|
|
19
|
+
- Time travel queries
|
|
20
|
+
- PySpark + Iceberg: Full ACID support
|
|
21
|
+
- Snapshot-based isolation
|
|
22
|
+
- Time travel via snapshots
|
|
23
|
+
- Polars/Pandas: NOT SUPPORTED
|
|
24
|
+
- No transaction semantics
|
|
25
|
+
- Users directed to use Delta Lake or Iceberg
|
|
26
|
+
|
|
27
|
+
Note:
|
|
28
|
+
Unlike traditional SQL databases with BEGIN/COMMIT/ROLLBACK, Delta Lake and
|
|
29
|
+
Iceberg use atomic operations where each write is automatically committed.
|
|
30
|
+
"Rollback" is achieved via RESTORE (Delta) or rollback_to_snapshot (Iceberg).
|
|
31
|
+
|
|
32
|
+
Copyright 2026 Joe Harris / BenchBox Project
|
|
33
|
+
|
|
34
|
+
Licensed under the MIT License. See LICENSE file in the project root for details.
|
|
35
|
+
"""
|
|
36
|
+
|
|
37
|
+
from __future__ import annotations
|
|
38
|
+
|
|
39
|
+
import logging
|
|
40
|
+
import time
|
|
41
|
+
from dataclasses import dataclass, field
|
|
42
|
+
from enum import Enum
|
|
43
|
+
from pathlib import Path
|
|
44
|
+
from typing import Any
|
|
45
|
+
|
|
46
|
+
from benchbox.core.dataframe.maintenance_interface import (
|
|
47
|
+
DataFrameMaintenanceCapabilities,
|
|
48
|
+
TransactionIsolation,
|
|
49
|
+
get_maintenance_operations_for_platform,
|
|
50
|
+
)
|
|
51
|
+
|
|
52
|
+
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class TransactionOperationType(Enum):
    """Enumerates the transaction operations the benchmark can exercise.

    Every member names one ACID-related scenario to run against a DataFrame
    platform backed by a table format such as Delta Lake or Iceberg. The
    member value is the lowercase string identifier of the scenario.
    """

    # -- Atomic writes: each one exercises the implicit commit --
    ATOMIC_INSERT = "atomic_insert"
    ATOMIC_UPDATE = "atomic_update"
    ATOMIC_DELETE = "atomic_delete"
    ATOMIC_MERGE = "atomic_merge"

    # -- Rollback: Delta Lake RESTORE / Iceberg rollback --
    ROLLBACK_TO_VERSION = "rollback_to_version"
    ROLLBACK_TO_TIMESTAMP = "rollback_to_timestamp"

    # -- Time travel --
    TIME_TRAVEL_QUERY = "time_travel_query"
    VERSION_COMPARE = "version_compare"

    # -- Concurrency --
    CONCURRENT_WRITE = "concurrent_write"
    CONFLICT_RESOLUTION = "conflict_resolution"

    # -- Isolation verification --
    SNAPSHOT_ISOLATION = "snapshot_isolation"
    READ_YOUR_WRITES = "read_your_writes"
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
@dataclass
class DataFrameTransactionCapabilities:
    """Describes which transaction features a DataFrame platform offers.

    Instances are consulted to validate a benchmark configuration and to
    produce a helpful error message when a transaction benchmark is
    requested on a platform that cannot run it.

    Attributes:
        platform_name: Name of the platform
        supports_transactions: Has atomic transaction support
        supports_rollback: Can rollback to previous versions
        supports_time_travel: Can query historical versions
        supports_concurrent_writes: Has concurrency control
        transaction_isolation: Isolation level supported
        table_format: Underlying table format (delta, iceberg, parquet, none)
        notes: Platform-specific notes
    """

    platform_name: str
    supports_transactions: bool = False
    supports_rollback: bool = False
    supports_time_travel: bool = False
    supports_concurrent_writes: bool = False
    transaction_isolation: TransactionIsolation = TransactionIsolation.NONE
    table_format: str = "none"
    notes: str = ""

    def supports_operation(self, operation: TransactionOperationType) -> bool:
        """Report whether this platform can run the given operation.

        Args:
            operation: The operation type to check

        Returns:
            The capability flag governing ``operation``; False when the
            operation is not a known TransactionOperationType member.
        """
        op = TransactionOperationType
        snapshot_capable = self.transaction_isolation == TransactionIsolation.SNAPSHOT

        # Each entry pairs one capability flag with the operations it gates.
        gated_operations = (
            # Atomic writes require transaction support
            (
                self.supports_transactions,
                (op.ATOMIC_INSERT, op.ATOMIC_UPDATE, op.ATOMIC_DELETE, op.ATOMIC_MERGE),
            ),
            # Rollback requires RESTORE or equivalent
            (
                self.supports_rollback,
                (op.ROLLBACK_TO_VERSION, op.ROLLBACK_TO_TIMESTAMP),
            ),
            # Time travel queries
            (
                self.supports_time_travel,
                (op.TIME_TRAVEL_QUERY, op.VERSION_COMPARE),
            ),
            # Concurrency tests
            (
                self.supports_concurrent_writes,
                (op.CONCURRENT_WRITE, op.CONFLICT_RESOLUTION),
            ),
            # Isolation tests
            (snapshot_capable, (op.SNAPSHOT_ISOLATION,)),
            (self.supports_transactions, (op.READ_YOUR_WRITES,)),
        )
        support_by_operation = {
            member: flag for flag, members in gated_operations for member in members
        }
        return support_by_operation.get(operation, False)

    def get_unsupported_operations(self) -> list[TransactionOperationType]:
        """Collect every operation this platform cannot run.

        Returns:
            Unsupported TransactionOperationType members, in enum
            definition order
        """
        unsupported: list[TransactionOperationType] = []
        for candidate in TransactionOperationType:
            if not self.supports_operation(candidate):
                unsupported.append(candidate)
        return unsupported
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
# Pre-defined capability profiles for platforms.
#
# The flag sets below are shared so that each profile only spells out what
# distinguishes it: platform name, table format and notes.

# Flags for table formats that provide every transaction feature.
_FULL_ACID_FLAGS = {
    "supports_transactions": True,
    "supports_rollback": True,
    "supports_time_travel": True,
    "supports_concurrent_writes": True,
    "transaction_isolation": TransactionIsolation.SNAPSHOT,
}

# Flags for plain DataFrame libraries without any transaction feature.
_NO_TRANSACTION_FLAGS = {
    "supports_transactions": False,
    "supports_rollback": False,
    "supports_time_travel": False,
    "supports_concurrent_writes": False,
    "transaction_isolation": TransactionIsolation.NONE,
}

DELTA_LAKE_TRANSACTION_CAPABILITIES = DataFrameTransactionCapabilities(
    platform_name="delta-lake",
    table_format="delta",
    notes="Full ACID via Delta Lake. RESTORE for rollback, version queries for time travel.",
    **_FULL_ACID_FLAGS,
)

PYSPARK_DELTA_TRANSACTION_CAPABILITIES = DataFrameTransactionCapabilities(
    platform_name="pyspark-delta",
    table_format="delta",
    notes="Full ACID via delta-spark. Uses DeltaTable API for transactions.",
    **_FULL_ACID_FLAGS,
)

ICEBERG_TRANSACTION_CAPABILITIES = DataFrameTransactionCapabilities(
    platform_name="iceberg",
    table_format="iceberg",
    notes="Full ACID via Apache Iceberg. Snapshot-based transactions.",
    **_FULL_ACID_FLAGS,
)

POLARS_TRANSACTION_CAPABILITIES = DataFrameTransactionCapabilities(
    platform_name="polars-df",
    table_format="parquet",
    notes="No transaction support. Use Delta Lake or Iceberg for ACID operations.",
    **_NO_TRANSACTION_FLAGS,
)

PANDAS_TRANSACTION_CAPABILITIES = DataFrameTransactionCapabilities(
    platform_name="pandas-df",
    table_format="parquet",
    notes="No transaction support. Use Delta Lake or Iceberg for ACID operations.",
    **_NO_TRANSACTION_FLAGS,
)
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
@dataclass
class DataFrameTransactionResult:
    """Result of a DataFrame transaction operation.

    Standardized result container for transaction operations, capturing
    timing, success status, and operation-specific metrics.

    Attributes:
        operation_type: Type of transaction operation
        success: Whether the operation completed successfully
        start_time: Operation start timestamp (Unix time)
        end_time: Operation end timestamp (Unix time)
        duration_ms: Operation duration in milliseconds
        rows_affected: Number of rows affected
        version_before: Table version before operation (if applicable)
        version_after: Table version after operation (if applicable)
        error_message: Error description if operation failed
        validation_passed: Whether validation checks passed
        validation_results: Details of validation checks
        metrics: Additional operation-specific metrics
    """

    operation_type: TransactionOperationType
    success: bool
    start_time: float
    end_time: float
    duration_ms: float
    rows_affected: int
    version_before: int | None = None
    version_after: int | None = None
    error_message: str | None = None
    validation_passed: bool = True
    validation_results: list[dict[str, Any]] = field(default_factory=list)
    metrics: dict[str, Any] = field(default_factory=dict)

    @classmethod
    def failure(
        cls,
        operation_type: TransactionOperationType,
        error_message: str,
        start_time: float | None = None,
    ) -> DataFrameTransactionResult:
        """Create a failure result.

        The result has ``success`` and ``validation_passed`` set to False,
        ``rows_affected`` set to 0 and ``end_time`` set to the current time.

        Args:
            operation_type: The operation that failed
            error_message: Description of the failure
            start_time: Optional start time (defaults to now, which yields
                a duration of 0.0 ms)

        Returns:
            DataFrameTransactionResult indicating failure
        """
        now = time.time()
        # Test against None rather than truthiness: a falsy but valid start
        # time (0.0) must be kept, otherwise the stored start_time and the
        # reported duration_ms would describe different intervals.
        started = now if start_time is None else start_time
        return cls(
            operation_type=operation_type,
            success=False,
            start_time=started,
            end_time=now,
            duration_ms=(now - started) * 1000,
            rows_affected=0,
            error_message=error_message,
            validation_passed=False,
        )
|
|
272
|
+
|
|
273
|
+
|
|
274
|
+
class DataFrameTransactionOperationsManager:
|
|
275
|
+
"""Manager for DataFrame transaction operations.
|
|
276
|
+
|
|
277
|
+
Provides transaction-specific operations for DataFrame platforms that support
|
|
278
|
+
ACID semantics via Delta Lake, Iceberg, or similar table formats.
|
|
279
|
+
|
|
280
|
+
This manager wraps the maintenance operations interface with transaction-specific
|
|
281
|
+
functionality including:
|
|
282
|
+
- Atomic write operations with version tracking
|
|
283
|
+
- Rollback via RESTORE (Delta Lake) or snapshot rollback (Iceberg)
|
|
284
|
+
- Time travel queries
|
|
285
|
+
- Concurrency and isolation testing
|
|
286
|
+
|
|
287
|
+
Example:
|
|
288
|
+
# With PySpark + Delta Lake
|
|
289
|
+
manager = DataFrameTransactionOperationsManager(
|
|
290
|
+
"pyspark-df", spark_session=spark
|
|
291
|
+
)
|
|
292
|
+
|
|
293
|
+
# Check capabilities before running
|
|
294
|
+
if not manager.supports_transactions():
|
|
295
|
+
raise RuntimeError(manager.get_unsupported_message())
|
|
296
|
+
|
|
297
|
+
# Execute atomic insert
|
|
298
|
+
result = manager.execute_atomic_insert(
|
|
299
|
+
table_path="/data/orders",
|
|
300
|
+
dataframe=new_orders_df
|
|
301
|
+
)
|
|
302
|
+
print(f"Version: {result.version_before} -> {result.version_after}")
|
|
303
|
+
|
|
304
|
+
# Rollback to previous version
|
|
305
|
+
result = manager.execute_rollback_to_version(
|
|
306
|
+
table_path="/data/orders",
|
|
307
|
+
version=result.version_before
|
|
308
|
+
)
|
|
309
|
+
|
|
310
|
+
Note:
|
|
311
|
+
For non-ACID platforms (Polars, Pandas), this manager will raise
|
|
312
|
+
clear errors directing users to use Delta Lake or Iceberg.
|
|
313
|
+
"""
|
|
314
|
+
|
|
315
|
+
def __init__(self, platform_name: str, spark_session: Any = None) -> None:
    """Set up the manager for one platform.

    Stores the lower-cased platform name and the optional SparkSession,
    creates a class-scoped logger, then resolves the maintenance handler
    and derives the transaction capability profile from it.

    Args:
        platform_name: Platform name (e.g., "pyspark-df", "delta-lake")
        spark_session: SparkSession instance (required for pyspark-df)

    NOTE(review): nothing in this method raises ValueError itself; any
    such error would have to come from the maintenance lookup - confirm.
    """
    self.platform_name = platform_name.lower()
    self.spark_session = spark_session

    logger_name = f"{__name__}.{self.__class__.__name__}"
    self.logger = logging.getLogger(logger_name)

    # Order matters: the capability profile is derived from the handler.
    self._maintenance_ops = self._get_maintenance_ops()
    self._capabilities = self._build_capabilities()
|
|
334
|
+
|
|
335
|
+
def _get_maintenance_ops(self) -> Any:
|
|
336
|
+
"""Get the appropriate maintenance operations handler.
|
|
337
|
+
|
|
338
|
+
Returns:
|
|
339
|
+
Maintenance operations handler or None
|
|
340
|
+
"""
|
|
341
|
+
if "pyspark" in self.platform_name or "spark" in self.platform_name:
|
|
342
|
+
if self.spark_session is not None:
|
|
343
|
+
try:
|
|
344
|
+
from benchbox.platforms.dataframe.pyspark_maintenance import (
|
|
345
|
+
get_pyspark_maintenance_operations,
|
|
346
|
+
)
|
|
347
|
+
|
|
348
|
+
return get_pyspark_maintenance_operations(
|
|
349
|
+
spark_session=self.spark_session,
|
|
350
|
+
prefer_delta=True,
|
|
351
|
+
)
|
|
352
|
+
except ImportError:
|
|
353
|
+
self.logger.debug("PySpark maintenance module not available")
|
|
354
|
+
return None
|
|
355
|
+
else:
|
|
356
|
+
self.logger.debug("No SparkSession provided for PySpark platform")
|
|
357
|
+
return None
|
|
358
|
+
|
|
359
|
+
return get_maintenance_operations_for_platform(self.platform_name)
|
|
360
|
+
|
|
361
|
+
def _build_capabilities(self) -> DataFrameTransactionCapabilities:
    """Pick the transaction capability profile for this platform.

    The platform name is matched by substring, in this order: "delta",
    "spark", "iceberg", "polars", "pandas". Names matching none of them
    fall back to what the maintenance handler reports.

    Returns:
        DataFrameTransactionCapabilities for this platform
    """
    name = self.platform_name
    ops = self._maintenance_ops
    maintenance_caps = ops.get_capabilities() if ops is not None else None

    # Standalone Delta Lake (delta-rs) wins over every other match.
    if "delta" in name:
        return DELTA_LAKE_TRANSACTION_CAPABILITIES

    # "pyspark" contains "spark", so one test covers both spellings.
    if "spark" in name:
        if maintenance_caps and maintenance_caps.supports_transactions:
            return PYSPARK_DELTA_TRANSACTION_CAPABILITIES
        # Spark without a transactional handler: plain Parquet, no ACID.
        return DataFrameTransactionCapabilities(
            platform_name=name,
            supports_transactions=False,
            table_format="parquet",
            notes="PySpark without Delta Lake. Install delta-spark for ACID support.",
        )

    # Remaining fixed profiles, tried in the original order.
    static_profiles = (
        ("iceberg", ICEBERG_TRANSACTION_CAPABILITIES),
        ("polars", POLARS_TRANSACTION_CAPABILITIES),
        ("pandas", PANDAS_TRANSACTION_CAPABILITIES),
    )
    for marker, profile in static_profiles:
        if marker in name:
            return profile

    # Unknown platform: trust the maintenance handler if it claims support.
    if not (maintenance_caps and maintenance_caps.supports_transactions):
        return DataFrameTransactionCapabilities(
            platform_name=name,
            supports_transactions=False,
            notes="Platform does not support ACID transactions.",
        )

    return DataFrameTransactionCapabilities(
        platform_name=name,
        supports_transactions=True,
        supports_rollback=maintenance_caps.supports_time_travel,
        supports_time_travel=maintenance_caps.supports_time_travel,
        supports_concurrent_writes=True,
        transaction_isolation=maintenance_caps.transaction_isolation,
        table_format="unknown",
    )
|
|
416
|
+
|
|
417
|
+
def get_capabilities(self) -> DataFrameTransactionCapabilities:
|
|
418
|
+
"""Get platform transaction capabilities.
|
|
419
|
+
|
|
420
|
+
Returns:
|
|
421
|
+
DataFrameTransactionCapabilities for this platform
|
|
422
|
+
"""
|
|
423
|
+
return self._capabilities
|
|
424
|
+
|
|
425
|
+
def supports_transactions(self) -> bool:
|
|
426
|
+
"""Check if the platform supports ACID transactions.
|
|
427
|
+
|
|
428
|
+
Returns:
|
|
429
|
+
True if the platform supports transactions
|
|
430
|
+
"""
|
|
431
|
+
return self._capabilities.supports_transactions
|
|
432
|
+
|
|
433
|
+
def supports_operation(self, operation: TransactionOperationType) -> bool:
|
|
434
|
+
"""Check if an operation type is supported.
|
|
435
|
+
|
|
436
|
+
Args:
|
|
437
|
+
operation: The operation to check
|
|
438
|
+
|
|
439
|
+
Returns:
|
|
440
|
+
True if supported
|
|
441
|
+
"""
|
|
442
|
+
return self._capabilities.supports_operation(operation)
|
|
443
|
+
|
|
444
|
+
def get_unsupported_message(self) -> str:
|
|
445
|
+
"""Get error message when transactions are not supported.
|
|
446
|
+
|
|
447
|
+
Returns:
|
|
448
|
+
Helpful error message with alternatives
|
|
449
|
+
"""
|
|
450
|
+
return (
|
|
451
|
+
f"Transaction Primitives benchmark requires ACID transaction support.\n"
|
|
452
|
+
f"Platform '{self.platform_name}' does not support transactions.\n"
|
|
453
|
+
f"\n"
|
|
454
|
+
f"Alternatives:\n"
|
|
455
|
+
f" - Use pyspark-df with Delta Lake table format:\n"
|
|
456
|
+
f" benchbox run --platform pyspark-df --benchmark transaction_primitives\n"
|
|
457
|
+
f" (Requires: pip install pyspark delta-spark)\n"
|
|
458
|
+
f"\n"
|
|
459
|
+
f" - Use delta-lake platform directly:\n"
|
|
460
|
+
f" benchbox run --platform delta-lake --benchmark transaction_primitives\n"
|
|
461
|
+
f" (Requires: pip install deltalake)\n"
|
|
462
|
+
f"\n"
|
|
463
|
+
f"Note: {self._capabilities.notes}"
|
|
464
|
+
)
|
|
465
|
+
|
|
466
|
+
def _validate_path_safe(self, table_path: Path | str) -> tuple[Path | None, str]:
|
|
467
|
+
"""Validate that a path is safe to use (no path traversal).
|
|
468
|
+
|
|
469
|
+
Args:
|
|
470
|
+
table_path: Path to validate
|
|
471
|
+
|
|
472
|
+
Returns:
|
|
473
|
+
Tuple of (resolved_path, error_message). resolved_path is None if invalid.
|
|
474
|
+
"""
|
|
475
|
+
try:
|
|
476
|
+
path = Path(table_path)
|
|
477
|
+
# Resolve to absolute path to detect traversal
|
|
478
|
+
resolved = path.resolve()
|
|
479
|
+
|
|
480
|
+
# Check for path traversal attempts (.. in the original path)
|
|
481
|
+
path_str = str(table_path)
|
|
482
|
+
if ".." in path_str:
|
|
483
|
+
return None, f"Path traversal detected in '{table_path}'. Use absolute paths."
|
|
484
|
+
|
|
485
|
+
return resolved, ""
|
|
486
|
+
except (ValueError, OSError) as e:
|
|
487
|
+
return None, f"Invalid path '{table_path}': {e}"
|
|
488
|
+
|
|
489
|
+
def validate_table_format(self, table_path: Path | str) -> tuple[bool, str]:
|
|
490
|
+
"""Validate that a table path is a supported transactional table.
|
|
491
|
+
|
|
492
|
+
Checks if the table at the given path is a Delta Lake or Iceberg table
|
|
493
|
+
that supports transaction operations.
|
|
494
|
+
|
|
495
|
+
Args:
|
|
496
|
+
table_path: Path to the table directory
|
|
497
|
+
|
|
498
|
+
Returns:
|
|
499
|
+
Tuple of (is_valid, error_message)
|
|
500
|
+
"""
|
|
501
|
+
# Validate path safety first
|
|
502
|
+
resolved_path, error_msg = self._validate_path_safe(table_path)
|
|
503
|
+
if resolved_path is None:
|
|
504
|
+
return False, error_msg
|
|
505
|
+
table_path = resolved_path
|
|
506
|
+
|
|
507
|
+
# Check for Delta Lake table
|
|
508
|
+
delta_log = table_path / "_delta_log"
|
|
509
|
+
if delta_log.exists() and delta_log.is_dir():
|
|
510
|
+
return True, ""
|
|
511
|
+
|
|
512
|
+
# Check for Iceberg table (metadata directory)
|
|
513
|
+
iceberg_metadata = table_path / "metadata"
|
|
514
|
+
if iceberg_metadata.exists() and iceberg_metadata.is_dir():
|
|
515
|
+
# Look for Iceberg-specific files
|
|
516
|
+
version_hint = iceberg_metadata / "version-hint.text"
|
|
517
|
+
if version_hint.exists() or list(iceberg_metadata.glob("*.metadata.json")):
|
|
518
|
+
return True, ""
|
|
519
|
+
|
|
520
|
+
# Table exists but is not transactional
|
|
521
|
+
if table_path.exists():
|
|
522
|
+
return False, (
|
|
523
|
+
f"Table at '{table_path}' is not a Delta Lake or Iceberg table.\n"
|
|
524
|
+
f"Transaction Primitives requires a transactional table format.\n"
|
|
525
|
+
f"\n"
|
|
526
|
+
f"To convert to Delta Lake:\n"
|
|
527
|
+
f" df.write.format('delta').mode('overwrite').save('{table_path}')\n"
|
|
528
|
+
f"\n"
|
|
529
|
+
f"Or to create a new Delta table:\n"
|
|
530
|
+
f" spark.sql(\"CREATE TABLE ... USING DELTA LOCATION '{table_path}'\")"
|
|
531
|
+
)
|
|
532
|
+
|
|
533
|
+
return False, f"Table path '{table_path}' does not exist."
|
|
534
|
+
|
|
535
|
+
def get_table_version(self, table_path: Path | str) -> int | None:
|
|
536
|
+
"""Get the current version of a transactional table.
|
|
537
|
+
|
|
538
|
+
Args:
|
|
539
|
+
table_path: Path to the table directory
|
|
540
|
+
|
|
541
|
+
Returns:
|
|
542
|
+
Current version number, or None if not available
|
|
543
|
+
"""
|
|
544
|
+
table_path = str(table_path)
|
|
545
|
+
|
|
546
|
+
# Delta Lake version
|
|
547
|
+
if self._capabilities.table_format == "delta":
|
|
548
|
+
try:
|
|
549
|
+
if "pyspark" in self.platform_name and self.spark_session:
|
|
550
|
+
from delta.tables import DeltaTable
|
|
551
|
+
|
|
552
|
+
dt = DeltaTable.forPath(self.spark_session, table_path)
|
|
553
|
+
history = dt.history(1).collect()
|
|
554
|
+
if history:
|
|
555
|
+
return history[0]["version"]
|
|
556
|
+
else:
|
|
557
|
+
# delta-rs
|
|
558
|
+
from deltalake import DeltaTable
|
|
559
|
+
|
|
560
|
+
dt = DeltaTable(table_path)
|
|
561
|
+
return dt.version()
|
|
562
|
+
except Exception as e:
|
|
563
|
+
self.logger.warning(f"Could not get table version: {e}")
|
|
564
|
+
return None
|
|
565
|
+
|
|
566
|
+
# Iceberg snapshot ID (not numeric version, but serves similar purpose)
|
|
567
|
+
if self._capabilities.table_format == "iceberg":
|
|
568
|
+
# Would need pyiceberg catalog API
|
|
569
|
+
self.logger.debug("Iceberg version tracking not yet implemented")
|
|
570
|
+
return None
|
|
571
|
+
|
|
572
|
+
return None
|
|
573
|
+
|
|
574
|
+
def execute_atomic_insert(
    self,
    table_path: Path | str,
    dataframe: Any,
    partition_columns: list[str] | None = None,
) -> DataFrameTransactionResult:
    """Append rows to a transactional table as one atomic INSERT.

    The table version is read before and after the write so the result
    shows the commit, and the total wall-clock time is split into the
    write itself and the surrounding overhead.

    Args:
        table_path: Path to the target table
        dataframe: DataFrame containing rows to insert
        partition_columns: Optional partition columns

    Returns:
        DataFrameTransactionResult with operation outcome. Unsupported
        platforms, invalid tables, a missing maintenance handler and any
        exception raised by the write are reported as failure results.
    """
    started = time.time()
    op_type = TransactionOperationType.ATOMIC_INSERT
    fail = DataFrameTransactionResult.failure

    if not self.supports_operation(op_type):
        return fail(op_type, self.get_unsupported_message(), started)

    table_ok, reason = self.validate_table_format(table_path)
    if not table_ok:
        return fail(op_type, reason, started)

    prior_version = self.get_table_version(table_path)

    try:
        if self._maintenance_ops is None:
            return fail(
                op_type,
                f"Maintenance operations not available for {self.platform_name}",
                started,
            )

        # The maintenance layer performs the actual append.
        outcome = self._maintenance_ops.insert_rows(
            table_path=table_path,
            dataframe=dataframe,
            partition_columns=partition_columns,
            mode="append",
        )

        new_version = self.get_table_version(table_path)

        finished = time.time()
        total_ms = (finished - started) * 1000
        write_ms = outcome.duration * 1000

        return DataFrameTransactionResult(
            operation_type=op_type,
            success=outcome.success,
            start_time=started,
            end_time=finished,
            duration_ms=total_ms,
            rows_affected=outcome.rows_affected,
            version_before=prior_version,
            version_after=new_version,
            error_message=outcome.error_message,
            metrics={
                "write_duration_ms": write_ms,
                "version_check_overhead_ms": total_ms - write_ms,
            },
        )

    except Exception as e:
        self.logger.error(f"Atomic INSERT failed: {e}")
        return fail(op_type, str(e), started)
|
|
653
|
+
|
|
654
|
+
def execute_atomic_update(
    self,
    table_path: Path | str,
    condition: str,
    updates: dict[str, Any],
) -> DataFrameTransactionResult:
    """Run an atomic UPDATE through the maintenance handler.

    Args:
        table_path: Path to the target table
        condition: SQL-like condition string
        updates: Column name to new value mapping

    Returns:
        DataFrameTransactionResult with operation outcome. Unsupported
        platforms, invalid tables, a missing maintenance handler and any
        exception raised by the update are reported as failure results.
    """
    started = time.time()
    op_type = TransactionOperationType.ATOMIC_UPDATE
    fail = DataFrameTransactionResult.failure

    if not self.supports_operation(op_type):
        return fail(op_type, self.get_unsupported_message(), started)

    table_ok, reason = self.validate_table_format(table_path)
    if not table_ok:
        return fail(op_type, reason, started)

    prior_version = self.get_table_version(table_path)

    try:
        if self._maintenance_ops is None:
            return fail(
                op_type,
                f"Maintenance operations not available for {self.platform_name}",
                started,
            )

        outcome = self._maintenance_ops.update_rows(
            table_path=table_path,
            condition=condition,
            updates=updates,
        )

        new_version = self.get_table_version(table_path)
        finished = time.time()

        return DataFrameTransactionResult(
            operation_type=op_type,
            success=outcome.success,
            start_time=started,
            end_time=finished,
            duration_ms=(finished - started) * 1000,
            rows_affected=outcome.rows_affected,
            version_before=prior_version,
            version_after=new_version,
            error_message=outcome.error_message,
        )

    except Exception as e:
        self.logger.error(f"Atomic UPDATE failed: {e}")
        return fail(op_type, str(e), started)
|
|
718
|
+
|
|
719
|
+
def execute_atomic_delete(
    self,
    table_path: Path | str,
    condition: str,
) -> DataFrameTransactionResult:
    """Run an atomic DELETE through the maintenance handler.

    Args:
        table_path: Path to the target table
        condition: SQL-like condition string

    Returns:
        DataFrameTransactionResult with operation outcome. Unsupported
        platforms, invalid tables, a missing maintenance handler and any
        exception raised by the delete are reported as failure results.
    """
    started = time.time()
    op_type = TransactionOperationType.ATOMIC_DELETE
    fail = DataFrameTransactionResult.failure

    if not self.supports_operation(op_type):
        return fail(op_type, self.get_unsupported_message(), started)

    table_ok, reason = self.validate_table_format(table_path)
    if not table_ok:
        return fail(op_type, reason, started)

    prior_version = self.get_table_version(table_path)

    try:
        if self._maintenance_ops is None:
            return fail(
                op_type,
                f"Maintenance operations not available for {self.platform_name}",
                started,
            )

        outcome = self._maintenance_ops.delete_rows(
            table_path=table_path,
            condition=condition,
        )

        new_version = self.get_table_version(table_path)
        finished = time.time()

        return DataFrameTransactionResult(
            operation_type=op_type,
            success=outcome.success,
            start_time=started,
            end_time=finished,
            duration_ms=(finished - started) * 1000,
            rows_affected=outcome.rows_affected,
            version_before=prior_version,
            version_after=new_version,
            error_message=outcome.error_message,
        )

    except Exception as e:
        self.logger.error(f"Atomic DELETE failed: {e}")
        return fail(op_type, str(e), started)
|
|
780
|
+
|
|
781
|
+
def execute_atomic_merge(
    self,
    table_path: Path | str,
    source_dataframe: Any,
    merge_condition: str,
    when_matched: dict[str, Any] | None = None,
    when_not_matched: dict[str, Any] | None = None,
) -> DataFrameTransactionResult:
    """Run an atomic MERGE (upsert) through the maintenance handler.

    Args:
        table_path: Path to the target table
        source_dataframe: DataFrame containing source rows
        merge_condition: Join condition for matching rows
        when_matched: Updates to apply when matched
        when_not_matched: Values for insert when not matched

    Returns:
        DataFrameTransactionResult with operation outcome. Unsupported
        platforms, invalid tables, a missing maintenance handler and any
        exception raised by the merge are reported as failure results.
    """
    started = time.time()
    op_type = TransactionOperationType.ATOMIC_MERGE
    fail = DataFrameTransactionResult.failure

    if not self.supports_operation(op_type):
        return fail(op_type, self.get_unsupported_message(), started)

    table_ok, reason = self.validate_table_format(table_path)
    if not table_ok:
        return fail(op_type, reason, started)

    prior_version = self.get_table_version(table_path)

    try:
        if self._maintenance_ops is None:
            return fail(
                op_type,
                f"Maintenance operations not available for {self.platform_name}",
                started,
            )

        outcome = self._maintenance_ops.merge_rows(
            table_path=table_path,
            source_dataframe=source_dataframe,
            merge_condition=merge_condition,
            when_matched=when_matched,
            when_not_matched=when_not_matched,
        )

        new_version = self.get_table_version(table_path)
        finished = time.time()

        return DataFrameTransactionResult(
            operation_type=op_type,
            success=outcome.success,
            start_time=started,
            end_time=finished,
            duration_ms=(finished - started) * 1000,
            rows_affected=outcome.rows_affected,
            version_before=prior_version,
            version_after=new_version,
            error_message=outcome.error_message,
        )

    except Exception as e:
        self.logger.error(f"Atomic MERGE failed: {e}")
        return fail(op_type, str(e), started)
|
|
851
|
+
|
|
852
|
+
def execute_rollback_to_version(
    self,
    table_path: Path | str,
    version: int,
) -> DataFrameTransactionResult:
    """Rollback a table to a previous version.

    For Delta Lake, this uses RESTORE TO VERSION (delta-spark when a
    "pyspark" platform has a SparkSession, delta-rs otherwise).
    For Iceberg, this uses rollback_to_snapshot (not yet implemented).

    Args:
        table_path: Path to the table
        version: Target version number; must not be None or negative

    Returns:
        DataFrameTransactionResult with operation outcome. Unsupported
        platforms, an unusable target version, invalid tables,
        unimplemented table formats and any exception raised by the
        restore are reported as failure results.
    """
    start_time = time.time()
    operation = TransactionOperationType.ROLLBACK_TO_VERSION

    if not self.supports_operation(operation):
        return DataFrameTransactionResult.failure(
            operation,
            f"Rollback not supported on {self.platform_name}. "
            f"Use Delta Lake or Iceberg for RESTORE/rollback operations.",
            start_time,
        )

    # get_table_version() can return None, and callers commonly forward a
    # previous result's version_before. Reject that (and negative numbers)
    # here with a clear message instead of an opaque library error.
    if version is None or (isinstance(version, int) and version < 0):
        return DataFrameTransactionResult.failure(
            operation,
            f"Invalid rollback target version: {version!r}. "
            f"A non-negative table version is required.",
            start_time,
        )

    is_valid, error_msg = self.validate_table_format(table_path)
    if not is_valid:
        return DataFrameTransactionResult.failure(operation, error_msg, start_time)

    version_before = self.get_table_version(table_path)
    table_path_str = str(table_path)

    try:
        # Delta Lake RESTORE
        if self._capabilities.table_format == "delta":
            if "pyspark" in self.platform_name and self.spark_session:
                from delta.tables import DeltaTable

                dt = DeltaTable.forPath(self.spark_session, table_path_str)
                dt.restoreToVersion(version)
            else:
                # delta-rs
                from deltalake import DeltaTable

                dt = DeltaTable(table_path_str)
                dt.restore(version)

            version_after = self.get_table_version(table_path)

            end_time = time.time()
            return DataFrameTransactionResult(
                operation_type=operation,
                success=True,
                start_time=start_time,
                end_time=end_time,
                duration_ms=(end_time - start_time) * 1000,
                rows_affected=0,  # RESTORE doesn't report rows
                version_before=version_before,
                version_after=version_after,
                metrics={"target_version": version},
            )

        return DataFrameTransactionResult.failure(
            operation,
            f"Rollback not implemented for table format: {self._capabilities.table_format}",
            start_time,
        )

    except Exception as e:
        self.logger.error(f"Rollback to version {version} failed: {e}")
        return DataFrameTransactionResult.failure(operation, str(e), start_time)
|
|
926
|
+
|
|
927
|
+
def execute_rollback_to_timestamp(
    self,
    table_path: Path | str,
    timestamp: str,
) -> DataFrameTransactionResult:
    """Rollback a table to a previous timestamp.

    For Delta Lake, this uses RESTORE TO TIMESTAMP: delta-spark receives
    the timestamp unchanged, while delta-rs receives a ``datetime`` parsed
    from the ISO string (a trailing "Z" is read as UTC).

    Args:
        table_path: Path to the table
        timestamp: Target timestamp (ISO format or SQL timestamp)

    Returns:
        DataFrameTransactionResult with operation outcome. Unsupported
        platforms, invalid tables, unimplemented table formats and any
        exception raised by the restore are reported as failure results.
    """
    start_time = time.time()
    operation = TransactionOperationType.ROLLBACK_TO_TIMESTAMP

    if not self.supports_operation(operation):
        return DataFrameTransactionResult.failure(
            operation,
            f"Rollback not supported on {self.platform_name}.",
            start_time,
        )

    is_valid, error_msg = self.validate_table_format(table_path)
    if not is_valid:
        return DataFrameTransactionResult.failure(operation, error_msg, start_time)

    version_before = self.get_table_version(table_path)
    table_path_str = str(table_path)

    try:
        if self._capabilities.table_format == "delta":
            if "pyspark" in self.platform_name and self.spark_session:
                from delta.tables import DeltaTable

                dt = DeltaTable.forPath(self.spark_session, table_path_str)
                dt.restoreToTimestamp(timestamp)
            else:
                # delta-rs - restore to timestamp
                from datetime import datetime

                from deltalake import DeltaTable

                dt = DeltaTable(table_path_str)
                if isinstance(timestamp, str):
                    # fromisoformat() on older Pythons rejects a "Z"
                    # suffix, so spell it as an explicit UTC offset.
                    ts = datetime.fromisoformat(timestamp.replace("Z", "+00:00"))
                else:
                    ts = timestamp
                # DeltaTable.restore() takes the target (version, datetime
                # or string) as its first parameter; it has no
                # ``datetime_target`` keyword.
                dt.restore(ts)

            version_after = self.get_table_version(table_path)

            end_time = time.time()
            return DataFrameTransactionResult(
                operation_type=operation,
                success=True,
                start_time=start_time,
                end_time=end_time,
                duration_ms=(end_time - start_time) * 1000,
                rows_affected=0,
                version_before=version_before,
                version_after=version_after,
                metrics={"target_timestamp": timestamp},
            )

        return DataFrameTransactionResult.failure(
            operation,
            f"Rollback not implemented for table format: {self._capabilities.table_format}",
            start_time,
        )

    except Exception as e:
        self.logger.error(f"Rollback to timestamp {timestamp} failed: {e}")
        return DataFrameTransactionResult.failure(operation, str(e), start_time)
|
|
1006
|
+
|
|
1007
|
+
def execute_time_travel_query(
    self,
    table_path: Path | str,
    version: int | None = None,
    timestamp: str | None = None,
) -> DataFrameTransactionResult:
    """Query a table at a historical version or timestamp.

    The historical snapshot is loaded and its rows are counted. When both
    ``version`` and ``timestamp`` are given, ``version`` takes precedence.

    Args:
        table_path: Path to the table
        version: Target version number (mutually exclusive with timestamp)
        timestamp: Target timestamp (mutually exclusive with version)

    Returns:
        DataFrameTransactionResult with operation outcome and row count
        (in ``rows_affected`` and ``metrics["row_count"]``). Unsupported
        platforms, a missing target, invalid tables, unimplemented table
        formats and any exception raised by the read are reported as
        failure results.
    """
    start_time = time.time()
    operation = TransactionOperationType.TIME_TRAVEL_QUERY

    if not self.supports_operation(operation):
        return DataFrameTransactionResult.failure(
            operation,
            f"Time travel not supported on {self.platform_name}. Use Delta Lake or Iceberg for historical queries.",
            start_time,
        )

    if version is None and timestamp is None:
        return DataFrameTransactionResult.failure(
            operation,
            "Either version or timestamp must be provided for time travel query.",
            start_time,
        )

    is_valid, error_msg = self.validate_table_format(table_path)
    if not is_valid:
        return DataFrameTransactionResult.failure(operation, error_msg, start_time)

    table_path_str = str(table_path)
    current_version = self.get_table_version(table_path)

    try:
        if self._capabilities.table_format == "delta":
            if "pyspark" in self.platform_name and self.spark_session:
                # PySpark time travel via reader options
                reader = self.spark_session.read.format("delta")
                if version is not None:
                    reader = reader.option("versionAsOf", version)
                elif timestamp is not None:
                    reader = reader.option("timestampAsOf", timestamp)

                df = reader.load(table_path_str)
                row_count = df.count()
            else:
                # delta-rs time travel
                from deltalake import DeltaTable

                if version is not None:
                    dt = DeltaTable(table_path_str, version=version)
                else:
                    from datetime import datetime

                    ts = datetime.fromisoformat(timestamp.replace("Z", "+00:00"))
                    # The DeltaTable constructor has no datetime keyword;
                    # open the table, then move it to the snapshot that
                    # was current at ``ts``.
                    dt = DeltaTable(table_path_str)
                    dt.load_as_version(ts)

                arrow_table = dt.to_pyarrow_table()
                row_count = arrow_table.num_rows

            end_time = time.time()
            return DataFrameTransactionResult(
                operation_type=operation,
                success=True,
                start_time=start_time,
                end_time=end_time,
                duration_ms=(end_time - start_time) * 1000,
                rows_affected=row_count,
                version_before=current_version,
                version_after=current_version,  # No change from query
                metrics={
                    "query_version": version,
                    "query_timestamp": timestamp,
                    "row_count": row_count,
                },
            )

        return DataFrameTransactionResult.failure(
            operation,
            f"Time travel not implemented for table format: {self._capabilities.table_format}",
            start_time,
        )

    except Exception as e:
        self.logger.error(f"Time travel query failed: {e}")
        return DataFrameTransactionResult.failure(operation, str(e), start_time)
|
|
1101
|
+
|
|
1102
|
+
def execute_version_compare(
    self,
    table_path: Path | str,
    version1: int,
    version2: int,
) -> DataFrameTransactionResult:
    """Report how two versions of a table differ in row count and columns.

    Both versions are loaded, their rows counted and their column names
    compared. Loading goes through the Spark session when the platform
    name contains "pyspark" and a session is set; otherwise through
    ``deltalake.DeltaTable``. Only the "delta" table format is handled.

    Args:
        table_path: Location of the table.
        version1: Baseline version number (typically the older one).
        version2: Version compared against the baseline (typically the
            newer one).

    Returns:
        A DataFrameTransactionResult. On success ``rows_affected`` is the
        absolute row-count difference and ``metrics`` holds both row
        counts, the signed difference (version2 minus version1) and the
        column names added/removed. A failure result is returned when the
        operation is unsupported, table-format validation fails, the
        format is not "delta", or an exception is raised while reading.
    """
    started = time.time()
    op = TransactionOperationType.VERSION_COMPARE

    if not self.supports_operation(op):
        return DataFrameTransactionResult.failure(
            op,
            f"Version compare not supported on {self.platform_name}. "
            "Use Delta Lake or Iceberg for time travel operations.",
            started,
        )

    format_ok, format_error = self.validate_table_format(table_path)
    if not format_ok:
        return DataFrameTransactionResult.failure(op, format_error, started)

    location = str(table_path)
    # Captured once up front: a comparison only reads, so the same value is
    # reported as both version_before and version_after.
    live_version = self.get_table_version(table_path)

    try:
        if self._capabilities.table_format != "delta":
            return DataFrameTransactionResult.failure(
                op,
                f"Version compare not implemented for table format: {self._capabilities.table_format}",
                started,
            )

        if "pyspark" in self.platform_name and self.spark_session:
            # PySpark path: one fresh reader per version.
            def load_version(number):
                return self.spark_session.read.format("delta").option("versionAsOf", number).load(location)

            older_df = load_version(version1)
            newer_df = load_version(version2)
            older_rows = older_df.count()
            newer_rows = newer_df.count()
            older_columns = [field.name for field in older_df.schema.fields]
            newer_columns = [field.name for field in newer_df.schema.fields]
        else:
            # delta-rs path: imported lazily so the dependency is only
            # needed when this branch actually runs.
            from deltalake import DeltaTable

            older_table = DeltaTable(location, version=version1)
            newer_table = DeltaTable(location, version=version2)

            older_arrow = older_table.to_pyarrow_table()
            newer_arrow = newer_table.to_pyarrow_table()

            older_rows = older_arrow.num_rows
            newer_rows = newer_arrow.num_rows
            older_columns = older_arrow.schema.names
            newer_columns = newer_arrow.schema.names

        # Signed so callers can tell growth from shrinkage.
        row_delta = newer_rows - older_rows
        added_columns = [name for name in newer_columns if name not in older_columns]
        removed_columns = [name for name in older_columns if name not in newer_columns]

        finished = time.time()
        comparison = {
            "version1": version1,
            "version2": version2,
            "row_count_v1": older_rows,
            "row_count_v2": newer_rows,
            "row_difference": row_delta,
            "columns_added": added_columns,
            "columns_removed": removed_columns,
            "schema_changed": bool(added_columns or removed_columns),
        }
        return DataFrameTransactionResult(
            operation_type=op,
            success=True,
            start_time=started,
            end_time=finished,
            duration_ms=(finished - started) * 1000,
            rows_affected=abs(row_delta),
            version_before=live_version,
            version_after=live_version,
            metrics=comparison,
        )

    except Exception as e:
        self.logger.error(f"Version compare failed: {e}")
        return DataFrameTransactionResult.failure(op, str(e), started)
|
|
1200
|
+
|
|
1201
|
+
|
|
1202
|
+
def get_dataframe_transaction_manager(
    platform_name: str,
    spark_session: Any = None,
) -> DataFrameTransactionOperationsManager | None:
    """Build a transaction operations manager for a DataFrame platform.

    The platform name is lower-cased and matched by substring against a
    fixed list of DataFrame platform markers.

    Args:
        platform_name: Platform name (e.g., "pyspark-df", "delta-lake").
            The original, un-lowered value is what gets passed to the
            manager.
        spark_session: Forwarded unchanged to the manager as the
            ``spark_session`` keyword argument.

    Returns:
        A DataFrameTransactionOperationsManager when the name matches a
        known marker and construction succeeds. None when the name matches
        no marker (logged at debug level) or when construction raises
        (logged as a warning).
    """
    dataframe_markers = (
        "polars-df",
        "polars",
        "pandas-df",
        "pandas",
        "pyspark-df",
        "pyspark",
        "delta-lake",
        "delta",
        "iceberg",
    )
    normalized = platform_name.lower()
    is_dataframe_platform = any(marker in normalized for marker in dataframe_markers)

    if is_dataframe_platform:
        try:
            return DataFrameTransactionOperationsManager(platform_name, spark_session=spark_session)
        except Exception as e:
            # Best effort: a construction failure is reported, not raised.
            logger.warning(f"Failed to create transaction manager for {platform_name}: {e}")
            return None

    logger.debug(f"Platform {platform_name} is not a DataFrame platform")
    return None
|
|
1239
|
+
|
|
1240
|
+
|
|
1241
|
+
def validate_transaction_primitives_platform(platform_name: str) -> tuple[bool, str]:
    """Check whether a platform may run the Transaction Primitives benchmark.

    Matching is case-insensitive and by substring. Supported markers are
    checked first, so a name containing both a supported and an
    unsupported marker is accepted.

    Args:
        platform_name: Platform name to validate.

    Returns:
        ``(True, "")`` when the name contains a supported marker or no
        known marker at all; ``(False, message)`` when it contains only an
        unsupported marker, where ``message`` is a multi-line explanation
        listing the supported platforms and an example command.
    """
    normalized = platform_name.lower()

    def mentions_any(markers: tuple[str, ...]) -> bool:
        # True when at least one marker occurs anywhere in the name.
        return any(marker in normalized for marker in markers)

    # Platforms that definitely support transactions.
    if mentions_any(("delta", "iceberg", "pyspark")):
        return True, ""

    # Anything not on the known-unsupported list is let through; no
    # warning is emitted for such unknown platforms.
    if not mentions_any(("polars", "pandas", "duckdb", "sqlite", "datafusion")):
        return True, ""

    rejection = (
        "Transaction Primitives benchmark requires ACID transaction support.\n"
        f"Platform '{platform_name}' does not support DataFrame transactions.\n"
        "\n"
        "Supported platforms:\n"
        " - pyspark-df (with Delta Lake table format)\n"
        " - delta-lake (standalone Delta Lake)\n"
        " - iceberg (Apache Iceberg tables)\n"
        "\n"
        "Example:\n"
        " benchbox run --platform pyspark-df --benchmark transaction_primitives\n"
    )
    return False, rejection
|
|
1280
|
+
|
|
1281
|
+
|
|
1282
|
+
# Public names exported by a star-import of this module.
__all__ = [
    # Types used by the transaction operations in this module.
    "TransactionOperationType",
    "DataFrameTransactionCapabilities",
    "DataFrameTransactionResult",
    "DataFrameTransactionOperationsManager",
    # Module-level factory and platform validation helpers.
    "get_dataframe_transaction_manager",
    "validate_transaction_primitives_platform",
    # Presumably per-platform capability presets defined earlier in the
    # module (not visible from here) -- confirm against their definitions.
    "DELTA_LAKE_TRANSACTION_CAPABILITIES",
    "PYSPARK_DELTA_TRANSACTION_CAPABILITIES",
    "ICEBERG_TRANSACTION_CAPABILITIES",
    "POLARS_TRANSACTION_CAPABILITIES",
    "PANDAS_TRANSACTION_CAPABILITIES",
]
|