benchbox 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (728)
  1. benchbox/__init__.py +1 -1
  2. benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query72.tpl +1 -1
  3. benchbox/_binaries/tpc-ds/{darwin-x86_64/query_templates/ansi.tpl → templates/query_templates/sqlserver.tpl} +1 -1
  4. benchbox/_binaries/tpc-ds/templates/query_variants/README +6 -0
  5. benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query10.tpl → templates/query_variants/query10a.tpl} +13 -14
  6. benchbox/_binaries/tpc-ds/{darwin-x86_64/query_templates/query14.tpl → templates/query_variants/query14a.tpl} +30 -26
  7. benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query18.tpl → templates/query_variants/query18a.tpl} +40 -19
  8. benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query22.tpl → templates/query_variants/query22a.tpl} +31 -9
  9. benchbox/_binaries/tpc-ds/{darwin-x86_64/query_templates/query27.tpl → templates/query_variants/query27a.tpl} +23 -10
  10. benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query35.tpl → templates/query_variants/query35a.tpl} +9 -8
  11. benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query36.tpl → templates/query_variants/query36a.tpl} +24 -12
  12. benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query51.tpl → templates/query_variants/query51a.tpl} +37 -20
  13. benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query5.tpl → templates/query_variants/query5a.tpl} +15 -10
  14. benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query67.tpl → templates/query_variants/query67a.tpl} +46 -18
  15. benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query70.tpl → templates/query_variants/query70a.tpl} +31 -27
  16. benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query77.tpl → templates/query_variants/query77a.tpl} +22 -15
  17. benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query80.tpl → templates/query_variants/query80a.tpl} +22 -8
  18. benchbox/_binaries/tpc-ds/{linux-arm64/query_templates/query86.tpl → templates/query_variants/query86a.tpl} +22 -13
  19. benchbox/_binaries/tpc-h/templates/dists.dss +836 -0
  20. benchbox/_binaries/tpc-h/templates/queries/1.sql +28 -0
  21. benchbox/_binaries/tpc-h/templates/queries/10.sql +38 -0
  22. benchbox/_binaries/tpc-h/templates/queries/11.sql +34 -0
  23. benchbox/_binaries/tpc-h/templates/queries/12.sql +35 -0
  24. benchbox/_binaries/tpc-h/templates/queries/13.sql +27 -0
  25. benchbox/_binaries/tpc-h/templates/queries/14.sql +20 -0
  26. benchbox/_binaries/tpc-h/templates/queries/15.sql +40 -0
  27. benchbox/_binaries/tpc-h/templates/queries/16.sql +37 -0
  28. benchbox/_binaries/tpc-h/templates/queries/17.sql +24 -0
  29. benchbox/_binaries/tpc-h/templates/queries/18.sql +39 -0
  30. benchbox/_binaries/tpc-h/templates/queries/19.sql +42 -0
  31. benchbox/_binaries/tpc-h/templates/queries/2.sql +50 -0
  32. benchbox/_binaries/tpc-h/templates/queries/20.sql +44 -0
  33. benchbox/_binaries/tpc-h/templates/queries/21.sql +46 -0
  34. benchbox/_binaries/tpc-h/templates/queries/22.sql +44 -0
  35. benchbox/_binaries/tpc-h/templates/queries/3.sql +29 -0
  36. benchbox/_binaries/tpc-h/templates/queries/4.sql +28 -0
  37. benchbox/_binaries/tpc-h/templates/queries/5.sql +31 -0
  38. benchbox/_binaries/tpc-h/templates/queries/6.sql +16 -0
  39. benchbox/_binaries/tpc-h/templates/queries/7.sql +46 -0
  40. benchbox/_binaries/tpc-h/templates/queries/8.sql +44 -0
  41. benchbox/_binaries/tpc-h/templates/queries/9.sql +39 -0
  42. benchbox/_binaries/tpc-h/templates/variants/12a.sql +27 -0
  43. benchbox/_binaries/tpc-h/templates/variants/13a.sql +30 -0
  44. benchbox/_binaries/tpc-h/templates/variants/14a.sql +18 -0
  45. benchbox/_binaries/tpc-h/templates/variants/15a.sql +39 -0
  46. benchbox/_binaries/tpc-h/templates/variants/8a.sql +77 -0
  47. benchbox/base.py +88 -121
  48. benchbox/cli/benchmarks.py +3 -3
  49. benchbox/cli/commands/calculate_qphh.py +55 -14
  50. benchbox/cli/commands/checks.py +1 -4
  51. benchbox/cli/commands/convert.py +8 -3
  52. benchbox/cli/commands/metrics.py +55 -14
  53. benchbox/cli/commands/results.py +131 -3
  54. benchbox/cli/commands/run.py +157 -22
  55. benchbox/cli/commands/visualize.py +3 -3
  56. benchbox/cli/composite_params.py +1 -1
  57. benchbox/cli/config.py +13 -3
  58. benchbox/cli/database.py +3 -3
  59. benchbox/cli/dryrun.py +30 -4
  60. benchbox/cli/exceptions.py +2 -1
  61. benchbox/cli/execution_pipeline.py +2 -1
  62. benchbox/cli/orchestrator.py +25 -71
  63. benchbox/cli/tuning.py +1 -1
  64. benchbox/core/ai_primitives/benchmark.py +53 -0
  65. benchbox/core/ai_primitives/dataframe_operations.py +1217 -0
  66. benchbox/core/base_benchmark.py +90 -68
  67. benchbox/core/coffeeshop/queries.py +1 -1
  68. benchbox/core/coffeeshop/schema.py +1 -1
  69. benchbox/core/comparison/plotter.py +5 -4
  70. benchbox/core/dataframe/__init__.py +26 -0
  71. benchbox/core/dataframe/benchmark_suite.py +5 -4
  72. benchbox/core/dataframe/context.py +45 -0
  73. benchbox/core/dataframe/data_loader.py +180 -79
  74. benchbox/core/dataframe/maintenance_interface.py +866 -0
  75. benchbox/core/dryrun.py +152 -22
  76. benchbox/core/expected_results/registry.py +22 -5
  77. benchbox/core/manifest/io.py +4 -3
  78. benchbox/core/metadata_primitives/__init__.py +31 -0
  79. benchbox/core/metadata_primitives/benchmark.py +337 -0
  80. benchbox/core/metadata_primitives/dataframe_operations.py +1824 -0
  81. benchbox/core/platform_registry.py +134 -45
  82. benchbox/core/read_primitives/benchmark.py +56 -4
  83. benchbox/core/read_primitives/dataframe_queries.py +6547 -0
  84. benchbox/core/results/__init__.py +47 -6
  85. benchbox/core/results/builder.py +909 -0
  86. benchbox/core/results/database.py +5 -5
  87. benchbox/core/results/exporter.py +58 -96
  88. benchbox/core/results/filenames.py +102 -0
  89. benchbox/core/results/loader.py +10 -9
  90. benchbox/core/results/metrics.py +211 -0
  91. benchbox/core/results/models.py +3 -1
  92. benchbox/core/results/normalizer.py +346 -0
  93. benchbox/core/results/platform_info.py +235 -0
  94. benchbox/core/results/query_normalizer.py +200 -0
  95. benchbox/core/results/schema.py +368 -69
  96. benchbox/core/runner/conversion.py +2 -0
  97. benchbox/core/runner/dataframe_runner.py +135 -131
  98. benchbox/core/runner/runner.py +111 -18
  99. benchbox/core/schemas.py +145 -3
  100. benchbox/core/ssb/generator.py +14 -2
  101. benchbox/core/tpc_compliance.py +4 -4
  102. benchbox/core/tpc_metrics.py +9 -4
  103. benchbox/core/tpcdi/generator/manifest.py +15 -2
  104. benchbox/core/tpcds/benchmark/runner.py +3 -7
  105. benchbox/core/tpcds/c_tools.py +34 -28
  106. benchbox/core/tpcds/dataframe_queries/queries.py +44 -21
  107. benchbox/core/tpcds/generator/filesystem.py +23 -11
  108. benchbox/core/tpcds/generator/manager.py +3 -2
  109. benchbox/core/tpcds/maintenance_test.py +281 -0
  110. benchbox/core/tpcds/power_test.py +21 -11
  111. benchbox/core/tpcds/throughput_test.py +27 -9
  112. benchbox/core/tpcds_obt/etl/transformer.py +24 -5
  113. benchbox/core/tpch/dataframe_queries.py +46 -43
  114. benchbox/core/tpch/generator.py +21 -8
  115. benchbox/core/tpch/maintenance_test.py +87 -0
  116. benchbox/core/tpch/power_test.py +21 -5
  117. benchbox/core/tpch/queries.py +2 -7
  118. benchbox/core/tpch/streams.py +3 -19
  119. benchbox/core/transaction_primitives/benchmark.py +99 -0
  120. benchbox/core/transaction_primitives/dataframe_operations.py +1294 -0
  121. benchbox/core/transaction_primitives/generator.py +11 -4
  122. benchbox/core/visualization/__init__.py +2 -2
  123. benchbox/core/visualization/charts.py +4 -4
  124. benchbox/core/visualization/dependencies.py +1 -12
  125. benchbox/core/visualization/exporters.py +15 -26
  126. benchbox/core/visualization/result_plotter.py +90 -49
  127. benchbox/core/visualization/templates.py +6 -6
  128. benchbox/core/write_primitives/__init__.py +13 -0
  129. benchbox/core/write_primitives/benchmark.py +66 -0
  130. benchbox/core/write_primitives/dataframe_operations.py +912 -0
  131. benchbox/core/write_primitives/generator.py +11 -4
  132. benchbox/mcp/__init__.py +5 -1
  133. benchbox/mcp/errors.py +29 -0
  134. benchbox/mcp/resources/registry.py +12 -7
  135. benchbox/mcp/schemas.py +62 -0
  136. benchbox/mcp/server.py +17 -14
  137. benchbox/mcp/tools/__init__.py +3 -0
  138. benchbox/mcp/tools/analytics.py +550 -582
  139. benchbox/mcp/tools/benchmark.py +603 -611
  140. benchbox/mcp/tools/discovery.py +156 -205
  141. benchbox/mcp/tools/results.py +332 -533
  142. benchbox/mcp/tools/visualization.py +449 -0
  143. benchbox/platforms/__init__.py +740 -622
  144. benchbox/platforms/adapter_factory.py +6 -6
  145. benchbox/platforms/azure_synapse.py +3 -7
  146. benchbox/platforms/base/adapter.py +189 -49
  147. benchbox/platforms/base/cloud_spark/config.py +8 -0
  148. benchbox/platforms/base/cloud_spark/mixins.py +96 -0
  149. benchbox/platforms/base/cloud_spark/session.py +4 -2
  150. benchbox/platforms/base/cloud_spark/staging.py +15 -7
  151. benchbox/platforms/base/data_loading.py +315 -1
  152. benchbox/platforms/base/format_capabilities.py +37 -2
  153. benchbox/platforms/base/utils.py +6 -4
  154. benchbox/platforms/bigquery.py +5 -6
  155. benchbox/platforms/clickhouse_cloud.py +263 -0
  156. benchbox/platforms/databricks/adapter.py +16 -15
  157. benchbox/platforms/databricks/dataframe_adapter.py +4 -1
  158. benchbox/platforms/dataframe/__init__.py +31 -0
  159. benchbox/platforms/dataframe/benchmark_mixin.py +779 -0
  160. benchbox/platforms/dataframe/cudf_df.py +3 -3
  161. benchbox/platforms/dataframe/dask_df.py +3 -3
  162. benchbox/platforms/dataframe/datafusion_df.py +152 -15
  163. benchbox/platforms/dataframe/delta_lake_maintenance.py +341 -0
  164. benchbox/platforms/dataframe/ducklake_maintenance.py +402 -0
  165. benchbox/platforms/dataframe/expression_family.py +47 -8
  166. benchbox/platforms/dataframe/hudi_maintenance.py +437 -0
  167. benchbox/platforms/dataframe/iceberg_maintenance.py +605 -0
  168. benchbox/platforms/dataframe/modin_df.py +3 -3
  169. benchbox/platforms/dataframe/pandas_df.py +3 -3
  170. benchbox/platforms/dataframe/pandas_family.py +59 -8
  171. benchbox/platforms/dataframe/platform_checker.py +16 -49
  172. benchbox/platforms/dataframe/polars_df.py +14 -12
  173. benchbox/platforms/dataframe/polars_maintenance.py +630 -0
  174. benchbox/platforms/dataframe/pyspark_df.py +15 -0
  175. benchbox/platforms/dataframe/pyspark_maintenance.py +613 -0
  176. benchbox/platforms/datafusion.py +5 -6
  177. benchbox/platforms/duckdb.py +2 -1
  178. benchbox/platforms/fabric_warehouse.py +15 -15
  179. benchbox/platforms/firebolt.py +3 -2
  180. benchbox/platforms/influxdb/adapter.py +7 -3
  181. benchbox/platforms/motherduck.py +3 -2
  182. benchbox/platforms/onehouse/__init__.py +39 -0
  183. benchbox/platforms/onehouse/onehouse_client.py +509 -0
  184. benchbox/platforms/onehouse/quanton_adapter.py +646 -0
  185. benchbox/platforms/postgresql.py +5 -9
  186. benchbox/platforms/presto.py +2 -2
  187. benchbox/platforms/pyspark/session.py +3 -3
  188. benchbox/platforms/pyspark/sql_adapter.py +2 -3
  189. benchbox/platforms/redshift.py +7 -7
  190. benchbox/platforms/snowflake.py +4 -4
  191. benchbox/platforms/snowpark_connect.py +2 -1
  192. benchbox/platforms/trino.py +2 -2
  193. benchbox/release/__init__.py +17 -0
  194. benchbox/release/content_validation.py +745 -0
  195. benchbox/release/workflow.py +17 -0
  196. benchbox/utils/VERSION_MANAGEMENT.md +1 -1
  197. benchbox/utils/cloud_storage.py +7 -5
  198. benchbox/utils/compression.py +8 -8
  199. benchbox/utils/compression_mixin.py +2 -1
  200. benchbox/utils/data_validation.py +23 -14
  201. benchbox/utils/dependencies.py +47 -7
  202. benchbox/utils/file_format.py +407 -0
  203. benchbox/utils/format_converters/__init__.py +5 -1
  204. benchbox/utils/format_converters/ducklake_converter.py +227 -0
  205. benchbox/utils/format_converters/vortex_converter.py +168 -0
  206. benchbox/utils/tpc_compilation.py +43 -0
  207. benchbox/utils/version.py +14 -2
  208. {benchbox-0.1.0.dist-info → benchbox-0.1.1.dist-info}/METADATA +15 -15
  209. benchbox-0.1.1.dist-info/RECORD +839 -0
  210. {benchbox-0.1.0.dist-info → benchbox-0.1.1.dist-info}/WHEEL +1 -1
  211. benchbox/_binaries/tpc-ds/darwin-arm64/query_templates/sqlserver.tpl +0 -37
  212. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/README +0 -4
  213. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/db2.tpl +0 -38
  214. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/netezza.tpl +0 -38
  215. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/oracle.tpl +0 -38
  216. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query1.tpl +0 -62
  217. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query10.tpl +0 -98
  218. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query11.tpl +0 -119
  219. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query12.tpl +0 -72
  220. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query13.tpl +0 -89
  221. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query15.tpl +0 -56
  222. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query16.tpl +0 -76
  223. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query17.tpl +0 -80
  224. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query18.tpl +0 -73
  225. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query19.tpl +0 -64
  226. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query2.tpl +0 -94
  227. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query20.tpl +0 -67
  228. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query21.tpl +0 -65
  229. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query22.tpl +0 -54
  230. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query23.tpl +0 -144
  231. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query24.tpl +0 -147
  232. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query25.tpl +0 -84
  233. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query26.tpl +0 -61
  234. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query28.tpl +0 -90
  235. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query29.tpl +0 -85
  236. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query3.tpl +0 -58
  237. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query30.tpl +0 -66
  238. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query31.tpl +0 -88
  239. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query32.tpl +0 -65
  240. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query33.tpl +0 -113
  241. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query34.tpl +0 -77
  242. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query35.tpl +0 -98
  243. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query36.tpl +0 -74
  244. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query37.tpl +0 -57
  245. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query38.tpl +0 -58
  246. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query39.tpl +0 -93
  247. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query4.tpl +0 -154
  248. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query40.tpl +0 -63
  249. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query41.tpl +0 -90
  250. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query42.tpl +0 -64
  251. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query43.tpl +0 -55
  252. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query44.tpl +0 -72
  253. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query45.tpl +0 -56
  254. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query46.tpl +0 -78
  255. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query47.tpl +0 -89
  256. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query48.tpl +0 -104
  257. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query49.tpl +0 -164
  258. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query5.tpl +0 -165
  259. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query50.tpl +0 -96
  260. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query51.tpl +0 -80
  261. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query52.tpl +0 -59
  262. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query53.tpl +0 -64
  263. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query54.tpl +0 -95
  264. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query55.tpl +0 -52
  265. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query56.tpl +0 -108
  266. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query57.tpl +0 -87
  267. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query58.tpl +0 -101
  268. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query59.tpl +0 -79
  269. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query6.tpl +0 -62
  270. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query60.tpl +0 -115
  271. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query61.tpl +0 -83
  272. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query62.tpl +0 -71
  273. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query63.tpl +0 -64
  274. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query64.tpl +0 -157
  275. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query65.tpl +0 -62
  276. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query66.tpl +0 -261
  277. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query67.tpl +0 -81
  278. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query68.tpl +0 -82
  279. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query69.tpl +0 -85
  280. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query7.tpl +0 -60
  281. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query70.tpl +0 -73
  282. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query71.tpl +0 -74
  283. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query72.tpl +0 -67
  284. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query73.tpl +0 -69
  285. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query74.tpl +0 -99
  286. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query75.tpl +0 -107
  287. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query76.tpl +0 -64
  288. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query77.tpl +0 -145
  289. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query78.tpl +0 -94
  290. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query79.tpl +0 -60
  291. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query8.tpl +0 -144
  292. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query80.tpl +0 -131
  293. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query81.tpl +0 -68
  294. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query82.tpl +0 -56
  295. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query83.tpl +0 -104
  296. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query84.tpl +0 -58
  297. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query85.tpl +0 -121
  298. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query86.tpl +0 -60
  299. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query87.tpl +0 -56
  300. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query88.tpl +0 -128
  301. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query89.tpl +0 -75
  302. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query9.tpl +0 -88
  303. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query90.tpl +0 -58
  304. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query91.tpl +0 -68
  305. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query92.tpl +0 -68
  306. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query93.tpl +0 -53
  307. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query94.tpl +0 -67
  308. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query95.tpl +0 -71
  309. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query96.tpl +0 -52
  310. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query97.tpl +0 -62
  311. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query98.tpl +0 -70
  312. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/query99.tpl +0 -69
  313. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/sqlserver.tpl +0 -37
  314. benchbox/_binaries/tpc-ds/darwin-x86_64/query_templates/templates.lst +0 -99
  315. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/README +0 -4
  316. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/ansi.tpl +0 -38
  317. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/db2.tpl +0 -38
  318. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/netezza.tpl +0 -38
  319. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/oracle.tpl +0 -38
  320. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query1.tpl +0 -62
  321. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query11.tpl +0 -119
  322. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query12.tpl +0 -72
  323. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query13.tpl +0 -89
  324. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query14.tpl +0 -247
  325. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query15.tpl +0 -56
  326. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query16.tpl +0 -76
  327. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query17.tpl +0 -80
  328. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query19.tpl +0 -64
  329. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query2.tpl +0 -94
  330. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query20.tpl +0 -67
  331. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query21.tpl +0 -65
  332. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query23.tpl +0 -144
  333. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query24.tpl +0 -147
  334. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query25.tpl +0 -84
  335. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query26.tpl +0 -61
  336. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query27.tpl +0 -68
  337. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query28.tpl +0 -90
  338. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query29.tpl +0 -85
  339. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query3.tpl +0 -58
  340. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query30.tpl +0 -66
  341. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query31.tpl +0 -88
  342. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query32.tpl +0 -65
  343. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query33.tpl +0 -113
  344. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query34.tpl +0 -77
  345. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query37.tpl +0 -57
  346. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query38.tpl +0 -58
  347. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query39.tpl +0 -93
  348. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query4.tpl +0 -154
  349. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query40.tpl +0 -63
  350. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query41.tpl +0 -90
  351. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query42.tpl +0 -64
  352. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query43.tpl +0 -55
  353. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query44.tpl +0 -72
  354. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query45.tpl +0 -56
  355. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query46.tpl +0 -78
  356. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query47.tpl +0 -89
  357. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query48.tpl +0 -104
  358. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query49.tpl +0 -164
  359. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query50.tpl +0 -96
  360. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query52.tpl +0 -59
  361. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query53.tpl +0 -64
  362. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query54.tpl +0 -95
  363. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query55.tpl +0 -52
  364. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query56.tpl +0 -108
  365. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query57.tpl +0 -87
  366. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query58.tpl +0 -101
  367. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query59.tpl +0 -79
  368. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query6.tpl +0 -62
  369. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query60.tpl +0 -115
  370. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query61.tpl +0 -83
  371. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query62.tpl +0 -71
  372. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query63.tpl +0 -64
  373. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query64.tpl +0 -157
  374. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query65.tpl +0 -62
  375. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query66.tpl +0 -261
  376. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query68.tpl +0 -82
  377. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query69.tpl +0 -85
  378. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query7.tpl +0 -60
  379. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query71.tpl +0 -74
  380. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query72.tpl +0 -67
  381. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query73.tpl +0 -69
  382. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query74.tpl +0 -99
  383. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query75.tpl +0 -107
  384. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query76.tpl +0 -64
  385. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query78.tpl +0 -94
  386. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query79.tpl +0 -60
  387. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query8.tpl +0 -144
  388. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query81.tpl +0 -68
  389. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query82.tpl +0 -56
  390. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query83.tpl +0 -104
  391. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query84.tpl +0 -58
  392. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query85.tpl +0 -121
  393. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query87.tpl +0 -56
  394. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query88.tpl +0 -128
  395. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query89.tpl +0 -75
  396. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query9.tpl +0 -88
  397. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query90.tpl +0 -58
  398. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query91.tpl +0 -68
  399. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query92.tpl +0 -68
  400. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query93.tpl +0 -53
  401. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query94.tpl +0 -67
  402. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query95.tpl +0 -71
  403. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query96.tpl +0 -52
  404. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query97.tpl +0 -62
  405. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query98.tpl +0 -70
  406. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/query99.tpl +0 -69
  407. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/sqlserver.tpl +0 -37
  408. benchbox/_binaries/tpc-ds/linux-arm64/query_templates/templates.lst +0 -99
  409. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/README +0 -4
  410. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/ansi.tpl +0 -38
  411. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/db2.tpl +0 -38
  412. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/netezza.tpl +0 -38
  413. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/oracle.tpl +0 -38
  414. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query1.tpl +0 -62
  415. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query10.tpl +0 -98
  416. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query11.tpl +0 -119
  417. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query12.tpl +0 -72
  418. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query13.tpl +0 -89
  419. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query14.tpl +0 -247
  420. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query15.tpl +0 -56
  421. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query16.tpl +0 -76
  422. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query17.tpl +0 -80
  423. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query18.tpl +0 -73
  424. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query19.tpl +0 -64
  425. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query2.tpl +0 -94
  426. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query20.tpl +0 -67
  427. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query21.tpl +0 -65
  428. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query22.tpl +0 -54
  429. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query23.tpl +0 -144
  430. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query24.tpl +0 -147
  431. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query25.tpl +0 -84
  432. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query26.tpl +0 -61
  433. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query27.tpl +0 -68
  434. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query28.tpl +0 -90
  435. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query29.tpl +0 -85
  436. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query3.tpl +0 -58
  437. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query30.tpl +0 -66
  438. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query31.tpl +0 -88
  439. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query32.tpl +0 -65
  440. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query33.tpl +0 -113
  441. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query34.tpl +0 -77
  442. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query35.tpl +0 -98
  443. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query36.tpl +0 -74
  444. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query37.tpl +0 -57
  445. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query38.tpl +0 -58
  446. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query39.tpl +0 -93
  447. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query4.tpl +0 -154
  448. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query40.tpl +0 -63
  449. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query41.tpl +0 -90
  450. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query42.tpl +0 -64
  451. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query43.tpl +0 -55
  452. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query44.tpl +0 -72
  453. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query45.tpl +0 -56
  454. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query46.tpl +0 -78
  455. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query47.tpl +0 -89
  456. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query48.tpl +0 -104
  457. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query49.tpl +0 -164
  458. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query5.tpl +0 -165
  459. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query50.tpl +0 -96
  460. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query51.tpl +0 -80
  461. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query52.tpl +0 -59
  462. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query53.tpl +0 -64
  463. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query54.tpl +0 -95
  464. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query55.tpl +0 -52
  465. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query56.tpl +0 -108
  466. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query57.tpl +0 -87
  467. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query58.tpl +0 -101
  468. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query59.tpl +0 -79
  469. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query6.tpl +0 -62
  470. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query60.tpl +0 -115
  471. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query61.tpl +0 -83
  472. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query62.tpl +0 -71
  473. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query63.tpl +0 -64
  474. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query64.tpl +0 -157
  475. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query65.tpl +0 -62
  476. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query66.tpl +0 -261
  477. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query67.tpl +0 -81
  478. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query68.tpl +0 -82
  479. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query69.tpl +0 -85
  480. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query7.tpl +0 -60
  481. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query70.tpl +0 -73
  482. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query71.tpl +0 -74
  483. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query72.tpl +0 -67
  484. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query73.tpl +0 -69
  485. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query74.tpl +0 -99
  486. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query75.tpl +0 -107
  487. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query76.tpl +0 -64
  488. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query77.tpl +0 -145
  489. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query78.tpl +0 -94
  490. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query79.tpl +0 -60
  491. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query8.tpl +0 -144
  492. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query80.tpl +0 -131
  493. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query81.tpl +0 -68
  494. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query82.tpl +0 -56
  495. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query83.tpl +0 -104
  496. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query84.tpl +0 -58
  497. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query85.tpl +0 -121
  498. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query86.tpl +0 -60
  499. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query87.tpl +0 -56
  500. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query88.tpl +0 -128
  501. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query89.tpl +0 -75
  502. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query9.tpl +0 -88
  503. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query90.tpl +0 -58
  504. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query91.tpl +0 -68
  505. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query92.tpl +0 -68
  506. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query93.tpl +0 -53
  507. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query94.tpl +0 -67
  508. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query95.tpl +0 -71
  509. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query96.tpl +0 -52
  510. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query97.tpl +0 -62
  511. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query98.tpl +0 -70
  512. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/query99.tpl +0 -69
  513. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/sqlserver.tpl +0 -37
  514. benchbox/_binaries/tpc-ds/linux-x86_64/query_templates/templates.lst +0 -99
  515. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/README +0 -4
  516. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/ansi.tpl +0 -38
  517. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/db2.tpl +0 -38
  518. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/netezza.tpl +0 -38
  519. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/oracle.tpl +0 -38
  520. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query1.tpl +0 -62
  521. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query10.tpl +0 -98
  522. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query11.tpl +0 -119
  523. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query12.tpl +0 -72
  524. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query13.tpl +0 -89
  525. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query14.tpl +0 -247
  526. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query15.tpl +0 -56
  527. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query16.tpl +0 -76
  528. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query17.tpl +0 -80
  529. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query18.tpl +0 -73
  530. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query19.tpl +0 -64
  531. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query2.tpl +0 -94
  532. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query20.tpl +0 -67
  533. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query21.tpl +0 -65
  534. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query22.tpl +0 -54
  535. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query23.tpl +0 -144
  536. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query24.tpl +0 -147
  537. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query25.tpl +0 -84
  538. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query26.tpl +0 -61
  539. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query27.tpl +0 -68
  540. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query28.tpl +0 -90
  541. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query29.tpl +0 -85
  542. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query3.tpl +0 -58
  543. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query30.tpl +0 -66
  544. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query31.tpl +0 -88
  545. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query32.tpl +0 -65
  546. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query33.tpl +0 -113
  547. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query34.tpl +0 -77
  548. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query35.tpl +0 -98
  549. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query36.tpl +0 -74
  550. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query37.tpl +0 -57
  551. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query38.tpl +0 -58
  552. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query39.tpl +0 -93
  553. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query4.tpl +0 -154
  554. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query40.tpl +0 -63
  555. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query41.tpl +0 -90
  556. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query42.tpl +0 -64
  557. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query43.tpl +0 -55
  558. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query44.tpl +0 -72
  559. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query45.tpl +0 -56
  560. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query46.tpl +0 -78
  561. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query47.tpl +0 -89
  562. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query48.tpl +0 -104
  563. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query49.tpl +0 -164
  564. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query5.tpl +0 -165
  565. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query50.tpl +0 -96
  566. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query51.tpl +0 -80
  567. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query52.tpl +0 -59
  568. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query53.tpl +0 -64
  569. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query54.tpl +0 -95
  570. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query55.tpl +0 -52
  571. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query56.tpl +0 -108
  572. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query57.tpl +0 -87
  573. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query58.tpl +0 -101
  574. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query59.tpl +0 -79
  575. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query6.tpl +0 -62
  576. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query60.tpl +0 -115
  577. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query61.tpl +0 -83
  578. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query62.tpl +0 -71
  579. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query63.tpl +0 -64
  580. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query64.tpl +0 -157
  581. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query65.tpl +0 -62
  582. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query66.tpl +0 -261
  583. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query67.tpl +0 -81
  584. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query68.tpl +0 -82
  585. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query69.tpl +0 -85
  586. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query7.tpl +0 -60
  587. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query70.tpl +0 -73
  588. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query71.tpl +0 -74
  589. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query72.tpl +0 -67
  590. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query73.tpl +0 -69
  591. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query74.tpl +0 -99
  592. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query75.tpl +0 -107
  593. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query76.tpl +0 -64
  594. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query77.tpl +0 -145
  595. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query78.tpl +0 -94
  596. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query79.tpl +0 -60
  597. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query8.tpl +0 -144
  598. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query80.tpl +0 -131
  599. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query81.tpl +0 -68
  600. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query82.tpl +0 -56
  601. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query83.tpl +0 -104
  602. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query84.tpl +0 -58
  603. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query85.tpl +0 -121
  604. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query86.tpl +0 -60
  605. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query87.tpl +0 -56
  606. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query88.tpl +0 -128
  607. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query89.tpl +0 -75
  608. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query9.tpl +0 -88
  609. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query90.tpl +0 -58
  610. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query91.tpl +0 -68
  611. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query92.tpl +0 -68
  612. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query93.tpl +0 -53
  613. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query94.tpl +0 -67
  614. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query95.tpl +0 -71
  615. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query96.tpl +0 -52
  616. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query97.tpl +0 -62
  617. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query98.tpl +0 -70
  618. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/query99.tpl +0 -69
  619. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/sqlserver.tpl +0 -37
  620. benchbox/_binaries/tpc-ds/windows-x86_64/query_templates/templates.lst +0 -99
  621. benchbox-0.1.0.dist-info/RECORD +0 -1192
  622. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/README +0 -0
  623. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/ansi.tpl +0 -0
  624. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/db2.tpl +0 -0
  625. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/netezza.tpl +0 -0
  626. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/oracle.tpl +0 -0
  627. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query1.tpl +0 -0
  628. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query10.tpl +0 -0
  629. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query11.tpl +0 -0
  630. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query12.tpl +0 -0
  631. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query13.tpl +0 -0
  632. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query14.tpl +0 -0
  633. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query15.tpl +0 -0
  634. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query16.tpl +0 -0
  635. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query17.tpl +0 -0
  636. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query18.tpl +0 -0
  637. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query19.tpl +0 -0
  638. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query2.tpl +0 -0
  639. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query20.tpl +0 -0
  640. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query21.tpl +0 -0
  641. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query22.tpl +0 -0
  642. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query23.tpl +0 -0
  643. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query24.tpl +0 -0
  644. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query25.tpl +0 -0
  645. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query26.tpl +0 -0
  646. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query27.tpl +0 -0
  647. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query28.tpl +0 -0
  648. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query29.tpl +0 -0
  649. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query3.tpl +0 -0
  650. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query30.tpl +0 -0
  651. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query31.tpl +0 -0
  652. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query32.tpl +0 -0
  653. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query33.tpl +0 -0
  654. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query34.tpl +0 -0
  655. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query35.tpl +0 -0
  656. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query36.tpl +0 -0
  657. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query37.tpl +0 -0
  658. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query38.tpl +0 -0
  659. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query39.tpl +0 -0
  660. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query4.tpl +0 -0
  661. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query40.tpl +0 -0
  662. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query41.tpl +0 -0
  663. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query42.tpl +0 -0
  664. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query43.tpl +0 -0
  665. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query44.tpl +0 -0
  666. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query45.tpl +0 -0
  667. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query46.tpl +0 -0
  668. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query47.tpl +0 -0
  669. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query48.tpl +0 -0
  670. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query49.tpl +0 -0
  671. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query5.tpl +0 -0
  672. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query50.tpl +0 -0
  673. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query51.tpl +0 -0
  674. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query52.tpl +0 -0
  675. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query53.tpl +0 -0
  676. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query54.tpl +0 -0
  677. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query55.tpl +0 -0
  678. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query56.tpl +0 -0
  679. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query57.tpl +0 -0
  680. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query58.tpl +0 -0
  681. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query59.tpl +0 -0
  682. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query6.tpl +0 -0
  683. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query60.tpl +0 -0
  684. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query61.tpl +0 -0
  685. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query62.tpl +0 -0
  686. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query63.tpl +0 -0
  687. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query64.tpl +0 -0
  688. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query65.tpl +0 -0
  689. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query66.tpl +0 -0
  690. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query67.tpl +0 -0
  691. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query68.tpl +0 -0
  692. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query69.tpl +0 -0
  693. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query7.tpl +0 -0
  694. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query70.tpl +0 -0
  695. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query71.tpl +0 -0
  696. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query73.tpl +0 -0
  697. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query74.tpl +0 -0
  698. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query75.tpl +0 -0
  699. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query76.tpl +0 -0
  700. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query77.tpl +0 -0
  701. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query78.tpl +0 -0
  702. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query79.tpl +0 -0
  703. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query8.tpl +0 -0
  704. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query80.tpl +0 -0
  705. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query81.tpl +0 -0
  706. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query82.tpl +0 -0
  707. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query83.tpl +0 -0
  708. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query84.tpl +0 -0
  709. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query85.tpl +0 -0
  710. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query86.tpl +0 -0
  711. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query87.tpl +0 -0
  712. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query88.tpl +0 -0
  713. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query89.tpl +0 -0
  714. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query9.tpl +0 -0
  715. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query90.tpl +0 -0
  716. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query91.tpl +0 -0
  717. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query92.tpl +0 -0
  718. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query93.tpl +0 -0
  719. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query94.tpl +0 -0
  720. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query95.tpl +0 -0
  721. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query96.tpl +0 -0
  722. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query97.tpl +0 -0
  723. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query98.tpl +0 -0
  724. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/query99.tpl +0 -0
  725. /benchbox/_binaries/tpc-ds/{darwin-arm64 → templates}/query_templates/templates.lst +0 -0
  726. {benchbox-0.1.0.dist-info → benchbox-0.1.1.dist-info}/entry_points.txt +0 -0
  727. {benchbox-0.1.0.dist-info → benchbox-0.1.1.dist-info}/licenses/LICENSE +0 -0
  728. {benchbox-0.1.0.dist-info → benchbox-0.1.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,1824 @@
"""DataFrame operations for Metadata Primitives benchmark.

This module provides DataFrame implementations of metadata introspection operations,
enabling benchmarking of schema discovery, column introspection, and table statistics
on DataFrame platforms like Polars, PySpark, and Pandas.

Platform Support:
- Polars: Schema introspection (df.schema, df.dtypes, df.describe())
- Pandas: Schema introspection (df.dtypes, df.info(), df.describe())
- PySpark: Full catalog support via spark.catalog API + Delta Lake/Iceberg metadata

The operations are organized into categories based on capability level:
- Schema Introspection: Available on all platforms
- Catalog Operations: PySpark with configured catalog only
- Lakehouse Metadata: Delta Lake and Iceberg table formats

Copyright 2026 Joe Harris / BenchBox Project

Licensed under the MIT License. See LICENSE file in the project root for details.
"""

from __future__ import annotations

import logging
import time
from dataclasses import dataclass, field
from enum import Enum
from typing import Any

logger = logging.getLogger(__name__)


class MetadataOperationType(Enum):
    """Types of metadata operations supported by the benchmark.

    Operations are grouped by platform capability level:
    - Schema introspection: All DataFrame platforms
    - Catalog operations: PySpark with catalog
    - Lakehouse metadata: Delta Lake / Iceberg
    """

    # Schema introspection (all platforms)
    LIST_COLUMNS = "list_columns"
    GET_DTYPES = "get_dtypes"
    GET_SCHEMA = "get_schema"
    DESCRIBE_STATS = "describe_stats"
    ROW_COUNT = "row_count"
    COLUMN_COUNT = "column_count"

    # Catalog operations (PySpark with catalog)
    LIST_DATABASES = "list_databases"
    LIST_TABLES = "list_tables"
    LIST_TABLE_COLUMNS = "list_table_columns"
    TABLE_EXISTS = "table_exists"
    GET_TABLE_INFO = "get_table_info"

    # Lakehouse metadata (Delta Lake / Iceberg)
    TABLE_HISTORY = "table_history"
    TABLE_DETAIL = "table_detail"
    FILE_METADATA = "file_metadata"
    PARTITION_INFO = "partition_info"
    SNAPSHOT_INFO = "snapshot_info"

    # Complexity testing
    WIDE_TABLE_SCHEMA = "wide_table_schema"
    LARGE_CATALOG_LIST = "large_catalog_list"
    COMPLEX_TYPE_INTROSPECTION = "complex_type_introspection"


class MetadataOperationCategory(Enum):
    """Categories of metadata operations."""

    SCHEMA = "schema"  # Schema introspection (all platforms)
    CATALOG = "catalog"  # Catalog operations (PySpark)
    LAKEHOUSE = "lakehouse"  # Delta Lake / Iceberg metadata
    COMPLEXITY = "complexity"  # Complexity stress testing


# Mapping of operation types to categories
OPERATION_CATEGORIES: dict[MetadataOperationType, MetadataOperationCategory] = {
    # Schema operations
    MetadataOperationType.LIST_COLUMNS: MetadataOperationCategory.SCHEMA,
    MetadataOperationType.GET_DTYPES: MetadataOperationCategory.SCHEMA,
    MetadataOperationType.GET_SCHEMA: MetadataOperationCategory.SCHEMA,
    MetadataOperationType.DESCRIBE_STATS: MetadataOperationCategory.SCHEMA,
    MetadataOperationType.ROW_COUNT: MetadataOperationCategory.SCHEMA,
    MetadataOperationType.COLUMN_COUNT: MetadataOperationCategory.SCHEMA,
    # Catalog operations
    MetadataOperationType.LIST_DATABASES: MetadataOperationCategory.CATALOG,
    MetadataOperationType.LIST_TABLES: MetadataOperationCategory.CATALOG,
    MetadataOperationType.LIST_TABLE_COLUMNS: MetadataOperationCategory.CATALOG,
    MetadataOperationType.TABLE_EXISTS: MetadataOperationCategory.CATALOG,
    MetadataOperationType.GET_TABLE_INFO: MetadataOperationCategory.CATALOG,
    # Lakehouse operations
    MetadataOperationType.TABLE_HISTORY: MetadataOperationCategory.LAKEHOUSE,
    MetadataOperationType.TABLE_DETAIL: MetadataOperationCategory.LAKEHOUSE,
    MetadataOperationType.FILE_METADATA: MetadataOperationCategory.LAKEHOUSE,
    MetadataOperationType.PARTITION_INFO: MetadataOperationCategory.LAKEHOUSE,
    MetadataOperationType.SNAPSHOT_INFO: MetadataOperationCategory.LAKEHOUSE,
    # Complexity operations
    MetadataOperationType.WIDE_TABLE_SCHEMA: MetadataOperationCategory.COMPLEXITY,
    MetadataOperationType.LARGE_CATALOG_LIST: MetadataOperationCategory.COMPLEXITY,
    MetadataOperationType.COMPLEX_TYPE_INTROSPECTION: MetadataOperationCategory.COMPLEXITY,
}
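
# --- Editor's example (not part of the package source) ----------------------
# A minimal sketch of how the OPERATION_CATEGORIES mapping can be inverted to
# group operations by category, e.g. to drive per-category benchmark runs.
# The helper name `_example_operations_by_category` is hypothetical.
def _example_operations_by_category() -> dict[MetadataOperationCategory, list[MetadataOperationType]]:
    """Group all operation types by their category."""
    grouped: dict[MetadataOperationCategory, list[MetadataOperationType]] = {}
    for op, category in OPERATION_CATEGORIES.items():
        grouped.setdefault(category, []).append(op)
    return grouped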


@dataclass
class DataFrameMetadataCapabilities:
    """Platform capabilities for DataFrame metadata operations.

    Different DataFrame platforms have varying levels of metadata introspection
    support. This dataclass captures what operations each platform can perform.

    Attributes:
        platform_name: Name of the platform (e.g., "polars-df", "pyspark-df")
        supports_schema_introspection: Can introspect DataFrame schema/dtypes
        supports_describe: Can compute summary statistics
        supports_catalog: Has catalog API (list databases, tables)
        supports_delta_lake: Has Delta Lake metadata capabilities
        supports_iceberg: Has Iceberg metadata capabilities
        supports_partitions: Can introspect partition information
        supports_complex_types: Can introspect nested/complex types
        notes: Platform-specific notes
    """

    platform_name: str
    supports_schema_introspection: bool = True  # All platforms support basic schema
    supports_describe: bool = True  # Most platforms support describe
    supports_catalog: bool = False  # Only PySpark with catalog
    supports_delta_lake: bool = False  # Requires delta-spark or deltalake
    supports_iceberg: bool = False  # Requires iceberg-spark or pyiceberg
    supports_partitions: bool = False  # PySpark, some lakehouse formats
    supports_complex_types: bool = True  # Most platforms handle complex types
    notes: str = ""

    def supports_operation(self, operation: MetadataOperationType) -> bool:
        """Check if an operation type is supported.

        Args:
            operation: The operation type to check

        Returns:
            True if the operation is supported
        """
        # Schema operations - all platforms
        if operation in (
            MetadataOperationType.LIST_COLUMNS,
            MetadataOperationType.GET_DTYPES,
            MetadataOperationType.GET_SCHEMA,
            MetadataOperationType.ROW_COUNT,
            MetadataOperationType.COLUMN_COUNT,
        ):
            return self.supports_schema_introspection

        if operation == MetadataOperationType.DESCRIBE_STATS:
            return self.supports_describe

        # Catalog operations - PySpark with catalog
        if operation in (
            MetadataOperationType.LIST_DATABASES,
            MetadataOperationType.LIST_TABLES,
            MetadataOperationType.LIST_TABLE_COLUMNS,
            MetadataOperationType.TABLE_EXISTS,
            MetadataOperationType.GET_TABLE_INFO,
        ):
            return self.supports_catalog

        # Lakehouse operations - Delta Lake / Iceberg
        if operation in (
            MetadataOperationType.TABLE_HISTORY,
            MetadataOperationType.TABLE_DETAIL,
        ):
            return self.supports_delta_lake

        if operation == MetadataOperationType.SNAPSHOT_INFO:
            return self.supports_iceberg

        if operation == MetadataOperationType.FILE_METADATA:
            return self.supports_delta_lake or self.supports_iceberg

        if operation == MetadataOperationType.PARTITION_INFO:
            return self.supports_partitions

        # Complexity operations
        if operation == MetadataOperationType.WIDE_TABLE_SCHEMA:
            return self.supports_schema_introspection

        if operation == MetadataOperationType.LARGE_CATALOG_LIST:
            return self.supports_catalog

        if operation == MetadataOperationType.COMPLEX_TYPE_INTROSPECTION:
            return self.supports_complex_types

        return False

    def get_supported_operations(self) -> list[MetadataOperationType]:
        """Get list of operations supported by this platform.

        Returns:
            List of supported MetadataOperationType values
        """
        return [op for op in MetadataOperationType if self.supports_operation(op)]

    def get_unsupported_operations(self) -> list[MetadataOperationType]:
        """Get list of operations not supported by this platform.

        Returns:
            List of unsupported MetadataOperationType values
        """
        return [op for op in MetadataOperationType if not self.supports_operation(op)]

    def get_supported_categories(self) -> list[MetadataOperationCategory]:
        """Get list of operation categories supported by this platform.

        Returns:
            List of supported MetadataOperationCategory values
        """
        categories = set()
        for op in self.get_supported_operations():
            if op in OPERATION_CATEGORIES:
                categories.add(OPERATION_CATEGORIES[op])
        return sorted(categories, key=lambda c: c.value)
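
# --- Editor's example (not part of the package source) ----------------------
# A short sketch of querying a hand-built capabilities object; the platform
# name and flag values below are illustrative, not a shipped preset.
def _example_query_capabilities() -> None:
    caps = DataFrameMetadataCapabilities(
        platform_name="my-df",
        supports_catalog=True,
        supports_partitions=True,
    )
    assert caps.supports_operation(MetadataOperationType.LIST_TABLES)
    assert not caps.supports_operation(MetadataOperationType.SNAPSHOT_INFO)
    # Categories derive from the supported operations; here that prints
    # ['catalog', 'complexity', 'schema'] (sorted by value).
    print([c.value for c in caps.get_supported_categories()])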


# Pre-defined capabilities for common DataFrame platforms
POLARS_METADATA_CAPABILITIES = DataFrameMetadataCapabilities(
    platform_name="polars-df",
    supports_schema_introspection=True,
    supports_describe=True,
    supports_catalog=False,
    supports_delta_lake=False,  # Can read Delta via polars, limited metadata
    supports_iceberg=False,
    supports_partitions=False,
    supports_complex_types=True,
    notes="Schema introspection via df.schema, df.dtypes. No catalog support.",
)

PANDAS_METADATA_CAPABILITIES = DataFrameMetadataCapabilities(
    platform_name="pandas-df",
    supports_schema_introspection=True,
    supports_describe=True,
    supports_catalog=False,
    supports_delta_lake=False,
    supports_iceberg=False,
    supports_partitions=False,
    supports_complex_types=False,  # Pandas has limited nested type support
    notes="Schema introspection via df.dtypes, df.info(). No catalog support.",
)

PYSPARK_METADATA_CAPABILITIES = DataFrameMetadataCapabilities(
    platform_name="pyspark-df",
    supports_schema_introspection=True,
    supports_describe=True,
    supports_catalog=True,  # spark.catalog API
    supports_delta_lake=False,  # Set at runtime based on available packages
    supports_iceberg=False,  # Set at runtime based on available packages
    supports_partitions=True,
    supports_complex_types=True,
    notes="Full catalog support via spark.catalog. Delta Lake/Iceberg require packages.",
)

DATAFUSION_METADATA_CAPABILITIES = DataFrameMetadataCapabilities(
    platform_name="datafusion-df",
    supports_schema_introspection=True,
    supports_describe=True,
    supports_catalog=False,  # Limited catalog in standalone mode
    supports_delta_lake=False,
    supports_iceberg=False,
    supports_partitions=False,
    supports_complex_types=True,
    notes="Schema introspection via DataFrame schema. Limited catalog support.",
)
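
# --- Editor's example (not part of the package source) ----------------------
# Sketch: print a support matrix across the shipped presets. The loop uses
# only names defined above; the output format is illustrative.
def _example_support_matrix() -> None:
    presets = (
        POLARS_METADATA_CAPABILITIES,
        PANDAS_METADATA_CAPABILITIES,
        PYSPARK_METADATA_CAPABILITIES,
        DATAFUSION_METADATA_CAPABILITIES,
    )
    total = len(MetadataOperationType)
    for caps in presets:
        supported = caps.get_supported_operations()
        print(f"{caps.platform_name}: {len(supported)}/{total} operations supported")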


def get_platform_capabilities(platform_name: str, **kwargs: Any) -> DataFrameMetadataCapabilities:
    """Get metadata capabilities for a platform.

    Args:
        platform_name: Platform name (e.g., "polars-df", "pyspark-df")
        **kwargs: Optional overrides (e.g., supports_delta_lake=True)

    Returns:
        DataFrameMetadataCapabilities for the platform
    """
    platform_lower = platform_name.lower()

    # Select base capabilities
    if "polars" in platform_lower:
        base = POLARS_METADATA_CAPABILITIES
    elif "pandas" in platform_lower:
        base = PANDAS_METADATA_CAPABILITIES
    elif "pyspark" in platform_lower or "spark" in platform_lower:
        base = PYSPARK_METADATA_CAPABILITIES
    elif "datafusion" in platform_lower:
        base = DATAFUSION_METADATA_CAPABILITIES
    else:
        # Generic capabilities
        base = DataFrameMetadataCapabilities(
            platform_name=platform_name,
            supports_schema_introspection=True,
            supports_describe=True,
            notes="Unknown platform - basic schema introspection only.",
        )

    # Apply overrides if any
    if kwargs:
        return DataFrameMetadataCapabilities(
            platform_name=platform_name,
            supports_schema_introspection=kwargs.get(
                "supports_schema_introspection", base.supports_schema_introspection
            ),
            supports_describe=kwargs.get("supports_describe", base.supports_describe),
            supports_catalog=kwargs.get("supports_catalog", base.supports_catalog),
            supports_delta_lake=kwargs.get("supports_delta_lake", base.supports_delta_lake),
            supports_iceberg=kwargs.get("supports_iceberg", base.supports_iceberg),
            supports_partitions=kwargs.get("supports_partitions", base.supports_partitions),
            supports_complex_types=kwargs.get("supports_complex_types", base.supports_complex_types),
            notes=kwargs.get("notes", base.notes),
        )

    return base
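
# --- Editor's example (not part of the package source) ----------------------
# Sketch of the override path: start from the PySpark preset but assert Delta
# Lake support explicitly, as a caller might after running its own detection.
def _example_override_capabilities() -> None:
    caps = get_platform_capabilities("pyspark-df", supports_delta_lake=True)
    assert caps.supports_operation(MetadataOperationType.TABLE_HISTORY)
    # FILE_METADATA is satisfied by either Delta Lake or Iceberg support.
    assert caps.supports_operation(MetadataOperationType.FILE_METADATA)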


@dataclass
class DataFrameMetadataResult:
    """Result of a DataFrame metadata operation.

    Attributes:
        operation_type: Type of metadata operation
        success: Whether the operation completed successfully
        start_time: Operation start timestamp (Unix time)
        end_time: Operation end timestamp (Unix time)
        duration_ms: Operation duration in milliseconds
        result_count: Number of items returned (columns, tables, etc.)
        result_data: The actual metadata result (schema dict, column list, etc.)
        error_message: Error description if operation failed
        metrics: Additional platform-specific metrics
    """

    operation_type: MetadataOperationType
    success: bool
    start_time: float
    end_time: float
    duration_ms: float
    result_count: int = 0
    result_data: Any = None
    error_message: str | None = None
    metrics: dict[str, Any] = field(default_factory=dict)

    @classmethod
    def success_result(
        cls,
        operation_type: MetadataOperationType,
        start_time: float,
        result_count: int,
        result_data: Any = None,
        metrics: dict[str, Any] | None = None,
    ) -> DataFrameMetadataResult:
        """Create a successful result.

        Args:
            operation_type: The operation that completed
            start_time: When the operation started
            result_count: Number of items in the result
            result_data: The actual result data
            metrics: Additional metrics

        Returns:
            DataFrameMetadataResult indicating success
        """
        end_time = time.time()
        return cls(
            operation_type=operation_type,
            success=True,
            start_time=start_time,
            end_time=end_time,
            duration_ms=(end_time - start_time) * 1000,
            result_count=result_count,
            result_data=result_data,
            metrics=metrics or {},
        )

    @classmethod
    def failure_result(
        cls,
        operation_type: MetadataOperationType,
        error_message: str,
        start_time: float | None = None,
    ) -> DataFrameMetadataResult:
        """Create a failure result.

        Args:
            operation_type: The operation that failed
            error_message: Description of the failure
            start_time: Optional start time (defaults to now)

        Returns:
            DataFrameMetadataResult indicating failure
        """
        now = time.time()
        return cls(
            operation_type=operation_type,
            success=False,
            start_time=start_time or now,
            end_time=now,
            duration_ms=0.0 if start_time is None else (now - start_time) * 1000,
            result_count=0,
            error_message=error_message,
        )
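
# --- Editor's example (not part of the package source) ----------------------
# Sketch of the intended result-construction pattern: record start_time before
# the measured work, then build a success or failure result from it. The
# stand-in "work" here is illustrative.
def _example_build_result() -> DataFrameMetadataResult:
    start = time.time()
    try:
        columns = ["a", "b", "c"]  # stand-in for real introspection work
        return DataFrameMetadataResult.success_result(
            operation_type=MetadataOperationType.LIST_COLUMNS,
            start_time=start,
            result_count=len(columns),
            result_data=columns,
        )
    except Exception as exc:
        return DataFrameMetadataResult.failure_result(
            MetadataOperationType.LIST_COLUMNS, str(exc), start
        )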


class UnsupportedOperationError(Exception):
    """Raised when a metadata operation is not supported on the current platform.

    Provides a helpful error message with alternatives.
    """

    def __init__(
        self,
        operation: MetadataOperationType,
        platform_name: str,
        suggestion: str | None = None,
    ):
        self.operation = operation
        self.platform_name = platform_name
        self.suggestion = suggestion

        message = f"{platform_name} does not support {operation.value} operations."
        if suggestion:
            message += f"\n{suggestion}"

        super().__init__(message)


def get_unsupported_message(operation: MetadataOperationType, platform_name: str) -> str:
    """Get helpful error message for unsupported operations.

    Args:
        operation: The unsupported operation
        platform_name: The platform name

    Returns:
        Helpful error message with alternatives
    """
    category = OPERATION_CATEGORIES.get(operation, MetadataOperationCategory.SCHEMA)

    if category == MetadataOperationCategory.CATALOG:
        return (
            f"{platform_name} does not support catalog operations ({operation.value}).\n"
            f"Catalog operations require PySpark with a configured catalog.\n"
            f"Alternatives:\n"
            f"  - Use schema introspection operations (df.schema, df.dtypes)\n"
            f"  - Switch to pyspark-df platform with Hive metastore or Unity Catalog"
        )

    if category == MetadataOperationCategory.LAKEHOUSE:
        if operation in (
            MetadataOperationType.TABLE_HISTORY,
            MetadataOperationType.TABLE_DETAIL,
        ):
            return (
                f"{platform_name} does not support Delta Lake metadata operations ({operation.value}).\n"
                f"Delta Lake operations require:\n"
                f"  - pyspark-df with delta-spark package, or\n"
                f"  - polars with delta support (read-only)"
            )
        if operation == MetadataOperationType.SNAPSHOT_INFO:
            return (
                f"{platform_name} does not support Iceberg metadata operations ({operation.value}).\n"
                f"Iceberg operations require:\n"
                f"  - pyspark-df with iceberg-spark package, or\n"
                f"  - pyiceberg library"
            )
        return (
            f"{platform_name} does not support lakehouse metadata operations ({operation.value}).\n"
            f"Use pyspark-df with Delta Lake or Iceberg table format."
        )

    return f"{platform_name} does not support {operation.value} operations."


class DataFrameMetadataOperationsManager:
    """Manager for DataFrame metadata introspection operations.

    Provides a unified interface for metadata operations across DataFrame platforms.
    Handles platform capability detection and provides helpful error messages for
    unsupported operations.

    Example:
        manager = DataFrameMetadataOperationsManager("polars-df")

        # Check capabilities
        if manager.supports_operation(MetadataOperationType.GET_SCHEMA):
            result = manager.execute_get_schema(df)

        # Get all supported operations
        ops = manager.get_supported_operations()
    """

    def __init__(
        self,
        platform_name: str,
        spark_session: Any = None,
        delta_available: bool | None = None,
        iceberg_available: bool | None = None,
    ) -> None:
        """Initialize the metadata operations manager.

        Args:
            platform_name: Platform name (e.g., "polars-df", "pyspark-df")
            spark_session: SparkSession instance (required for pyspark-df catalog ops)
            delta_available: Override for Delta Lake availability detection
            iceberg_available: Override for Iceberg availability detection

        Raises:
            ValueError: If platform is not supported for DataFrame operations
        """
        self.platform_name = platform_name.lower()
        self.spark_session = spark_session
        self.logger = logging.getLogger(f"{__name__}.{self.__class__.__name__}")

        # Build capabilities with optional overrides
        overrides = {}

        if "pyspark" in self.platform_name or "spark" in self.platform_name:
            # Detect Delta Lake / Iceberg availability
            if delta_available is None:
                delta_available = self._detect_delta_lake()
            if iceberg_available is None:
                iceberg_available = self._detect_iceberg()

            overrides["supports_delta_lake"] = delta_available
            overrides["supports_iceberg"] = iceberg_available

        self._capabilities = get_platform_capabilities(self.platform_name, **overrides)

    def _detect_delta_lake(self) -> bool:
        """Detect if Delta Lake is available.

        Returns:
            True if delta-spark or deltalake is available
        """
        try:
            import delta  # noqa: F401

            return True
        except ImportError:
            pass

        try:
            import deltalake  # noqa: F401

            return True
        except ImportError:
            pass

        return False

    def _detect_iceberg(self) -> bool:
        """Detect if Iceberg is available.

        Returns:
            True if iceberg-spark or pyiceberg is available
        """
        try:
            import pyiceberg  # noqa: F401

            return True
        except ImportError:
            pass

        # Check for iceberg-spark via SparkSession
        if self.spark_session is not None:
            try:
                # Check if an Iceberg catalog is configured
                catalogs = self.spark_session.conf.get("spark.sql.catalog", "")
                if "iceberg" in catalogs.lower():
                    return True
            except Exception:
                pass

        return False
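
    # --- Editor's example (not part of the package source) ------------------
    # Construction sketch: passing delta_available/iceberg_available explicitly
    # skips the import-probing detection above, which is useful in tests or
    # when the caller already knows its environment.
    @staticmethod
    def _example_construct_without_detection() -> "DataFrameMetadataOperationsManager":
        return DataFrameMetadataOperationsManager(
            "pyspark-df",
            spark_session=None,  # catalog calls will return failure results
            delta_available=False,
            iceberg_available=False,
        )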

    def get_capabilities(self) -> DataFrameMetadataCapabilities:
        """Get platform metadata capabilities.

        Returns:
            DataFrameMetadataCapabilities for this platform
        """
        return self._capabilities

    def supports_operation(self, operation: MetadataOperationType) -> bool:
        """Check if an operation type is supported.

        Args:
            operation: The operation to check

        Returns:
            True if supported
        """
        return self._capabilities.supports_operation(operation)

    def get_supported_operations(self) -> list[MetadataOperationType]:
        """Get list of supported operations.

        Returns:
            List of supported MetadataOperationType values
        """
        return self._capabilities.get_supported_operations()

    def validate_operation(self, operation: MetadataOperationType) -> None:
        """Validate that an operation is supported.

        Args:
            operation: The operation to validate

        Raises:
            UnsupportedOperationError: If operation is not supported
        """
        if not self.supports_operation(operation):
            raise UnsupportedOperationError(
                operation=operation,
                platform_name=self.platform_name,
                suggestion=get_unsupported_message(operation, self.platform_name),
            )
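
    # --- Editor's example (not part of the package source) ------------------
    # Guard-pattern sketch: validate first, catch UnsupportedOperationError,
    # and skip rather than crash the benchmark run.
    @staticmethod
    def _example_validate_guard(manager: "DataFrameMetadataOperationsManager") -> None:
        try:
            manager.validate_operation(MetadataOperationType.LIST_DATABASES)
        except UnsupportedOperationError as exc:
            print(f"Skipping catalog benchmark: {exc}")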

    # =========================================================================
    # Schema Introspection Operations (All Platforms)
    # =========================================================================

    def execute_list_columns(self, dataframe: Any) -> DataFrameMetadataResult:
        """List column names from a DataFrame.

        Args:
            dataframe: The DataFrame to introspect

        Returns:
            DataFrameMetadataResult with column names
        """
        start_time = time.time()

        try:
            self.validate_operation(MetadataOperationType.LIST_COLUMNS)

            if "polars" in self.platform_name:
                columns = dataframe.columns
            elif "pandas" in self.platform_name:
                columns = list(dataframe.columns)
            elif "pyspark" in self.platform_name or "spark" in self.platform_name:
                columns = dataframe.columns
            elif "datafusion" in self.platform_name:
                columns = [field.name for field in dataframe.schema()]
            else:
                # Generic fallback
                columns = list(getattr(dataframe, "columns", []))

            return DataFrameMetadataResult.success_result(
                operation_type=MetadataOperationType.LIST_COLUMNS,
                start_time=start_time,
                result_count=len(columns),
                result_data=columns,
            )

        except UnsupportedOperationError:
            raise
        except Exception as e:
            self.logger.error(f"LIST_COLUMNS failed: {e}")
            return DataFrameMetadataResult.failure_result(
                MetadataOperationType.LIST_COLUMNS,
                str(e),
                start_time,
            )

    def execute_get_dtypes(self, dataframe: Any) -> DataFrameMetadataResult:
        """Get data types for all columns.

        Args:
            dataframe: The DataFrame to introspect

        Returns:
            DataFrameMetadataResult with column name to dtype mapping
        """
        start_time = time.time()

        try:
            self.validate_operation(MetadataOperationType.GET_DTYPES)

            if "polars" in self.platform_name:
                dtypes = {col: str(dtype) for col, dtype in zip(dataframe.columns, dataframe.dtypes)}
            elif "pandas" in self.platform_name:
                dtypes = {col: str(dtype) for col, dtype in dataframe.dtypes.items()}
            elif "pyspark" in self.platform_name or "spark" in self.platform_name:
                dtypes = {field.name: str(field.dataType) for field in dataframe.schema.fields}
            elif "datafusion" in self.platform_name:
                dtypes = {field.name: str(field.type) for field in dataframe.schema()}
            else:
                # Generic fallback
                dtypes = {}
                if hasattr(dataframe, "dtypes"):
                    dtypes = dict(dataframe.dtypes)

            return DataFrameMetadataResult.success_result(
                operation_type=MetadataOperationType.GET_DTYPES,
                start_time=start_time,
                result_count=len(dtypes),
                result_data=dtypes,
            )

        except UnsupportedOperationError:
            raise
        except Exception as e:
            self.logger.error(f"GET_DTYPES failed: {e}")
            return DataFrameMetadataResult.failure_result(
                MetadataOperationType.GET_DTYPES,
                str(e),
                start_time,
            )

    def execute_get_schema(self, dataframe: Any) -> DataFrameMetadataResult:
        """Get full schema information for a DataFrame.

        Args:
            dataframe: The DataFrame to introspect

        Returns:
            DataFrameMetadataResult with schema dict (name, dtype, nullable)
        """
        start_time = time.time()

        try:
            self.validate_operation(MetadataOperationType.GET_SCHEMA)

            schema_info: list[dict[str, Any]] = []

            if "polars" in self.platform_name:
                for col, dtype in zip(dataframe.columns, dataframe.dtypes):
                    schema_info.append(
                        {
                            "name": col,
                            "dtype": str(dtype),
                            "nullable": True,  # Polars columns are nullable by default
                        }
                    )
            elif "pandas" in self.platform_name:
                for col, dtype in dataframe.dtypes.items():
                    schema_info.append(
                        {
                            "name": col,
                            "dtype": str(dtype),
                            "nullable": dataframe[col].isna().any(),
                        }
                    )
            elif "pyspark" in self.platform_name or "spark" in self.platform_name:
                for field in dataframe.schema.fields:
                    schema_info.append(
                        {
                            "name": field.name,
                            "dtype": str(field.dataType),
                            "nullable": field.nullable,
                        }
                    )
            elif "datafusion" in self.platform_name:
                for field in dataframe.schema():
                    schema_info.append(
                        {
                            "name": field.name,
                            "dtype": str(field.type),
                            "nullable": field.is_nullable,
                        }
                    )
            else:
                # Generic fallback
                if hasattr(dataframe, "columns") and hasattr(dataframe, "dtypes"):
                    for col, dtype in zip(dataframe.columns, dataframe.dtypes):
                        schema_info.append({"name": col, "dtype": str(dtype), "nullable": True})

            return DataFrameMetadataResult.success_result(
                operation_type=MetadataOperationType.GET_SCHEMA,
                start_time=start_time,
                result_count=len(schema_info),
                result_data=schema_info,
            )

        except UnsupportedOperationError:
            raise
        except Exception as e:
            self.logger.error(f"GET_SCHEMA failed: {e}")
            return DataFrameMetadataResult.failure_result(
                MetadataOperationType.GET_SCHEMA,
                str(e),
                start_time,
            )

    def execute_describe_stats(self, dataframe: Any) -> DataFrameMetadataResult:
        """Get summary statistics for a DataFrame.

        Args:
            dataframe: The DataFrame to introspect

        Returns:
            DataFrameMetadataResult with statistics DataFrame/dict
        """
        start_time = time.time()

        try:
            self.validate_operation(MetadataOperationType.DESCRIBE_STATS)

            if "polars" in self.platform_name:
                stats_df = dataframe.describe()
                result_data = stats_df.to_dicts()
                result_count = stats_df.height
            elif "pandas" in self.platform_name:
                stats_df = dataframe.describe()
                result_data = stats_df.to_dict()
                result_count = len(stats_df)
            elif "pyspark" in self.platform_name or "spark" in self.platform_name:
                stats_df = dataframe.describe()
                result_data = stats_df.collect()
                result_count = stats_df.count()
            elif "datafusion" in self.platform_name:
                # DataFusion doesn't have describe() yet
                result_data = None
                result_count = 0
            else:
                result_data = None
                result_count = 0

            return DataFrameMetadataResult.success_result(
                operation_type=MetadataOperationType.DESCRIBE_STATS,
                start_time=start_time,
                result_count=result_count,
                result_data=result_data,
            )

        except UnsupportedOperationError:
            raise
        except Exception as e:
            self.logger.error(f"DESCRIBE_STATS failed: {e}")
            return DataFrameMetadataResult.failure_result(
                MetadataOperationType.DESCRIBE_STATS,
                str(e),
                start_time,
            )
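
    # --- Editor's example (not part of the package source) ------------------
    # Usage sketch for the schema operations above, assuming pandas is
    # installed; the expected outputs in comments are illustrative.
    @staticmethod
    def _example_schema_ops() -> None:
        import pandas as pd

        df = pd.DataFrame({"id": [1, 2, 3], "name": ["a", "b", None]})
        manager = DataFrameMetadataOperationsManager("pandas-df")

        print(manager.execute_list_columns(df).result_data)  # ['id', 'name']
        print(manager.execute_get_dtypes(df).result_data)    # {'id': 'int64', 'name': 'object'}
        print(manager.execute_get_schema(df).result_data)    # includes per-column nullable flags
        print(manager.execute_describe_stats(df).result_count)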

    def execute_row_count(self, dataframe: Any) -> DataFrameMetadataResult:
        """Get row count for a DataFrame.

        Note: This may require full scan on lazy/distributed platforms.

        Args:
            dataframe: The DataFrame to count

        Returns:
            DataFrameMetadataResult with row count
        """
        start_time = time.time()

        try:
            self.validate_operation(MetadataOperationType.ROW_COUNT)

            if "polars" in self.platform_name:
                count = dataframe.height
            elif "pandas" in self.platform_name:
                count = len(dataframe)
            elif "pyspark" in self.platform_name or "spark" in self.platform_name or "datafusion" in self.platform_name:
                count = dataframe.count()
            else:
                count = len(dataframe) if hasattr(dataframe, "__len__") else 0

            return DataFrameMetadataResult.success_result(
                operation_type=MetadataOperationType.ROW_COUNT,
                start_time=start_time,
                result_count=1,
                result_data=count,
                metrics={"row_count": count},
            )

        except UnsupportedOperationError:
            raise
        except Exception as e:
            self.logger.error(f"ROW_COUNT failed: {e}")
            return DataFrameMetadataResult.failure_result(
                MetadataOperationType.ROW_COUNT,
                str(e),
                start_time,
            )

    def execute_column_count(self, dataframe: Any) -> DataFrameMetadataResult:
        """Get column count for a DataFrame.

        Args:
            dataframe: The DataFrame to count

        Returns:
            DataFrameMetadataResult with column count
        """
        start_time = time.time()

        try:
            self.validate_operation(MetadataOperationType.COLUMN_COUNT)

            if "polars" in self.platform_name:
                count = dataframe.width
            elif "pandas" in self.platform_name or "pyspark" in self.platform_name or "spark" in self.platform_name:
                count = len(dataframe.columns)
            elif "datafusion" in self.platform_name:
                count = len(dataframe.schema())
            else:
                count = len(getattr(dataframe, "columns", []))

            return DataFrameMetadataResult.success_result(
                operation_type=MetadataOperationType.COLUMN_COUNT,
                start_time=start_time,
                result_count=1,
                result_data=count,
                metrics={"column_count": count},
            )

        except UnsupportedOperationError:
            raise
        except Exception as e:
            self.logger.error(f"COLUMN_COUNT failed: {e}")
            return DataFrameMetadataResult.failure_result(
                MetadataOperationType.COLUMN_COUNT,
                str(e),
                start_time,
            )

    # =========================================================================
    # Catalog Operations (PySpark with Catalog)
    # =========================================================================

    def execute_list_databases(self) -> DataFrameMetadataResult:
        """List all databases in the catalog.

        Requires PySpark with configured catalog.

        Returns:
            DataFrameMetadataResult with database names
        """
        start_time = time.time()

        try:
            self.validate_operation(MetadataOperationType.LIST_DATABASES)

            if self.spark_session is None:
                return DataFrameMetadataResult.failure_result(
                    MetadataOperationType.LIST_DATABASES,
                    "SparkSession is required for catalog operations. "
                    "Pass spark_session to DataFrameMetadataOperationsManager.",
                    start_time,
                )

            databases = self.spark_session.catalog.listDatabases()
            db_names = [db.name for db in databases]

            return DataFrameMetadataResult.success_result(
                operation_type=MetadataOperationType.LIST_DATABASES,
                start_time=start_time,
                result_count=len(db_names),
                result_data=db_names,
            )

        except UnsupportedOperationError:
            raise
        except Exception as e:
            self.logger.error(f"LIST_DATABASES failed: {e}")
            return DataFrameMetadataResult.failure_result(
                MetadataOperationType.LIST_DATABASES,
                str(e),
                start_time,
            )

    def execute_list_tables(self, database: str | None = None) -> DataFrameMetadataResult:
        """List all tables in a database.

        Requires PySpark with configured catalog.

        Args:
            database: Database name (uses current database if None)

        Returns:
            DataFrameMetadataResult with table names
        """
        start_time = time.time()

        try:
            self.validate_operation(MetadataOperationType.LIST_TABLES)

            if self.spark_session is None:
                return DataFrameMetadataResult.failure_result(
                    MetadataOperationType.LIST_TABLES,
                    "SparkSession is required for catalog operations.",
                    start_time,
                )

            if database:
                tables = self.spark_session.catalog.listTables(database)
            else:
                tables = self.spark_session.catalog.listTables()

            table_info = [{"name": t.name, "database": t.database, "tableType": t.tableType} for t in tables]

            return DataFrameMetadataResult.success_result(
                operation_type=MetadataOperationType.LIST_TABLES,
                start_time=start_time,
                result_count=len(table_info),
                result_data=table_info,
            )

        except UnsupportedOperationError:
            raise
        except Exception as e:
            self.logger.error(f"LIST_TABLES failed: {e}")
            return DataFrameMetadataResult.failure_result(
                MetadataOperationType.LIST_TABLES,
                str(e),
                start_time,
            )
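
    # --- Editor's example (not part of the package source) ------------------
    # Catalog sketch, assuming a live SparkSession is passed in; on platforms
    # without catalog support these calls raise UnsupportedOperationError via
    # validate_operation.
    @staticmethod
    def _example_catalog_listing(spark: Any) -> None:
        manager = DataFrameMetadataOperationsManager("pyspark-df", spark_session=spark)
        dbs = manager.execute_list_databases()
        for db_name in dbs.result_data or []:
            tables = manager.execute_list_tables(db_name)
            print(f"{db_name}: {tables.result_count} tables")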

    def execute_list_table_columns(self, table_name: str, database: str | None = None) -> DataFrameMetadataResult:
        """List columns for a specific table in the catalog.

        Requires PySpark with configured catalog.

        Args:
            table_name: Name of the table
            database: Database name (uses current database if None)

        Returns:
            DataFrameMetadataResult with column information
        """
        start_time = time.time()

        try:
            self.validate_operation(MetadataOperationType.LIST_TABLE_COLUMNS)

            if self.spark_session is None:
                return DataFrameMetadataResult.failure_result(
                    MetadataOperationType.LIST_TABLE_COLUMNS,
                    "SparkSession is required for catalog operations.",
                    start_time,
                )

            if database:
                columns = self.spark_session.catalog.listColumns(table_name, database)
            else:
                columns = self.spark_session.catalog.listColumns(table_name)

            column_info = [
                {
                    "name": c.name,
                    "dataType": c.dataType,
                    "nullable": c.nullable,
                    "description": c.description,
                }
                for c in columns
            ]

            return DataFrameMetadataResult.success_result(
                operation_type=MetadataOperationType.LIST_TABLE_COLUMNS,
                start_time=start_time,
                result_count=len(column_info),
                result_data=column_info,
            )

        except UnsupportedOperationError:
            raise
        except Exception as e:
            self.logger.error(f"LIST_TABLE_COLUMNS failed: {e}")
            return DataFrameMetadataResult.failure_result(
                MetadataOperationType.LIST_TABLE_COLUMNS,
                str(e),
                start_time,
            )

    def execute_table_exists(self, table_name: str, database: str | None = None) -> DataFrameMetadataResult:
        """Check if a table exists in the catalog.

        Requires PySpark with configured catalog.

        Args:
            table_name: Name of the table
            database: Database name (uses current database if None)

        Returns:
            DataFrameMetadataResult with exists boolean
        """
        start_time = time.time()

        try:
            self.validate_operation(MetadataOperationType.TABLE_EXISTS)

            if self.spark_session is None:
                return DataFrameMetadataResult.failure_result(
                    MetadataOperationType.TABLE_EXISTS,
                    "SparkSession is required for catalog operations.",
                    start_time,
                )

            if database:
                full_name = f"{database}.{table_name}"
            else:
                full_name = table_name

            exists = self.spark_session.catalog.tableExists(full_name)

            return DataFrameMetadataResult.success_result(
                operation_type=MetadataOperationType.TABLE_EXISTS,
                start_time=start_time,
                result_count=1 if exists else 0,
                result_data=exists,
            )

        except UnsupportedOperationError:
            raise
        except Exception as e:
            self.logger.error(f"TABLE_EXISTS failed: {e}")
            return DataFrameMetadataResult.failure_result(
                MetadataOperationType.TABLE_EXISTS,
                str(e),
                start_time,
            )

    def execute_get_table_info(self, table_name: str, database: str | None = None) -> DataFrameMetadataResult:
        """Get detailed information about a table.

        Requires PySpark with configured catalog.

        Args:
            table_name: Name of the table
            database: Database name (uses current database if None)

        Returns:
            DataFrameMetadataResult with table metadata
        """
        start_time = time.time()

        try:
            self.validate_operation(MetadataOperationType.GET_TABLE_INFO)

            if self.spark_session is None:
                return DataFrameMetadataResult.failure_result(
                    MetadataOperationType.GET_TABLE_INFO,
                    "SparkSession is required for catalog operations.",
                    start_time,
                )

            if database:
                full_name = f"{database}.{table_name}"
            else:
                full_name = table_name

            table = self.spark_session.catalog.getTable(full_name)

            table_info = {
                "name": table.name,
                "database": table.database,
                "tableType": table.tableType,
                "description": table.description,
                "isTemporary": table.isTemporary,
            }

            return DataFrameMetadataResult.success_result(
                operation_type=MetadataOperationType.GET_TABLE_INFO,
                start_time=start_time,
                result_count=1,
                result_data=table_info,
            )

        except UnsupportedOperationError:
            raise
        except Exception as e:
            self.logger.error(f"GET_TABLE_INFO failed: {e}")
            return DataFrameMetadataResult.failure_result(
                MetadataOperationType.GET_TABLE_INFO,
                str(e),
                start_time,
            )

    # =========================================================================
    # Lakehouse Metadata Operations (Delta Lake / Iceberg)
    # =========================================================================

    def execute_table_history(self, table_path: str) -> DataFrameMetadataResult:
        """Get transaction history for a Delta Lake table.

        Requires Delta Lake support.

        Args:
            table_path: Path to the Delta table

        Returns:
            DataFrameMetadataResult with transaction history
        """
        start_time = time.time()

        try:
            self.validate_operation(MetadataOperationType.TABLE_HISTORY)

            if self.spark_session is not None:
                # Use DeltaTable API
                from delta.tables import DeltaTable

                delta_table = DeltaTable.forPath(self.spark_session, table_path)
                history_df = delta_table.history()
                history = history_df.collect()

                return DataFrameMetadataResult.success_result(
                    operation_type=MetadataOperationType.TABLE_HISTORY,
                    start_time=start_time,
                    result_count=len(history),
                    result_data=[row.asDict() for row in history],
                )
            else:
                # Use deltalake Python library
                from deltalake import DeltaTable as PyDeltaTable

                dt = PyDeltaTable(table_path)
                history = list(dt.history())

                return DataFrameMetadataResult.success_result(
                    operation_type=MetadataOperationType.TABLE_HISTORY,
                    start_time=start_time,
                    result_count=len(history),
                    result_data=history,
                )

        except UnsupportedOperationError:
            raise
        except ImportError as e:
            return DataFrameMetadataResult.failure_result(
                MetadataOperationType.TABLE_HISTORY,
                f"Delta Lake library not available: {e}",
                start_time,
            )
        except Exception as e:
            self.logger.error(f"TABLE_HISTORY failed: {e}")
            return DataFrameMetadataResult.failure_result(
                MetadataOperationType.TABLE_HISTORY,
                str(e),
                start_time,
            )

    def execute_table_detail(self, table_path: str) -> DataFrameMetadataResult:
        """Get detailed metadata for a Delta Lake table.

        Requires Delta Lake support.

        Args:
            table_path: Path to the Delta table

        Returns:
            DataFrameMetadataResult with table detail
        """
        start_time = time.time()

        try:
            self.validate_operation(MetadataOperationType.TABLE_DETAIL)

            if self.spark_session is not None:
                # Use DeltaTable API
                from delta.tables import DeltaTable

                delta_table = DeltaTable.forPath(self.spark_session, table_path)
                detail_df = delta_table.detail()
                detail = detail_df.collect()[0].asDict()

                return DataFrameMetadataResult.success_result(
                    operation_type=MetadataOperationType.TABLE_DETAIL,
                    start_time=start_time,
                    result_count=1,
                    result_data=detail,
                )
            else:
                # Use deltalake Python library
                from deltalake import DeltaTable as PyDeltaTable

                dt = PyDeltaTable(table_path)
                metadata = dt.metadata()

                detail = {
                    "id": metadata.id,
                    "name": metadata.name,
                    "description": metadata.description,
                    "partitionColumns": metadata.partition_columns,
                    "createdTime": metadata.created_time,
                }

                return DataFrameMetadataResult.success_result(
                    operation_type=MetadataOperationType.TABLE_DETAIL,
                    start_time=start_time,
                    result_count=1,
                    result_data=detail,
                )

        except UnsupportedOperationError:
            raise
        except ImportError as e:
            return DataFrameMetadataResult.failure_result(
                MetadataOperationType.TABLE_DETAIL,
                f"Delta Lake library not available: {e}",
                start_time,
            )
        except Exception as e:
            self.logger.error(f"TABLE_DETAIL failed: {e}")
            return DataFrameMetadataResult.failure_result(
                MetadataOperationType.TABLE_DETAIL,
                str(e),
                start_time,
            )
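
    # --- Editor's example (not part of the package source) ------------------
    # Delta sketch using the standalone `deltalake` package: with no
    # SparkSession the method above falls through to the PyDeltaTable branch,
    # and delta_available=True asserts support up front. `table_path` must be
    # an existing Delta table; the printed fields are illustrative.
    @staticmethod
    def _example_delta_history(table_path: str) -> None:
        manager = DataFrameMetadataOperationsManager(
            "pyspark-df", spark_session=None, delta_available=True, iceberg_available=False
        )
        result = manager.execute_table_history(table_path)
        if result.success:
            print(f"{result.result_count} commits in {result.duration_ms:.1f} ms")
        else:
            print(f"history failed: {result.error_message}")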

    def execute_file_metadata(self, table_path: str) -> DataFrameMetadataResult:
        """Get file-level metadata for a lakehouse table.

        Requires Delta Lake or Iceberg support.

        Args:
            table_path: Path to the table

        Returns:
            DataFrameMetadataResult with file metadata
        """
        start_time = time.time()

        try:
            self.validate_operation(MetadataOperationType.FILE_METADATA)

            if self._capabilities.supports_delta_lake:
                # Delta Lake file metadata
                if self.spark_session is not None:
                    from delta.tables import DeltaTable

                    delta_table = DeltaTable.forPath(self.spark_session, table_path)
                    # Get files via detail or internal API
                    detail = delta_table.detail().collect()[0]
                    files = {
                        "numFiles": detail.numFiles if hasattr(detail, "numFiles") else None,
                        "sizeInBytes": detail.sizeInBytes if hasattr(detail, "sizeInBytes") else None,
                    }
                else:
                    from deltalake import DeltaTable as PyDeltaTable

                    dt = PyDeltaTable(table_path)
                    file_uris = dt.file_uris()
                    files = {
                        "numFiles": len(file_uris),
                        "files": file_uris[:100],  # Limit for large tables
                    }

                return DataFrameMetadataResult.success_result(
                    operation_type=MetadataOperationType.FILE_METADATA,
                    start_time=start_time,
                    result_count=files.get("numFiles") or 0,  # numFiles may be None on the Spark path
                    result_data=files,
                )

            elif self._capabilities.supports_iceberg:
                # Iceberg file metadata - requires catalog configuration
                # Note: pyiceberg.catalog and pyiceberg.table would be used here
                # but require proper catalog setup which is outside scope of basic introspection
                return DataFrameMetadataResult.failure_result(
                    MetadataOperationType.FILE_METADATA,
                    "Iceberg file metadata requires catalog configuration",
                    start_time,
                )

            else:
                return DataFrameMetadataResult.failure_result(
                    MetadataOperationType.FILE_METADATA,
                    "No lakehouse format available for file metadata",
                    start_time,
                )

        except UnsupportedOperationError:
            raise
        except ImportError as e:
            return DataFrameMetadataResult.failure_result(
                MetadataOperationType.FILE_METADATA,
                f"Lakehouse library not available: {e}",
                start_time,
            )
        except Exception as e:
            self.logger.error(f"FILE_METADATA failed: {e}")
            return DataFrameMetadataResult.failure_result(
                MetadataOperationType.FILE_METADATA,
                str(e),
                start_time,
            )

    def execute_partition_info(self, table_path: str) -> DataFrameMetadataResult:
        """Get partition information for a table.

        Requires PySpark or lakehouse table format.

        Args:
            table_path: Path to the table

        Returns:
            DataFrameMetadataResult with partition info
        """
        start_time = time.time()

        try:
            self.validate_operation(MetadataOperationType.PARTITION_INFO)

            if self.spark_session is not None and self._capabilities.supports_delta_lake:
                from delta.tables import DeltaTable

                delta_table = DeltaTable.forPath(self.spark_session, table_path)
                detail = delta_table.detail().collect()[0]
                partitions = {
                    "partitionColumns": list(detail.partitionColumns) if detail.partitionColumns else [],
                    # Note: this counts partition *columns*, not physical partitions
                    "numPartitions": len(detail.partitionColumns) if detail.partitionColumns else 0,
                }

                return DataFrameMetadataResult.success_result(
                    operation_type=MetadataOperationType.PARTITION_INFO,
                    start_time=start_time,
                    result_count=partitions["numPartitions"],
                    result_data=partitions,
                )

            elif self._capabilities.supports_delta_lake:
                # Use standalone deltalake Python library (no SparkSession)
                from deltalake import DeltaTable as PyDeltaTable

                dt = PyDeltaTable(table_path)
                metadata = dt.metadata()
                partitions = {
                    "partitionColumns": metadata.partition_columns,
                    "numPartitions": len(metadata.partition_columns),
                }

                return DataFrameMetadataResult.success_result(
                    operation_type=MetadataOperationType.PARTITION_INFO,
                    start_time=start_time,
                    result_count=partitions["numPartitions"],
                    result_data=partitions,
                )

            else:
                return DataFrameMetadataResult.failure_result(
                    MetadataOperationType.PARTITION_INFO,
                    "Partition info requires Delta Lake or Iceberg table format",
                    start_time,
                )

        except UnsupportedOperationError:
            raise
        except ImportError as e:
            return DataFrameMetadataResult.failure_result(
                MetadataOperationType.PARTITION_INFO,
                f"Lakehouse library not available: {e}",
                start_time,
            )
        except Exception as e:
            self.logger.error(f"PARTITION_INFO failed: {e}")
            return DataFrameMetadataResult.failure_result(
                MetadataOperationType.PARTITION_INFO,
                str(e),
                start_time,
            )

    def execute_snapshot_info(self, table_path: str) -> DataFrameMetadataResult:
        """Get snapshot information for an Iceberg table.

        Requires Iceberg support.

        Args:
            table_path: Path or identifier for the Iceberg table

        Returns:
            DataFrameMetadataResult with snapshot info
        """
        start_time = time.time()

        try:
            self.validate_operation(MetadataOperationType.SNAPSHOT_INFO)

            # This requires proper Iceberg catalog configuration
            # Simplified implementation for now
            return DataFrameMetadataResult.failure_result(
                MetadataOperationType.SNAPSHOT_INFO,
                "Iceberg snapshot info requires catalog configuration. "
                "Configure Iceberg catalog in your Spark session or use pyiceberg with proper catalog.",
                start_time,
            )

        except UnsupportedOperationError:
            raise
        except Exception as e:
            self.logger.error(f"SNAPSHOT_INFO failed: {e}")
            return DataFrameMetadataResult.failure_result(
                MetadataOperationType.SNAPSHOT_INFO,
                str(e),
                start_time,
            )

    # =========================================================================
    # Complexity Testing Operations
    # =========================================================================

    def execute_wide_table_schema(self, dataframe: Any) -> DataFrameMetadataResult:
        """Introspect schema of a wide DataFrame (100+ columns).

        Tests metadata introspection performance on DataFrames with many columns.
        This is useful for benchmarking schema discovery performance.

        Args:
            dataframe: A wide DataFrame with many columns

        Returns:
            DataFrameMetadataResult with schema info and metrics
        """
        start_time = time.time()

        try:
            self.validate_operation(MetadataOperationType.WIDE_TABLE_SCHEMA)

            # Get column count
            if "polars" in self.platform_name:
                column_count = dataframe.width
                schema_info = [
                    {"name": col, "dtype": str(dtype)} for col, dtype in zip(dataframe.columns, dataframe.dtypes)
                ]
            elif "pandas" in self.platform_name:
                column_count = len(dataframe.columns)
                schema_info = [{"name": col, "dtype": str(dtype)} for col, dtype in dataframe.dtypes.items()]
            elif "pyspark" in self.platform_name or "spark" in self.platform_name:
                column_count = len(dataframe.columns)
                schema_info = [{"name": f.name, "dtype": str(f.dataType)} for f in dataframe.schema.fields]
            else:
                column_count = len(getattr(dataframe, "columns", []))
                schema_info = []

            return DataFrameMetadataResult.success_result(
                operation_type=MetadataOperationType.WIDE_TABLE_SCHEMA,
                start_time=start_time,
                result_count=column_count,
                result_data=schema_info,
                metrics={
                    "column_count": column_count,
                    "is_wide_table": column_count >= 100,
                },
            )

        except UnsupportedOperationError:
            raise
        except Exception as e:
            self.logger.error(f"WIDE_TABLE_SCHEMA failed: {e}")
            return DataFrameMetadataResult.failure_result(
                MetadataOperationType.WIDE_TABLE_SCHEMA,
                str(e),
                start_time,
            )
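
    # --- Editor's example (not part of the package source) ------------------
    # Benchmark-input sketch: build a deliberately wide Polars frame (200
    # columns) and introspect it, assuming polars is installed.
    @staticmethod
    def _example_wide_table() -> None:
        import polars as pl

        df = pl.DataFrame({f"col_{i}": [0] for i in range(200)})
        manager = DataFrameMetadataOperationsManager("polars-df")
        result = manager.execute_wide_table_schema(df)
        print(result.metrics)  # {'column_count': 200, 'is_wide_table': True}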

    def execute_large_catalog_list(self) -> DataFrameMetadataResult:
        """List tables in a large catalog (100+ tables).

        Tests catalog introspection performance with many tables.
        Requires PySpark with configured catalog.

        Returns:
            DataFrameMetadataResult with table list and metrics
        """
        start_time = time.time()

        try:
            self.validate_operation(MetadataOperationType.LARGE_CATALOG_LIST)

            if self.spark_session is None:
                return DataFrameMetadataResult.failure_result(
                    MetadataOperationType.LARGE_CATALOG_LIST,
                    "SparkSession is required for catalog operations.",
                    start_time,
                )

            # Get all tables across all databases
            tables_result = []
            databases = self.spark_session.catalog.listDatabases()

            for db in databases:
                try:
                    tables = self.spark_session.catalog.listTables(db.name)
                    for t in tables:
                        tables_result.append({"name": t.name, "database": db.name, "tableType": t.tableType})
                except Exception as e:
                    self.logger.debug(f"Skipping database {db.name}: {e}")

            table_count = len(tables_result)

            return DataFrameMetadataResult.success_result(
                operation_type=MetadataOperationType.LARGE_CATALOG_LIST,
                start_time=start_time,
                result_count=table_count,
                result_data=tables_result,
                metrics={
                    "table_count": table_count,
                    "database_count": len(databases),
                    "is_large_catalog": table_count >= 100,
                },
            )

        except UnsupportedOperationError:
            raise
        except Exception as e:
            self.logger.error(f"LARGE_CATALOG_LIST failed: {e}")
            return DataFrameMetadataResult.failure_result(
                MetadataOperationType.LARGE_CATALOG_LIST,
                str(e),
                start_time,
            )

    def execute_complex_type_introspection(self, dataframe: Any) -> DataFrameMetadataResult:
        """Introspect complex/nested types in a DataFrame.

        Tests metadata introspection for DataFrames containing complex types
        like ARRAY, STRUCT, MAP, and nested structures.

        Args:
            dataframe: DataFrame with complex nested types

        Returns:
            DataFrameMetadataResult with type analysis
        """
        start_time = time.time()

        try:
            self.validate_operation(MetadataOperationType.COMPLEX_TYPE_INTROSPECTION)

            complex_types = []
            nested_depth = 0

            if "polars" in self.platform_name:
                import polars as pl

                for col, dtype in zip(dataframe.columns, dataframe.dtypes):
                    type_info = self._analyze_polars_type(col, dtype, pl)
                    if type_info.get("is_complex"):
                        complex_types.append(type_info)
                        nested_depth = max(nested_depth, type_info.get("nested_depth", 0))

            elif "pyspark" in self.platform_name or "spark" in self.platform_name:
                from pyspark.sql.types import ArrayType, MapType, StructType

                for field in dataframe.schema.fields:
                    type_info = self._analyze_spark_type(field, ArrayType, MapType, StructType)
                    if type_info.get("is_complex"):
                        complex_types.append(type_info)
                        nested_depth = max(nested_depth, type_info.get("nested_depth", 0))

            elif "pandas" in self.platform_name:
                # Pandas has limited complex type support
                for col, dtype in dataframe.dtypes.items():
                    if str(dtype) == "object":
                        # Could contain nested structures
                        complex_types.append(
                            {
                                "name": col,
                                "dtype": str(dtype),
                                "is_complex": True,
                                "complex_type": "object",
                                "nested_depth": 1,
                            }
                        )

            return DataFrameMetadataResult.success_result(
                operation_type=MetadataOperationType.COMPLEX_TYPE_INTROSPECTION,
                start_time=start_time,
                result_count=len(complex_types),
                result_data=complex_types,
                metrics={
                    "complex_column_count": len(complex_types),
                    "max_nested_depth": nested_depth,
                    "has_arrays": any(t.get("complex_type") == "array" for t in complex_types),
                    "has_structs": any(t.get("complex_type") == "struct" for t in complex_types),
                    "has_maps": any(t.get("complex_type") == "map" for t in complex_types),
                },
            )

        except UnsupportedOperationError:
            raise
        except Exception as e:
            self.logger.error(f"COMPLEX_TYPE_INTROSPECTION failed: {e}")
            return DataFrameMetadataResult.failure_result(
                MetadataOperationType.COMPLEX_TYPE_INTROSPECTION,
                str(e),
                start_time,
            )
+
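A minimal usage sketch for the Polars branch above, assuming a manager created for the "polars-df" platform and that the returned result carries the result_data/metrics values passed to success_result:

import polars as pl

# One flat column, one array column, one struct column.
df = pl.DataFrame(
    {
        "id": [1, 2],
        "tags": [["a", "b"], ["c"]],
        "point": [{"x": 1, "y": 2}, {"x": 3, "y": 4}],
    }
)

manager = get_dataframe_metadata_manager("polars-df")
result = manager.execute_complex_type_introspection(df)
# Expected: "tags" is reported with complex_type "array", "point" with
# "struct", and "id" does not appear in the complex-type list.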
+     def _analyze_polars_type(self, col_name: str, dtype: Any, pl: Any) -> dict[str, Any]:
+         """Analyze a Polars data type for complexity.
+
+         Args:
+             col_name: Column name
+             dtype: Polars data type
+             pl: Polars module
+
+         Returns:
+             Type analysis dict
+         """
+         dtype_str = str(dtype)
+         is_complex = False
+         complex_type = None
+         nested_depth = 0
+
+         if dtype_str.startswith("List"):
+             is_complex = True
+             complex_type = "array"
+             nested_depth = dtype_str.count("List") + dtype_str.count("Struct")
+         elif dtype_str.startswith("Struct"):
+             is_complex = True
+             complex_type = "struct"
+             # Count List occurrences too, so lists nested inside structs
+             # contribute to depth the same way structs nested inside lists do.
+             nested_depth = dtype_str.count("Struct") + dtype_str.count("List")
+
+         return {
+             "name": col_name,
+             "dtype": dtype_str,
+             "is_complex": is_complex,
+             "complex_type": complex_type,
+             "nested_depth": nested_depth,
+         }
+
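To make the string-counting heuristic concrete, a small sketch (the exact str() rendering of nested Polars dtypes is an assumption about the Polars version in use):

import polars as pl

dtype = pl.List(pl.Struct({"x": pl.Int64}))
s = str(dtype)  # e.g. "List(Struct({'x': Int64}))"
depth = s.count("List") + s.count("Struct")  # 1 + 1 == 2 levels of nesting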
+     def _analyze_spark_type(self, field: Any, ArrayType: type, MapType: type, StructType: type) -> dict[str, Any]:
+         """Analyze a Spark schema field for complexity.
+
+         Args:
+             field: Spark StructField
+             ArrayType: Spark ArrayType class
+             MapType: Spark MapType class
+             StructType: Spark StructType class
+
+         Returns:
+             Type analysis dict
+         """
+         dtype = field.dataType
+         is_complex = False
+         complex_type = None
+         nested_depth = 0
+
+         def count_depth(t: Any, depth: int = 0) -> int:
+             if isinstance(t, ArrayType):
+                 return count_depth(t.elementType, depth + 1)
+             elif isinstance(t, MapType):
+                 return max(count_depth(t.keyType, depth + 1), count_depth(t.valueType, depth + 1))
+             elif isinstance(t, StructType):
+                 if t.fields:
+                     return max(count_depth(f.dataType, depth + 1) for f in t.fields)
+                 return depth + 1
+             return depth
+
+         if isinstance(dtype, ArrayType):
+             is_complex = True
+             complex_type = "array"
+             nested_depth = count_depth(dtype)
+         elif isinstance(dtype, MapType):
+             is_complex = True
+             complex_type = "map"
+             nested_depth = count_depth(dtype)
+         elif isinstance(dtype, StructType):
+             is_complex = True
+             complex_type = "struct"
+             nested_depth = count_depth(dtype)
+
+         return {
+             "name": field.name,
+             "dtype": str(dtype),
+             "is_complex": is_complex,
+             "complex_type": complex_type,
+             "nested_depth": nested_depth,
+         }
+
+
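The recursive count_depth helper can be traced against a concrete PySpark schema; a sketch, with deeply_nested as an illustrative name:

from pyspark.sql.types import ArrayType, IntegerType, StructField, StructType

# ARRAY<STRUCT<xs: ARRAY<INT>>> -- three nested container levels.
deeply_nested = ArrayType(
    StructType([StructField("xs", ArrayType(IntegerType()))])
)
# count_depth walks Array -> Struct -> Array and stops at IntegerType,
# so this schema yields a nested_depth of 3.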
+ def get_dataframe_metadata_manager(
+     platform_name: str,
+     spark_session: Any = None,
+ ) -> DataFrameMetadataOperationsManager | None:
+     """Get a DataFrame metadata operations manager for a platform.
+
+     Args:
+         platform_name: Platform name (e.g., "polars-df", "pandas-df", "pyspark-df")
+         spark_session: SparkSession instance (required for pyspark-df catalog ops)
+
+     Returns:
+         DataFrameMetadataOperationsManager if platform supports DataFrame operations,
+         None if platform is not a DataFrame platform.
+     """
+     platform_lower = platform_name.lower()
+
+     # Check if this is a DataFrame platform
+     df_platforms = ("polars-df", "polars", "pandas-df", "pandas", "pyspark-df", "pyspark", "datafusion")
+     if not any(p in platform_lower for p in df_platforms):
+         logger.debug(f"Platform {platform_name} is not a DataFrame platform")
+         return None
+
+     try:
+         return DataFrameMetadataOperationsManager(platform_name, spark_session=spark_session)
+     except Exception as e:
+         logger.warning(f"Failed to create metadata manager for {platform_name}: {e}")
+         return None
+
+
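A short usage sketch for the factory; "duckdb" is just an illustrative name that matches none of the DataFrame-platform substrings:

# DataFrame platforms get a manager instance.
polars_mgr = get_dataframe_metadata_manager("polars-df")

# Anything else returns None (with a debug-level log line).
assert get_dataframe_metadata_manager("duckdb") is None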
+ __all__ = [
+     # Enums
+     "MetadataOperationType",
+     "MetadataOperationCategory",
+     "OPERATION_CATEGORIES",
+     # Capabilities
+     "DataFrameMetadataCapabilities",
+     "get_platform_capabilities",
+     "POLARS_METADATA_CAPABILITIES",
+     "PANDAS_METADATA_CAPABILITIES",
+     "PYSPARK_METADATA_CAPABILITIES",
+     "DATAFUSION_METADATA_CAPABILITIES",
+     # Results
+     "DataFrameMetadataResult",
+     # Errors
+     "UnsupportedOperationError",
+     "get_unsupported_message",
+     # Manager
+     "DataFrameMetadataOperationsManager",
+     "get_dataframe_metadata_manager",
+ ]