relationalai 0.13.0.dev0__py3-none-any.whl → 0.13.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (838) hide show
  1. frontend/debugger/dist/.gitignore +2 -0
  2. frontend/debugger/dist/assets/favicon-Dy0ZgA6N.png +0 -0
  3. frontend/debugger/dist/assets/index-Cssla-O7.js +208 -0
  4. frontend/debugger/dist/assets/index-DlHsYx1V.css +9 -0
  5. frontend/debugger/dist/index.html +17 -0
  6. relationalai/__init__.py +256 -1
  7. relationalai/clients/__init__.py +18 -0
  8. relationalai/clients/client.py +947 -0
  9. relationalai/clients/config.py +673 -0
  10. relationalai/clients/direct_access_client.py +118 -0
  11. relationalai/clients/exec_txn_poller.py +91 -0
  12. relationalai/clients/hash_util.py +31 -0
  13. relationalai/clients/local.py +586 -0
  14. relationalai/clients/profile_polling.py +73 -0
  15. relationalai/clients/resources/__init__.py +8 -0
  16. relationalai/clients/resources/azure/azure.py +502 -0
  17. relationalai/clients/resources/snowflake/__init__.py +20 -0
  18. relationalai/clients/resources/snowflake/cli_resources.py +98 -0
  19. relationalai/clients/resources/snowflake/direct_access_resources.py +734 -0
  20. relationalai/clients/resources/snowflake/engine_service.py +381 -0
  21. relationalai/clients/resources/snowflake/engine_state_handlers.py +315 -0
  22. relationalai/clients/resources/snowflake/error_handlers.py +240 -0
  23. relationalai/clients/resources/snowflake/export_procedure.py.jinja +249 -0
  24. relationalai/clients/resources/snowflake/resources_factory.py +99 -0
  25. relationalai/clients/resources/snowflake/snowflake.py +3185 -0
  26. relationalai/clients/resources/snowflake/use_index_poller.py +1019 -0
  27. relationalai/clients/resources/snowflake/use_index_resources.py +188 -0
  28. relationalai/clients/resources/snowflake/util.py +387 -0
  29. relationalai/clients/result_helpers.py +420 -0
  30. relationalai/clients/types.py +118 -0
  31. relationalai/clients/util.py +356 -0
  32. relationalai/debugging.py +389 -0
  33. relationalai/dsl.py +1749 -0
  34. relationalai/early_access/builder/__init__.py +30 -0
  35. relationalai/early_access/builder/builder/__init__.py +35 -0
  36. relationalai/early_access/builder/snowflake/__init__.py +12 -0
  37. relationalai/early_access/builder/std/__init__.py +25 -0
  38. relationalai/early_access/builder/std/decimals/__init__.py +12 -0
  39. relationalai/early_access/builder/std/integers/__init__.py +12 -0
  40. relationalai/early_access/builder/std/math/__init__.py +12 -0
  41. relationalai/early_access/builder/std/strings/__init__.py +14 -0
  42. relationalai/early_access/devtools/__init__.py +12 -0
  43. relationalai/early_access/devtools/benchmark_lqp/__init__.py +12 -0
  44. relationalai/early_access/devtools/extract_lqp/__init__.py +12 -0
  45. relationalai/early_access/dsl/adapters/orm/adapter_qb.py +427 -0
  46. relationalai/early_access/dsl/adapters/orm/parser.py +636 -0
  47. relationalai/early_access/dsl/adapters/owl/adapter.py +176 -0
  48. relationalai/early_access/dsl/adapters/owl/parser.py +160 -0
  49. relationalai/early_access/dsl/bindings/common.py +402 -0
  50. relationalai/early_access/dsl/bindings/csv.py +170 -0
  51. relationalai/early_access/dsl/bindings/legacy/binding_models.py +143 -0
  52. relationalai/early_access/dsl/bindings/snowflake.py +64 -0
  53. relationalai/early_access/dsl/codegen/binder.py +411 -0
  54. relationalai/early_access/dsl/codegen/common.py +79 -0
  55. relationalai/early_access/dsl/codegen/helpers.py +23 -0
  56. relationalai/early_access/dsl/codegen/relations.py +700 -0
  57. relationalai/early_access/dsl/codegen/weaver.py +417 -0
  58. relationalai/early_access/dsl/core/builders/__init__.py +47 -0
  59. relationalai/early_access/dsl/core/builders/logic.py +19 -0
  60. relationalai/early_access/dsl/core/builders/scalar_constraint.py +11 -0
  61. relationalai/early_access/dsl/core/constraints/predicate/atomic.py +455 -0
  62. relationalai/early_access/dsl/core/constraints/predicate/universal.py +73 -0
  63. relationalai/early_access/dsl/core/constraints/scalar.py +310 -0
  64. relationalai/early_access/dsl/core/context.py +13 -0
  65. relationalai/early_access/dsl/core/cset.py +132 -0
  66. relationalai/early_access/dsl/core/exprs/__init__.py +116 -0
  67. relationalai/early_access/dsl/core/exprs/relational.py +18 -0
  68. relationalai/early_access/dsl/core/exprs/scalar.py +412 -0
  69. relationalai/early_access/dsl/core/instances.py +44 -0
  70. relationalai/early_access/dsl/core/logic/__init__.py +193 -0
  71. relationalai/early_access/dsl/core/logic/aggregation.py +98 -0
  72. relationalai/early_access/dsl/core/logic/exists.py +223 -0
  73. relationalai/early_access/dsl/core/logic/helper.py +163 -0
  74. relationalai/early_access/dsl/core/namespaces.py +32 -0
  75. relationalai/early_access/dsl/core/relations.py +276 -0
  76. relationalai/early_access/dsl/core/rules.py +112 -0
  77. relationalai/early_access/dsl/core/std/__init__.py +45 -0
  78. relationalai/early_access/dsl/core/temporal/recall.py +6 -0
  79. relationalai/early_access/dsl/core/types/__init__.py +270 -0
  80. relationalai/early_access/dsl/core/types/concepts.py +128 -0
  81. relationalai/early_access/dsl/core/types/constrained/__init__.py +267 -0
  82. relationalai/early_access/dsl/core/types/constrained/nominal.py +143 -0
  83. relationalai/early_access/dsl/core/types/constrained/subtype.py +124 -0
  84. relationalai/early_access/dsl/core/types/standard.py +92 -0
  85. relationalai/early_access/dsl/core/types/unconstrained.py +50 -0
  86. relationalai/early_access/dsl/core/types/variables.py +203 -0
  87. relationalai/early_access/dsl/ir/compiler.py +318 -0
  88. relationalai/early_access/dsl/ir/executor.py +260 -0
  89. relationalai/early_access/dsl/ontologies/constraints.py +88 -0
  90. relationalai/early_access/dsl/ontologies/export.py +30 -0
  91. relationalai/early_access/dsl/ontologies/models.py +453 -0
  92. relationalai/early_access/dsl/ontologies/python_printer.py +303 -0
  93. relationalai/early_access/dsl/ontologies/readings.py +60 -0
  94. relationalai/early_access/dsl/ontologies/relationships.py +322 -0
  95. relationalai/early_access/dsl/ontologies/roles.py +87 -0
  96. relationalai/early_access/dsl/ontologies/subtyping.py +55 -0
  97. relationalai/early_access/dsl/orm/constraints.py +438 -0
  98. relationalai/early_access/dsl/orm/measures/dimensions.py +200 -0
  99. relationalai/early_access/dsl/orm/measures/initializer.py +16 -0
  100. relationalai/early_access/dsl/orm/measures/measure_rules.py +275 -0
  101. relationalai/early_access/dsl/orm/measures/measures.py +299 -0
  102. relationalai/early_access/dsl/orm/measures/role_exprs.py +268 -0
  103. relationalai/early_access/dsl/orm/models.py +256 -0
  104. relationalai/early_access/dsl/orm/object_oriented_printer.py +344 -0
  105. relationalai/early_access/dsl/orm/printer.py +469 -0
  106. relationalai/early_access/dsl/orm/reasoners.py +480 -0
  107. relationalai/early_access/dsl/orm/relations.py +19 -0
  108. relationalai/early_access/dsl/orm/relationships.py +251 -0
  109. relationalai/early_access/dsl/orm/types.py +42 -0
  110. relationalai/early_access/dsl/orm/utils.py +79 -0
  111. relationalai/early_access/dsl/orm/verb.py +204 -0
  112. relationalai/early_access/dsl/physical_metadata/tables.py +133 -0
  113. relationalai/early_access/dsl/relations.py +170 -0
  114. relationalai/early_access/dsl/rulesets.py +69 -0
  115. relationalai/early_access/dsl/schemas/__init__.py +450 -0
  116. relationalai/early_access/dsl/schemas/builder.py +48 -0
  117. relationalai/early_access/dsl/schemas/comp_names.py +51 -0
  118. relationalai/early_access/dsl/schemas/components.py +203 -0
  119. relationalai/early_access/dsl/schemas/contexts.py +156 -0
  120. relationalai/early_access/dsl/schemas/exprs.py +89 -0
  121. relationalai/early_access/dsl/schemas/fragments.py +464 -0
  122. relationalai/early_access/dsl/serialization.py +79 -0
  123. relationalai/early_access/dsl/serialize/exporter.py +163 -0
  124. relationalai/early_access/dsl/snow/api.py +105 -0
  125. relationalai/early_access/dsl/snow/common.py +76 -0
  126. relationalai/early_access/dsl/state_mgmt/__init__.py +129 -0
  127. relationalai/early_access/dsl/state_mgmt/state_charts.py +125 -0
  128. relationalai/early_access/dsl/state_mgmt/transitions.py +130 -0
  129. relationalai/early_access/dsl/types/__init__.py +40 -0
  130. relationalai/early_access/dsl/types/concepts.py +12 -0
  131. relationalai/early_access/dsl/types/entities.py +135 -0
  132. relationalai/early_access/dsl/types/values.py +17 -0
  133. relationalai/early_access/dsl/utils.py +102 -0
  134. relationalai/early_access/graphs/__init__.py +13 -0
  135. relationalai/early_access/lqp/__init__.py +12 -0
  136. relationalai/early_access/lqp/compiler/__init__.py +12 -0
  137. relationalai/early_access/lqp/constructors/__init__.py +18 -0
  138. relationalai/early_access/lqp/executor/__init__.py +12 -0
  139. relationalai/early_access/lqp/ir/__init__.py +12 -0
  140. relationalai/early_access/lqp/passes/__init__.py +12 -0
  141. relationalai/early_access/lqp/pragmas/__init__.py +12 -0
  142. relationalai/early_access/lqp/primitives/__init__.py +12 -0
  143. relationalai/early_access/lqp/types/__init__.py +12 -0
  144. relationalai/early_access/lqp/utils/__init__.py +12 -0
  145. relationalai/early_access/lqp/validators/__init__.py +12 -0
  146. relationalai/early_access/metamodel/__init__.py +58 -0
  147. relationalai/early_access/metamodel/builtins/__init__.py +12 -0
  148. relationalai/early_access/metamodel/compiler/__init__.py +12 -0
  149. relationalai/early_access/metamodel/dependency/__init__.py +12 -0
  150. relationalai/early_access/metamodel/factory/__init__.py +17 -0
  151. relationalai/early_access/metamodel/helpers/__init__.py +12 -0
  152. relationalai/early_access/metamodel/ir/__init__.py +14 -0
  153. relationalai/early_access/metamodel/rewrite/__init__.py +7 -0
  154. relationalai/early_access/metamodel/typer/__init__.py +3 -0
  155. relationalai/early_access/metamodel/typer/typer/__init__.py +12 -0
  156. relationalai/early_access/metamodel/types/__init__.py +15 -0
  157. relationalai/early_access/metamodel/util/__init__.py +15 -0
  158. relationalai/early_access/metamodel/visitor/__init__.py +12 -0
  159. relationalai/early_access/rel/__init__.py +12 -0
  160. relationalai/early_access/rel/executor/__init__.py +12 -0
  161. relationalai/early_access/rel/rel_utils/__init__.py +12 -0
  162. relationalai/early_access/rel/rewrite/__init__.py +7 -0
  163. relationalai/early_access/solvers/__init__.py +19 -0
  164. relationalai/early_access/sql/__init__.py +11 -0
  165. relationalai/early_access/sql/executor/__init__.py +3 -0
  166. relationalai/early_access/sql/rewrite/__init__.py +3 -0
  167. relationalai/early_access/tests/logging/__init__.py +12 -0
  168. relationalai/early_access/tests/test_snapshot_base/__init__.py +12 -0
  169. relationalai/early_access/tests/utils/__init__.py +12 -0
  170. relationalai/environments/__init__.py +35 -0
  171. relationalai/environments/base.py +381 -0
  172. relationalai/environments/colab.py +14 -0
  173. relationalai/environments/generic.py +71 -0
  174. relationalai/environments/ipython.py +68 -0
  175. relationalai/environments/jupyter.py +9 -0
  176. relationalai/environments/snowbook.py +169 -0
  177. relationalai/errors.py +2496 -0
  178. relationalai/experimental/SF.py +38 -0
  179. relationalai/experimental/inspect.py +47 -0
  180. relationalai/experimental/pathfinder/__init__.py +158 -0
  181. relationalai/experimental/pathfinder/api.py +160 -0
  182. relationalai/experimental/pathfinder/automaton.py +584 -0
  183. relationalai/experimental/pathfinder/bridge.py +226 -0
  184. relationalai/experimental/pathfinder/compiler.py +416 -0
  185. relationalai/experimental/pathfinder/datalog.py +214 -0
  186. relationalai/experimental/pathfinder/diagnostics.py +56 -0
  187. relationalai/experimental/pathfinder/filter.py +236 -0
  188. relationalai/experimental/pathfinder/glushkov.py +439 -0
  189. relationalai/experimental/pathfinder/options.py +265 -0
  190. relationalai/experimental/pathfinder/pathfinder-v0.7.0.rel +1951 -0
  191. relationalai/experimental/pathfinder/rpq.py +344 -0
  192. relationalai/experimental/pathfinder/transition.py +200 -0
  193. relationalai/experimental/pathfinder/utils.py +26 -0
  194. relationalai/experimental/paths/README.md +107 -0
  195. relationalai/experimental/paths/api.py +143 -0
  196. relationalai/experimental/paths/benchmarks/grid_graph.py +37 -0
  197. relationalai/experimental/paths/code_organization.md +2 -0
  198. relationalai/experimental/paths/examples/Movies.ipynb +16328 -0
  199. relationalai/experimental/paths/examples/basic_example.py +40 -0
  200. relationalai/experimental/paths/examples/minimal_engine_warmup.py +3 -0
  201. relationalai/experimental/paths/examples/movie_example.py +77 -0
  202. relationalai/experimental/paths/examples/movies_data/actedin.csv +193 -0
  203. relationalai/experimental/paths/examples/movies_data/directed.csv +45 -0
  204. relationalai/experimental/paths/examples/movies_data/follows.csv +7 -0
  205. relationalai/experimental/paths/examples/movies_data/movies.csv +39 -0
  206. relationalai/experimental/paths/examples/movies_data/person.csv +134 -0
  207. relationalai/experimental/paths/examples/movies_data/produced.csv +16 -0
  208. relationalai/experimental/paths/examples/movies_data/ratings.csv +10 -0
  209. relationalai/experimental/paths/examples/movies_data/wrote.csv +11 -0
  210. relationalai/experimental/paths/examples/paths_benchmark.py +115 -0
  211. relationalai/experimental/paths/examples/paths_example.py +116 -0
  212. relationalai/experimental/paths/examples/pattern_to_automaton.py +28 -0
  213. relationalai/experimental/paths/find_paths_via_automaton.py +85 -0
  214. relationalai/experimental/paths/graph.py +185 -0
  215. relationalai/experimental/paths/path_algorithms/find_paths.py +280 -0
  216. relationalai/experimental/paths/path_algorithms/one_sided_ball_repetition.py +26 -0
  217. relationalai/experimental/paths/path_algorithms/one_sided_ball_upto.py +111 -0
  218. relationalai/experimental/paths/path_algorithms/single.py +59 -0
  219. relationalai/experimental/paths/path_algorithms/two_sided_balls_repetition.py +39 -0
  220. relationalai/experimental/paths/path_algorithms/two_sided_balls_upto.py +103 -0
  221. relationalai/experimental/paths/path_algorithms/usp-old.py +130 -0
  222. relationalai/experimental/paths/path_algorithms/usp-tuple.py +183 -0
  223. relationalai/experimental/paths/path_algorithms/usp.py +150 -0
  224. relationalai/experimental/paths/product_graph.py +93 -0
  225. relationalai/experimental/paths/rpq/automaton.py +584 -0
  226. relationalai/experimental/paths/rpq/diagnostics.py +56 -0
  227. relationalai/experimental/paths/rpq/rpq.py +378 -0
  228. relationalai/experimental/paths/tests/tests_limit_sp_max_length.py +90 -0
  229. relationalai/experimental/paths/tests/tests_limit_sp_multiple.py +119 -0
  230. relationalai/experimental/paths/tests/tests_limit_sp_single.py +104 -0
  231. relationalai/experimental/paths/tests/tests_limit_walks_multiple.py +113 -0
  232. relationalai/experimental/paths/tests/tests_limit_walks_single.py +149 -0
  233. relationalai/experimental/paths/tests/tests_one_sided_ball_repetition_multiple.py +70 -0
  234. relationalai/experimental/paths/tests/tests_one_sided_ball_repetition_single.py +64 -0
  235. relationalai/experimental/paths/tests/tests_one_sided_ball_upto_multiple.py +115 -0
  236. relationalai/experimental/paths/tests/tests_one_sided_ball_upto_single.py +75 -0
  237. relationalai/experimental/paths/tests/tests_single_paths.py +152 -0
  238. relationalai/experimental/paths/tests/tests_single_walks.py +208 -0
  239. relationalai/experimental/paths/tests/tests_single_walks_undirected.py +297 -0
  240. relationalai/experimental/paths/tests/tests_two_sided_balls_repetition_multiple.py +107 -0
  241. relationalai/experimental/paths/tests/tests_two_sided_balls_repetition_single.py +76 -0
  242. relationalai/experimental/paths/tests/tests_two_sided_balls_upto_multiple.py +76 -0
  243. relationalai/experimental/paths/tests/tests_two_sided_balls_upto_single.py +110 -0
  244. relationalai/experimental/paths/tests/tests_usp_nsp_multiple.py +229 -0
  245. relationalai/experimental/paths/tests/tests_usp_nsp_single.py +108 -0
  246. relationalai/experimental/paths/tree_agg.py +168 -0
  247. relationalai/experimental/paths/utilities/iterators.py +27 -0
  248. relationalai/experimental/paths/utilities/prefix_sum.py +91 -0
  249. relationalai/experimental/solvers.py +1087 -0
  250. relationalai/loaders/csv.py +195 -0
  251. relationalai/loaders/loader.py +177 -0
  252. relationalai/loaders/types.py +23 -0
  253. relationalai/rel_emitter.py +373 -0
  254. relationalai/rel_utils.py +185 -0
  255. relationalai/semantics/__init__.py +22 -146
  256. relationalai/semantics/designs/query_builder/identify_by.md +106 -0
  257. relationalai/semantics/devtools/benchmark_lqp.py +535 -0
  258. relationalai/semantics/devtools/compilation_manager.py +294 -0
  259. relationalai/semantics/devtools/extract_lqp.py +110 -0
  260. relationalai/semantics/internal/internal.py +3785 -0
  261. relationalai/semantics/internal/snowflake.py +325 -0
  262. relationalai/semantics/lqp/README.md +34 -0
  263. relationalai/semantics/lqp/builtins.py +16 -0
  264. relationalai/semantics/lqp/compiler.py +22 -0
  265. relationalai/semantics/lqp/constructors.py +68 -0
  266. relationalai/semantics/lqp/executor.py +469 -0
  267. relationalai/semantics/lqp/intrinsics.py +24 -0
  268. relationalai/semantics/lqp/model2lqp.py +877 -0
  269. relationalai/semantics/lqp/passes.py +680 -0
  270. relationalai/semantics/lqp/primitives.py +252 -0
  271. relationalai/semantics/lqp/result_helpers.py +202 -0
  272. relationalai/semantics/lqp/rewrite/annotate_constraints.py +57 -0
  273. relationalai/semantics/lqp/rewrite/cdc.py +216 -0
  274. relationalai/semantics/lqp/rewrite/extract_common.py +338 -0
  275. relationalai/semantics/lqp/rewrite/extract_keys.py +512 -0
  276. relationalai/semantics/lqp/rewrite/function_annotations.py +114 -0
  277. relationalai/semantics/lqp/rewrite/functional_dependencies.py +314 -0
  278. relationalai/semantics/lqp/rewrite/quantify_vars.py +296 -0
  279. relationalai/semantics/lqp/rewrite/splinter.py +76 -0
  280. relationalai/semantics/lqp/types.py +101 -0
  281. relationalai/semantics/lqp/utils.py +160 -0
  282. relationalai/semantics/lqp/validators.py +57 -0
  283. relationalai/semantics/metamodel/__init__.py +40 -6
  284. relationalai/semantics/metamodel/builtins.py +771 -205
  285. relationalai/semantics/metamodel/compiler.py +133 -0
  286. relationalai/semantics/metamodel/dependency.py +862 -0
  287. relationalai/semantics/metamodel/executor.py +61 -0
  288. relationalai/semantics/metamodel/factory.py +287 -0
  289. relationalai/semantics/metamodel/helpers.py +361 -0
  290. relationalai/semantics/metamodel/rewrite/discharge_constraints.py +39 -0
  291. relationalai/semantics/metamodel/rewrite/dnf_union_splitter.py +210 -0
  292. relationalai/semantics/metamodel/rewrite/extract_nested_logicals.py +78 -0
  293. relationalai/semantics/metamodel/rewrite/flatten.py +554 -0
  294. relationalai/semantics/metamodel/rewrite/format_outputs.py +165 -0
  295. relationalai/semantics/metamodel/typer/checker.py +353 -0
  296. relationalai/semantics/metamodel/typer/typer.py +1399 -0
  297. relationalai/semantics/metamodel/util.py +506 -0
  298. relationalai/semantics/reasoners/__init__.py +10 -0
  299. relationalai/semantics/reasoners/graph/README.md +620 -0
  300. relationalai/semantics/reasoners/graph/__init__.py +37 -0
  301. relationalai/semantics/reasoners/graph/core.py +9019 -0
  302. relationalai/semantics/reasoners/graph/design/beyond_demand_transform.md +797 -0
  303. relationalai/semantics/reasoners/graph/tests/README.md +21 -0
  304. relationalai/semantics/reasoners/optimization/__init__.py +68 -0
  305. relationalai/semantics/reasoners/optimization/common.py +88 -0
  306. relationalai/semantics/reasoners/optimization/solvers_dev.py +568 -0
  307. relationalai/semantics/reasoners/optimization/solvers_pb.py +1414 -0
  308. relationalai/semantics/rel/builtins.py +40 -0
  309. relationalai/semantics/rel/compiler.py +989 -0
  310. relationalai/semantics/rel/executor.py +362 -0
  311. relationalai/semantics/rel/rel.py +482 -0
  312. relationalai/semantics/rel/rel_utils.py +276 -0
  313. relationalai/semantics/snowflake/__init__.py +3 -0
  314. relationalai/semantics/sql/compiler.py +2503 -0
  315. relationalai/semantics/sql/executor/duck_db.py +52 -0
  316. relationalai/semantics/sql/executor/result_helpers.py +64 -0
  317. relationalai/semantics/sql/executor/snowflake.py +149 -0
  318. relationalai/semantics/sql/rewrite/denormalize.py +222 -0
  319. relationalai/semantics/sql/rewrite/double_negation.py +49 -0
  320. relationalai/semantics/sql/rewrite/recursive_union.py +127 -0
  321. relationalai/semantics/sql/rewrite/sort_output_query.py +246 -0
  322. relationalai/semantics/sql/sql.py +504 -0
  323. relationalai/semantics/std/__init__.py +40 -60
  324. relationalai/semantics/std/constraints.py +43 -37
  325. relationalai/semantics/std/datetime.py +135 -246
  326. relationalai/semantics/std/decimals.py +52 -45
  327. relationalai/semantics/std/floats.py +5 -13
  328. relationalai/semantics/std/integers.py +11 -26
  329. relationalai/semantics/std/math.py +112 -183
  330. relationalai/semantics/std/pragmas.py +11 -0
  331. relationalai/semantics/std/re.py +62 -80
  332. relationalai/semantics/std/std.py +14 -0
  333. relationalai/semantics/std/strings.py +60 -117
  334. relationalai/semantics/tests/test_snapshot_abstract.py +143 -0
  335. relationalai/semantics/tests/test_snapshot_base.py +9 -0
  336. relationalai/semantics/tests/utils.py +46 -0
  337. relationalai/std/__init__.py +70 -0
  338. relationalai/tools/cli.py +2089 -0
  339. relationalai/tools/cli_controls.py +1826 -0
  340. relationalai/tools/cli_helpers.py +802 -0
  341. relationalai/tools/debugger.py +183 -289
  342. relationalai/tools/debugger_client.py +109 -0
  343. relationalai/tools/debugger_server.py +302 -0
  344. relationalai/tools/dev.py +685 -0
  345. relationalai/tools/notes +7 -0
  346. relationalai/tools/qb_debugger.py +425 -0
  347. relationalai/util/clean_up_databases.py +95 -0
  348. relationalai/util/format.py +106 -48
  349. relationalai/util/list_databases.py +9 -0
  350. relationalai/util/otel_configuration.py +26 -0
  351. relationalai/util/otel_handler.py +484 -0
  352. relationalai/util/snowflake_handler.py +88 -0
  353. relationalai/util/span_format_test.py +43 -0
  354. relationalai/util/span_tracker.py +207 -0
  355. relationalai/util/spans_file_handler.py +72 -0
  356. relationalai/util/tracing_handler.py +34 -0
  357. relationalai-0.13.2.dist-info/METADATA +74 -0
  358. relationalai-0.13.2.dist-info/RECORD +460 -0
  359. relationalai-0.13.2.dist-info/WHEEL +4 -0
  360. relationalai-0.13.2.dist-info/entry_points.txt +3 -0
  361. relationalai-0.13.2.dist-info/licenses/LICENSE +202 -0
  362. relationalai_test_util/__init__.py +4 -0
  363. relationalai_test_util/fixtures.py +233 -0
  364. relationalai_test_util/snapshot.py +252 -0
  365. relationalai_test_util/traceback.py +118 -0
  366. relationalai/config/__init__.py +0 -56
  367. relationalai/config/config.py +0 -289
  368. relationalai/config/config_fields.py +0 -86
  369. relationalai/config/connections/__init__.py +0 -46
  370. relationalai/config/connections/base.py +0 -23
  371. relationalai/config/connections/duckdb.py +0 -29
  372. relationalai/config/connections/snowflake.py +0 -243
  373. relationalai/config/external/__init__.py +0 -17
  374. relationalai/config/external/dbt_converter.py +0 -101
  375. relationalai/config/external/dbt_models.py +0 -93
  376. relationalai/config/external/snowflake_converter.py +0 -41
  377. relationalai/config/external/snowflake_models.py +0 -85
  378. relationalai/config/external/utils.py +0 -19
  379. relationalai/semantics/backends/lqp/annotations.py +0 -11
  380. relationalai/semantics/backends/sql/sql_compiler.py +0 -327
  381. relationalai/semantics/frontend/base.py +0 -1707
  382. relationalai/semantics/frontend/core.py +0 -179
  383. relationalai/semantics/frontend/front_compiler.py +0 -1313
  384. relationalai/semantics/frontend/pprint.py +0 -408
  385. relationalai/semantics/metamodel/metamodel.py +0 -437
  386. relationalai/semantics/metamodel/metamodel_analyzer.py +0 -519
  387. relationalai/semantics/metamodel/metamodel_compiler.py +0 -0
  388. relationalai/semantics/metamodel/pprint.py +0 -412
  389. relationalai/semantics/metamodel/rewriter.py +0 -266
  390. relationalai/semantics/metamodel/typer.py +0 -1378
  391. relationalai/semantics/std/aggregates.py +0 -149
  392. relationalai/semantics/std/common.py +0 -44
  393. relationalai/semantics/std/numbers.py +0 -86
  394. relationalai/shims/executor.py +0 -147
  395. relationalai/shims/helpers.py +0 -126
  396. relationalai/shims/hoister.py +0 -221
  397. relationalai/shims/mm2v0.py +0 -1290
  398. relationalai/tools/cli/__init__.py +0 -6
  399. relationalai/tools/cli/cli.py +0 -90
  400. relationalai/tools/cli/components/__init__.py +0 -5
  401. relationalai/tools/cli/components/progress_reader.py +0 -1524
  402. relationalai/tools/cli/components/utils.py +0 -58
  403. relationalai/tools/cli/config_template.py +0 -45
  404. relationalai/tools/cli/dev.py +0 -19
  405. relationalai/tools/typer_debugger.py +0 -93
  406. relationalai/util/dataclasses.py +0 -43
  407. relationalai/util/docutils.py +0 -40
  408. relationalai/util/error.py +0 -199
  409. relationalai/util/naming.py +0 -145
  410. relationalai/util/python.py +0 -35
  411. relationalai/util/runtime.py +0 -156
  412. relationalai/util/schema.py +0 -197
  413. relationalai/util/source.py +0 -185
  414. relationalai/util/structures.py +0 -163
  415. relationalai/util/tracing.py +0 -261
  416. relationalai-0.13.0.dev0.dist-info/METADATA +0 -46
  417. relationalai-0.13.0.dev0.dist-info/RECORD +0 -488
  418. relationalai-0.13.0.dev0.dist-info/WHEEL +0 -5
  419. relationalai-0.13.0.dev0.dist-info/entry_points.txt +0 -3
  420. relationalai-0.13.0.dev0.dist-info/top_level.txt +0 -2
  421. v0/relationalai/__init__.py +0 -216
  422. v0/relationalai/clients/__init__.py +0 -5
  423. v0/relationalai/clients/azure.py +0 -477
  424. v0/relationalai/clients/client.py +0 -912
  425. v0/relationalai/clients/config.py +0 -673
  426. v0/relationalai/clients/direct_access_client.py +0 -118
  427. v0/relationalai/clients/hash_util.py +0 -31
  428. v0/relationalai/clients/local.py +0 -571
  429. v0/relationalai/clients/profile_polling.py +0 -73
  430. v0/relationalai/clients/result_helpers.py +0 -420
  431. v0/relationalai/clients/snowflake.py +0 -3869
  432. v0/relationalai/clients/types.py +0 -113
  433. v0/relationalai/clients/use_index_poller.py +0 -980
  434. v0/relationalai/clients/util.py +0 -356
  435. v0/relationalai/debugging.py +0 -389
  436. v0/relationalai/dsl.py +0 -1749
  437. v0/relationalai/early_access/builder/__init__.py +0 -30
  438. v0/relationalai/early_access/builder/builder/__init__.py +0 -35
  439. v0/relationalai/early_access/builder/snowflake/__init__.py +0 -12
  440. v0/relationalai/early_access/builder/std/__init__.py +0 -25
  441. v0/relationalai/early_access/builder/std/decimals/__init__.py +0 -12
  442. v0/relationalai/early_access/builder/std/integers/__init__.py +0 -12
  443. v0/relationalai/early_access/builder/std/math/__init__.py +0 -12
  444. v0/relationalai/early_access/builder/std/strings/__init__.py +0 -14
  445. v0/relationalai/early_access/devtools/__init__.py +0 -12
  446. v0/relationalai/early_access/devtools/benchmark_lqp/__init__.py +0 -12
  447. v0/relationalai/early_access/devtools/extract_lqp/__init__.py +0 -12
  448. v0/relationalai/early_access/dsl/adapters/orm/adapter_qb.py +0 -427
  449. v0/relationalai/early_access/dsl/adapters/orm/parser.py +0 -636
  450. v0/relationalai/early_access/dsl/adapters/owl/adapter.py +0 -176
  451. v0/relationalai/early_access/dsl/adapters/owl/parser.py +0 -160
  452. v0/relationalai/early_access/dsl/bindings/common.py +0 -402
  453. v0/relationalai/early_access/dsl/bindings/csv.py +0 -170
  454. v0/relationalai/early_access/dsl/bindings/legacy/binding_models.py +0 -143
  455. v0/relationalai/early_access/dsl/bindings/snowflake.py +0 -64
  456. v0/relationalai/early_access/dsl/codegen/binder.py +0 -411
  457. v0/relationalai/early_access/dsl/codegen/common.py +0 -79
  458. v0/relationalai/early_access/dsl/codegen/helpers.py +0 -23
  459. v0/relationalai/early_access/dsl/codegen/relations.py +0 -700
  460. v0/relationalai/early_access/dsl/codegen/weaver.py +0 -417
  461. v0/relationalai/early_access/dsl/core/builders/__init__.py +0 -47
  462. v0/relationalai/early_access/dsl/core/builders/logic.py +0 -19
  463. v0/relationalai/early_access/dsl/core/builders/scalar_constraint.py +0 -11
  464. v0/relationalai/early_access/dsl/core/constraints/predicate/atomic.py +0 -455
  465. v0/relationalai/early_access/dsl/core/constraints/predicate/universal.py +0 -73
  466. v0/relationalai/early_access/dsl/core/constraints/scalar.py +0 -310
  467. v0/relationalai/early_access/dsl/core/context.py +0 -13
  468. v0/relationalai/early_access/dsl/core/cset.py +0 -132
  469. v0/relationalai/early_access/dsl/core/exprs/__init__.py +0 -116
  470. v0/relationalai/early_access/dsl/core/exprs/relational.py +0 -18
  471. v0/relationalai/early_access/dsl/core/exprs/scalar.py +0 -412
  472. v0/relationalai/early_access/dsl/core/instances.py +0 -44
  473. v0/relationalai/early_access/dsl/core/logic/__init__.py +0 -193
  474. v0/relationalai/early_access/dsl/core/logic/aggregation.py +0 -98
  475. v0/relationalai/early_access/dsl/core/logic/exists.py +0 -223
  476. v0/relationalai/early_access/dsl/core/logic/helper.py +0 -163
  477. v0/relationalai/early_access/dsl/core/namespaces.py +0 -32
  478. v0/relationalai/early_access/dsl/core/relations.py +0 -276
  479. v0/relationalai/early_access/dsl/core/rules.py +0 -112
  480. v0/relationalai/early_access/dsl/core/std/__init__.py +0 -45
  481. v0/relationalai/early_access/dsl/core/temporal/recall.py +0 -6
  482. v0/relationalai/early_access/dsl/core/types/__init__.py +0 -270
  483. v0/relationalai/early_access/dsl/core/types/concepts.py +0 -128
  484. v0/relationalai/early_access/dsl/core/types/constrained/__init__.py +0 -267
  485. v0/relationalai/early_access/dsl/core/types/constrained/nominal.py +0 -143
  486. v0/relationalai/early_access/dsl/core/types/constrained/subtype.py +0 -124
  487. v0/relationalai/early_access/dsl/core/types/standard.py +0 -92
  488. v0/relationalai/early_access/dsl/core/types/unconstrained.py +0 -50
  489. v0/relationalai/early_access/dsl/core/types/variables.py +0 -203
  490. v0/relationalai/early_access/dsl/ir/compiler.py +0 -318
  491. v0/relationalai/early_access/dsl/ir/executor.py +0 -260
  492. v0/relationalai/early_access/dsl/ontologies/constraints.py +0 -88
  493. v0/relationalai/early_access/dsl/ontologies/export.py +0 -30
  494. v0/relationalai/early_access/dsl/ontologies/models.py +0 -453
  495. v0/relationalai/early_access/dsl/ontologies/python_printer.py +0 -303
  496. v0/relationalai/early_access/dsl/ontologies/readings.py +0 -60
  497. v0/relationalai/early_access/dsl/ontologies/relationships.py +0 -322
  498. v0/relationalai/early_access/dsl/ontologies/roles.py +0 -87
  499. v0/relationalai/early_access/dsl/ontologies/subtyping.py +0 -55
  500. v0/relationalai/early_access/dsl/orm/constraints.py +0 -438
  501. v0/relationalai/early_access/dsl/orm/measures/dimensions.py +0 -200
  502. v0/relationalai/early_access/dsl/orm/measures/initializer.py +0 -16
  503. v0/relationalai/early_access/dsl/orm/measures/measure_rules.py +0 -275
  504. v0/relationalai/early_access/dsl/orm/measures/measures.py +0 -299
  505. v0/relationalai/early_access/dsl/orm/measures/role_exprs.py +0 -268
  506. v0/relationalai/early_access/dsl/orm/models.py +0 -256
  507. v0/relationalai/early_access/dsl/orm/object_oriented_printer.py +0 -344
  508. v0/relationalai/early_access/dsl/orm/printer.py +0 -469
  509. v0/relationalai/early_access/dsl/orm/reasoners.py +0 -480
  510. v0/relationalai/early_access/dsl/orm/relations.py +0 -19
  511. v0/relationalai/early_access/dsl/orm/relationships.py +0 -251
  512. v0/relationalai/early_access/dsl/orm/types.py +0 -42
  513. v0/relationalai/early_access/dsl/orm/utils.py +0 -79
  514. v0/relationalai/early_access/dsl/orm/verb.py +0 -204
  515. v0/relationalai/early_access/dsl/physical_metadata/tables.py +0 -133
  516. v0/relationalai/early_access/dsl/relations.py +0 -170
  517. v0/relationalai/early_access/dsl/rulesets.py +0 -69
  518. v0/relationalai/early_access/dsl/schemas/__init__.py +0 -450
  519. v0/relationalai/early_access/dsl/schemas/builder.py +0 -48
  520. v0/relationalai/early_access/dsl/schemas/comp_names.py +0 -51
  521. v0/relationalai/early_access/dsl/schemas/components.py +0 -203
  522. v0/relationalai/early_access/dsl/schemas/contexts.py +0 -156
  523. v0/relationalai/early_access/dsl/schemas/exprs.py +0 -89
  524. v0/relationalai/early_access/dsl/schemas/fragments.py +0 -464
  525. v0/relationalai/early_access/dsl/serialization.py +0 -79
  526. v0/relationalai/early_access/dsl/serialize/exporter.py +0 -163
  527. v0/relationalai/early_access/dsl/snow/api.py +0 -104
  528. v0/relationalai/early_access/dsl/snow/common.py +0 -76
  529. v0/relationalai/early_access/dsl/state_mgmt/__init__.py +0 -129
  530. v0/relationalai/early_access/dsl/state_mgmt/state_charts.py +0 -125
  531. v0/relationalai/early_access/dsl/state_mgmt/transitions.py +0 -130
  532. v0/relationalai/early_access/dsl/types/__init__.py +0 -40
  533. v0/relationalai/early_access/dsl/types/concepts.py +0 -12
  534. v0/relationalai/early_access/dsl/types/entities.py +0 -135
  535. v0/relationalai/early_access/dsl/types/values.py +0 -17
  536. v0/relationalai/early_access/dsl/utils.py +0 -102
  537. v0/relationalai/early_access/graphs/__init__.py +0 -13
  538. v0/relationalai/early_access/lqp/__init__.py +0 -12
  539. v0/relationalai/early_access/lqp/compiler/__init__.py +0 -12
  540. v0/relationalai/early_access/lqp/constructors/__init__.py +0 -18
  541. v0/relationalai/early_access/lqp/executor/__init__.py +0 -12
  542. v0/relationalai/early_access/lqp/ir/__init__.py +0 -12
  543. v0/relationalai/early_access/lqp/passes/__init__.py +0 -12
  544. v0/relationalai/early_access/lqp/pragmas/__init__.py +0 -12
  545. v0/relationalai/early_access/lqp/primitives/__init__.py +0 -12
  546. v0/relationalai/early_access/lqp/types/__init__.py +0 -12
  547. v0/relationalai/early_access/lqp/utils/__init__.py +0 -12
  548. v0/relationalai/early_access/lqp/validators/__init__.py +0 -12
  549. v0/relationalai/early_access/metamodel/__init__.py +0 -58
  550. v0/relationalai/early_access/metamodel/builtins/__init__.py +0 -12
  551. v0/relationalai/early_access/metamodel/compiler/__init__.py +0 -12
  552. v0/relationalai/early_access/metamodel/dependency/__init__.py +0 -12
  553. v0/relationalai/early_access/metamodel/factory/__init__.py +0 -17
  554. v0/relationalai/early_access/metamodel/helpers/__init__.py +0 -12
  555. v0/relationalai/early_access/metamodel/ir/__init__.py +0 -14
  556. v0/relationalai/early_access/metamodel/rewrite/__init__.py +0 -7
  557. v0/relationalai/early_access/metamodel/typer/__init__.py +0 -3
  558. v0/relationalai/early_access/metamodel/typer/typer/__init__.py +0 -12
  559. v0/relationalai/early_access/metamodel/types/__init__.py +0 -15
  560. v0/relationalai/early_access/metamodel/util/__init__.py +0 -15
  561. v0/relationalai/early_access/metamodel/visitor/__init__.py +0 -12
  562. v0/relationalai/early_access/rel/__init__.py +0 -12
  563. v0/relationalai/early_access/rel/executor/__init__.py +0 -12
  564. v0/relationalai/early_access/rel/rel_utils/__init__.py +0 -12
  565. v0/relationalai/early_access/rel/rewrite/__init__.py +0 -7
  566. v0/relationalai/early_access/solvers/__init__.py +0 -19
  567. v0/relationalai/early_access/sql/__init__.py +0 -11
  568. v0/relationalai/early_access/sql/executor/__init__.py +0 -3
  569. v0/relationalai/early_access/sql/rewrite/__init__.py +0 -3
  570. v0/relationalai/early_access/tests/logging/__init__.py +0 -12
  571. v0/relationalai/early_access/tests/test_snapshot_base/__init__.py +0 -12
  572. v0/relationalai/early_access/tests/utils/__init__.py +0 -12
  573. v0/relationalai/environments/__init__.py +0 -35
  574. v0/relationalai/environments/base.py +0 -381
  575. v0/relationalai/environments/colab.py +0 -14
  576. v0/relationalai/environments/generic.py +0 -71
  577. v0/relationalai/environments/ipython.py +0 -68
  578. v0/relationalai/environments/jupyter.py +0 -9
  579. v0/relationalai/environments/snowbook.py +0 -169
  580. v0/relationalai/errors.py +0 -2455
  581. v0/relationalai/experimental/SF.py +0 -38
  582. v0/relationalai/experimental/inspect.py +0 -47
  583. v0/relationalai/experimental/pathfinder/__init__.py +0 -158
  584. v0/relationalai/experimental/pathfinder/api.py +0 -160
  585. v0/relationalai/experimental/pathfinder/automaton.py +0 -584
  586. v0/relationalai/experimental/pathfinder/bridge.py +0 -226
  587. v0/relationalai/experimental/pathfinder/compiler.py +0 -416
  588. v0/relationalai/experimental/pathfinder/datalog.py +0 -214
  589. v0/relationalai/experimental/pathfinder/diagnostics.py +0 -56
  590. v0/relationalai/experimental/pathfinder/filter.py +0 -236
  591. v0/relationalai/experimental/pathfinder/glushkov.py +0 -439
  592. v0/relationalai/experimental/pathfinder/options.py +0 -265
  593. v0/relationalai/experimental/pathfinder/rpq.py +0 -344
  594. v0/relationalai/experimental/pathfinder/transition.py +0 -200
  595. v0/relationalai/experimental/pathfinder/utils.py +0 -26
  596. v0/relationalai/experimental/paths/api.py +0 -143
  597. v0/relationalai/experimental/paths/benchmarks/grid_graph.py +0 -37
  598. v0/relationalai/experimental/paths/examples/basic_example.py +0 -40
  599. v0/relationalai/experimental/paths/examples/minimal_engine_warmup.py +0 -3
  600. v0/relationalai/experimental/paths/examples/movie_example.py +0 -77
  601. v0/relationalai/experimental/paths/examples/paths_benchmark.py +0 -115
  602. v0/relationalai/experimental/paths/examples/paths_example.py +0 -116
  603. v0/relationalai/experimental/paths/examples/pattern_to_automaton.py +0 -28
  604. v0/relationalai/experimental/paths/find_paths_via_automaton.py +0 -85
  605. v0/relationalai/experimental/paths/graph.py +0 -185
  606. v0/relationalai/experimental/paths/path_algorithms/find_paths.py +0 -280
  607. v0/relationalai/experimental/paths/path_algorithms/one_sided_ball_repetition.py +0 -26
  608. v0/relationalai/experimental/paths/path_algorithms/one_sided_ball_upto.py +0 -111
  609. v0/relationalai/experimental/paths/path_algorithms/single.py +0 -59
  610. v0/relationalai/experimental/paths/path_algorithms/two_sided_balls_repetition.py +0 -39
  611. v0/relationalai/experimental/paths/path_algorithms/two_sided_balls_upto.py +0 -103
  612. v0/relationalai/experimental/paths/path_algorithms/usp-old.py +0 -130
  613. v0/relationalai/experimental/paths/path_algorithms/usp-tuple.py +0 -183
  614. v0/relationalai/experimental/paths/path_algorithms/usp.py +0 -150
  615. v0/relationalai/experimental/paths/product_graph.py +0 -93
  616. v0/relationalai/experimental/paths/rpq/automaton.py +0 -584
  617. v0/relationalai/experimental/paths/rpq/diagnostics.py +0 -56
  618. v0/relationalai/experimental/paths/rpq/rpq.py +0 -378
  619. v0/relationalai/experimental/paths/tests/tests_limit_sp_max_length.py +0 -90
  620. v0/relationalai/experimental/paths/tests/tests_limit_sp_multiple.py +0 -119
  621. v0/relationalai/experimental/paths/tests/tests_limit_sp_single.py +0 -104
  622. v0/relationalai/experimental/paths/tests/tests_limit_walks_multiple.py +0 -113
  623. v0/relationalai/experimental/paths/tests/tests_limit_walks_single.py +0 -149
  624. v0/relationalai/experimental/paths/tests/tests_one_sided_ball_repetition_multiple.py +0 -70
  625. v0/relationalai/experimental/paths/tests/tests_one_sided_ball_repetition_single.py +0 -64
  626. v0/relationalai/experimental/paths/tests/tests_one_sided_ball_upto_multiple.py +0 -115
  627. v0/relationalai/experimental/paths/tests/tests_one_sided_ball_upto_single.py +0 -75
  628. v0/relationalai/experimental/paths/tests/tests_single_paths.py +0 -152
  629. v0/relationalai/experimental/paths/tests/tests_single_walks.py +0 -208
  630. v0/relationalai/experimental/paths/tests/tests_single_walks_undirected.py +0 -297
  631. v0/relationalai/experimental/paths/tests/tests_two_sided_balls_repetition_multiple.py +0 -107
  632. v0/relationalai/experimental/paths/tests/tests_two_sided_balls_repetition_single.py +0 -76
  633. v0/relationalai/experimental/paths/tests/tests_two_sided_balls_upto_multiple.py +0 -76
  634. v0/relationalai/experimental/paths/tests/tests_two_sided_balls_upto_single.py +0 -110
  635. v0/relationalai/experimental/paths/tests/tests_usp_nsp_multiple.py +0 -229
  636. v0/relationalai/experimental/paths/tests/tests_usp_nsp_single.py +0 -108
  637. v0/relationalai/experimental/paths/tree_agg.py +0 -168
  638. v0/relationalai/experimental/paths/utilities/iterators.py +0 -27
  639. v0/relationalai/experimental/paths/utilities/prefix_sum.py +0 -91
  640. v0/relationalai/experimental/solvers.py +0 -1087
  641. v0/relationalai/loaders/csv.py +0 -195
  642. v0/relationalai/loaders/loader.py +0 -177
  643. v0/relationalai/loaders/types.py +0 -23
  644. v0/relationalai/rel_emitter.py +0 -373
  645. v0/relationalai/rel_utils.py +0 -185
  646. v0/relationalai/semantics/__init__.py +0 -29
  647. v0/relationalai/semantics/devtools/benchmark_lqp.py +0 -536
  648. v0/relationalai/semantics/devtools/compilation_manager.py +0 -294
  649. v0/relationalai/semantics/devtools/extract_lqp.py +0 -110
  650. v0/relationalai/semantics/internal/internal.py +0 -3785
  651. v0/relationalai/semantics/internal/snowflake.py +0 -324
  652. v0/relationalai/semantics/lqp/builtins.py +0 -16
  653. v0/relationalai/semantics/lqp/compiler.py +0 -22
  654. v0/relationalai/semantics/lqp/constructors.py +0 -68
  655. v0/relationalai/semantics/lqp/executor.py +0 -469
  656. v0/relationalai/semantics/lqp/intrinsics.py +0 -24
  657. v0/relationalai/semantics/lqp/model2lqp.py +0 -839
  658. v0/relationalai/semantics/lqp/passes.py +0 -680
  659. v0/relationalai/semantics/lqp/primitives.py +0 -252
  660. v0/relationalai/semantics/lqp/result_helpers.py +0 -202
  661. v0/relationalai/semantics/lqp/rewrite/annotate_constraints.py +0 -57
  662. v0/relationalai/semantics/lqp/rewrite/cdc.py +0 -216
  663. v0/relationalai/semantics/lqp/rewrite/extract_common.py +0 -338
  664. v0/relationalai/semantics/lqp/rewrite/extract_keys.py +0 -449
  665. v0/relationalai/semantics/lqp/rewrite/function_annotations.py +0 -114
  666. v0/relationalai/semantics/lqp/rewrite/functional_dependencies.py +0 -314
  667. v0/relationalai/semantics/lqp/rewrite/quantify_vars.py +0 -296
  668. v0/relationalai/semantics/lqp/rewrite/splinter.py +0 -76
  669. v0/relationalai/semantics/lqp/types.py +0 -101
  670. v0/relationalai/semantics/lqp/utils.py +0 -160
  671. v0/relationalai/semantics/lqp/validators.py +0 -57
  672. v0/relationalai/semantics/metamodel/__init__.py +0 -40
  673. v0/relationalai/semantics/metamodel/builtins.py +0 -774
  674. v0/relationalai/semantics/metamodel/compiler.py +0 -133
  675. v0/relationalai/semantics/metamodel/dependency.py +0 -862
  676. v0/relationalai/semantics/metamodel/executor.py +0 -61
  677. v0/relationalai/semantics/metamodel/factory.py +0 -287
  678. v0/relationalai/semantics/metamodel/helpers.py +0 -361
  679. v0/relationalai/semantics/metamodel/rewrite/discharge_constraints.py +0 -39
  680. v0/relationalai/semantics/metamodel/rewrite/dnf_union_splitter.py +0 -210
  681. v0/relationalai/semantics/metamodel/rewrite/extract_nested_logicals.py +0 -78
  682. v0/relationalai/semantics/metamodel/rewrite/flatten.py +0 -549
  683. v0/relationalai/semantics/metamodel/rewrite/format_outputs.py +0 -165
  684. v0/relationalai/semantics/metamodel/typer/checker.py +0 -353
  685. v0/relationalai/semantics/metamodel/typer/typer.py +0 -1395
  686. v0/relationalai/semantics/metamodel/util.py +0 -505
  687. v0/relationalai/semantics/reasoners/__init__.py +0 -10
  688. v0/relationalai/semantics/reasoners/graph/__init__.py +0 -37
  689. v0/relationalai/semantics/reasoners/graph/core.py +0 -9020
  690. v0/relationalai/semantics/reasoners/optimization/__init__.py +0 -68
  691. v0/relationalai/semantics/reasoners/optimization/common.py +0 -88
  692. v0/relationalai/semantics/reasoners/optimization/solvers_dev.py +0 -568
  693. v0/relationalai/semantics/reasoners/optimization/solvers_pb.py +0 -1163
  694. v0/relationalai/semantics/rel/builtins.py +0 -40
  695. v0/relationalai/semantics/rel/compiler.py +0 -989
  696. v0/relationalai/semantics/rel/executor.py +0 -359
  697. v0/relationalai/semantics/rel/rel.py +0 -482
  698. v0/relationalai/semantics/rel/rel_utils.py +0 -276
  699. v0/relationalai/semantics/snowflake/__init__.py +0 -3
  700. v0/relationalai/semantics/sql/compiler.py +0 -2503
  701. v0/relationalai/semantics/sql/executor/duck_db.py +0 -52
  702. v0/relationalai/semantics/sql/executor/result_helpers.py +0 -64
  703. v0/relationalai/semantics/sql/executor/snowflake.py +0 -145
  704. v0/relationalai/semantics/sql/rewrite/denormalize.py +0 -222
  705. v0/relationalai/semantics/sql/rewrite/double_negation.py +0 -49
  706. v0/relationalai/semantics/sql/rewrite/recursive_union.py +0 -127
  707. v0/relationalai/semantics/sql/rewrite/sort_output_query.py +0 -246
  708. v0/relationalai/semantics/sql/sql.py +0 -504
  709. v0/relationalai/semantics/std/__init__.py +0 -54
  710. v0/relationalai/semantics/std/constraints.py +0 -43
  711. v0/relationalai/semantics/std/datetime.py +0 -363
  712. v0/relationalai/semantics/std/decimals.py +0 -62
  713. v0/relationalai/semantics/std/floats.py +0 -7
  714. v0/relationalai/semantics/std/integers.py +0 -22
  715. v0/relationalai/semantics/std/math.py +0 -141
  716. v0/relationalai/semantics/std/pragmas.py +0 -11
  717. v0/relationalai/semantics/std/re.py +0 -83
  718. v0/relationalai/semantics/std/std.py +0 -14
  719. v0/relationalai/semantics/std/strings.py +0 -63
  720. v0/relationalai/semantics/tests/__init__.py +0 -0
  721. v0/relationalai/semantics/tests/test_snapshot_abstract.py +0 -143
  722. v0/relationalai/semantics/tests/test_snapshot_base.py +0 -9
  723. v0/relationalai/semantics/tests/utils.py +0 -46
  724. v0/relationalai/std/__init__.py +0 -70
  725. v0/relationalai/tools/__init__.py +0 -0
  726. v0/relationalai/tools/cli.py +0 -1940
  727. v0/relationalai/tools/cli_controls.py +0 -1826
  728. v0/relationalai/tools/cli_helpers.py +0 -390
  729. v0/relationalai/tools/debugger.py +0 -183
  730. v0/relationalai/tools/debugger_client.py +0 -109
  731. v0/relationalai/tools/debugger_server.py +0 -302
  732. v0/relationalai/tools/dev.py +0 -685
  733. v0/relationalai/tools/qb_debugger.py +0 -425
  734. v0/relationalai/util/clean_up_databases.py +0 -95
  735. v0/relationalai/util/format.py +0 -123
  736. v0/relationalai/util/list_databases.py +0 -9
  737. v0/relationalai/util/otel_configuration.py +0 -25
  738. v0/relationalai/util/otel_handler.py +0 -484
  739. v0/relationalai/util/snowflake_handler.py +0 -88
  740. v0/relationalai/util/span_format_test.py +0 -43
  741. v0/relationalai/util/span_tracker.py +0 -207
  742. v0/relationalai/util/spans_file_handler.py +0 -72
  743. v0/relationalai/util/tracing_handler.py +0 -34
  744. /relationalai/{semantics/frontend → analysis}/__init__.py +0 -0
  745. {v0/relationalai → relationalai}/analysis/mechanistic.py +0 -0
  746. {v0/relationalai → relationalai}/analysis/whynot.py +0 -0
  747. /relationalai/{shims → auth}/__init__.py +0 -0
  748. {v0/relationalai → relationalai}/auth/jwt_generator.py +0 -0
  749. {v0/relationalai → relationalai}/auth/oauth_callback_server.py +0 -0
  750. {v0/relationalai → relationalai}/auth/token_handler.py +0 -0
  751. {v0/relationalai → relationalai}/auth/util.py +0 -0
  752. {v0/relationalai/clients → relationalai/clients/resources/snowflake}/cache_store.py +0 -0
  753. {v0/relationalai → relationalai}/compiler.py +0 -0
  754. {v0/relationalai → relationalai}/dependencies.py +0 -0
  755. {v0/relationalai → relationalai}/docutils.py +0 -0
  756. {v0/relationalai/analysis → relationalai/early_access}/__init__.py +0 -0
  757. {v0/relationalai → relationalai}/early_access/dsl/__init__.py +0 -0
  758. {v0/relationalai/auth → relationalai/early_access/dsl/adapters}/__init__.py +0 -0
  759. {v0/relationalai/early_access → relationalai/early_access/dsl/adapters/orm}/__init__.py +0 -0
  760. {v0/relationalai → relationalai}/early_access/dsl/adapters/orm/model.py +0 -0
  761. {v0/relationalai/early_access/dsl/adapters → relationalai/early_access/dsl/adapters/owl}/__init__.py +0 -0
  762. {v0/relationalai → relationalai}/early_access/dsl/adapters/owl/model.py +0 -0
  763. {v0/relationalai/early_access/dsl/adapters/orm → relationalai/early_access/dsl/bindings}/__init__.py +0 -0
  764. {v0/relationalai/early_access/dsl/adapters/owl → relationalai/early_access/dsl/bindings/legacy}/__init__.py +0 -0
  765. {v0/relationalai/early_access/dsl/bindings → relationalai/early_access/dsl/codegen}/__init__.py +0 -0
  766. {v0/relationalai → relationalai}/early_access/dsl/constants.py +0 -0
  767. {v0/relationalai → relationalai}/early_access/dsl/core/__init__.py +0 -0
  768. {v0/relationalai → relationalai}/early_access/dsl/core/constraints/__init__.py +0 -0
  769. {v0/relationalai → relationalai}/early_access/dsl/core/constraints/predicate/__init__.py +0 -0
  770. {v0/relationalai → relationalai}/early_access/dsl/core/stack.py +0 -0
  771. {v0/relationalai/early_access/dsl/bindings/legacy → relationalai/early_access/dsl/core/temporal}/__init__.py +0 -0
  772. {v0/relationalai → relationalai}/early_access/dsl/core/utils.py +0 -0
  773. {v0/relationalai/early_access/dsl/codegen → relationalai/early_access/dsl/ir}/__init__.py +0 -0
  774. {v0/relationalai/early_access/dsl/core/temporal → relationalai/early_access/dsl/ontologies}/__init__.py +0 -0
  775. {v0/relationalai → relationalai}/early_access/dsl/ontologies/raw_source.py +0 -0
  776. {v0/relationalai/early_access/dsl/ir → relationalai/early_access/dsl/orm}/__init__.py +0 -0
  777. {v0/relationalai/early_access/dsl/ontologies → relationalai/early_access/dsl/orm/measures}/__init__.py +0 -0
  778. {v0/relationalai → relationalai}/early_access/dsl/orm/reasoner_errors.py +0 -0
  779. {v0/relationalai/early_access/dsl/orm → relationalai/early_access/dsl/physical_metadata}/__init__.py +0 -0
  780. {v0/relationalai/early_access/dsl/orm/measures → relationalai/early_access/dsl/serialize}/__init__.py +0 -0
  781. {v0/relationalai → relationalai}/early_access/dsl/serialize/binding_model.py +0 -0
  782. {v0/relationalai → relationalai}/early_access/dsl/serialize/model.py +0 -0
  783. {v0/relationalai/early_access/dsl/physical_metadata → relationalai/early_access/dsl/snow}/__init__.py +0 -0
  784. {v0/relationalai → relationalai}/early_access/tests/__init__.py +0 -0
  785. {v0/relationalai → relationalai}/environments/ci.py +0 -0
  786. {v0/relationalai → relationalai}/environments/hex.py +0 -0
  787. {v0/relationalai → relationalai}/environments/terminal.py +0 -0
  788. {v0/relationalai → relationalai}/experimental/__init__.py +0 -0
  789. {v0/relationalai → relationalai}/experimental/graphs.py +0 -0
  790. {v0/relationalai → relationalai}/experimental/paths/__init__.py +0 -0
  791. {v0/relationalai → relationalai}/experimental/paths/benchmarks/__init__.py +0 -0
  792. {v0/relationalai → relationalai}/experimental/paths/path_algorithms/__init__.py +0 -0
  793. {v0/relationalai → relationalai}/experimental/paths/rpq/__init__.py +0 -0
  794. {v0/relationalai → relationalai}/experimental/paths/rpq/filter.py +0 -0
  795. {v0/relationalai → relationalai}/experimental/paths/rpq/glushkov.py +0 -0
  796. {v0/relationalai → relationalai}/experimental/paths/rpq/transition.py +0 -0
  797. {v0/relationalai → relationalai}/experimental/paths/utilities/__init__.py +0 -0
  798. {v0/relationalai → relationalai}/experimental/paths/utilities/utilities.py +0 -0
  799. {v0/relationalai/early_access/dsl/serialize → relationalai/loaders}/__init__.py +0 -0
  800. {v0/relationalai → relationalai}/metagen.py +0 -0
  801. {v0/relationalai → relationalai}/metamodel.py +0 -0
  802. {v0/relationalai → relationalai}/rel.py +0 -0
  803. {v0/relationalai → relationalai}/semantics/devtools/__init__.py +0 -0
  804. {v0/relationalai → relationalai}/semantics/internal/__init__.py +0 -0
  805. {v0/relationalai → relationalai}/semantics/internal/annotations.py +0 -0
  806. {v0/relationalai → relationalai}/semantics/lqp/__init__.py +0 -0
  807. {v0/relationalai → relationalai}/semantics/lqp/ir.py +0 -0
  808. {v0/relationalai → relationalai}/semantics/lqp/pragmas.py +0 -0
  809. {v0/relationalai → relationalai}/semantics/lqp/rewrite/__init__.py +0 -0
  810. {v0/relationalai → relationalai}/semantics/metamodel/dataflow.py +0 -0
  811. {v0/relationalai → relationalai}/semantics/metamodel/ir.py +0 -0
  812. {v0/relationalai → relationalai}/semantics/metamodel/rewrite/__init__.py +0 -0
  813. {v0/relationalai → relationalai}/semantics/metamodel/typer/__init__.py +0 -0
  814. {v0/relationalai → relationalai}/semantics/metamodel/types.py +0 -0
  815. {v0/relationalai → relationalai}/semantics/metamodel/visitor.py +0 -0
  816. {v0/relationalai → relationalai}/semantics/reasoners/experimental/__init__.py +0 -0
  817. {v0/relationalai → relationalai}/semantics/rel/__init__.py +0 -0
  818. {v0/relationalai → relationalai}/semantics/sql/__init__.py +0 -0
  819. {v0/relationalai → relationalai}/semantics/sql/executor/__init__.py +0 -0
  820. {v0/relationalai → relationalai}/semantics/sql/rewrite/__init__.py +0 -0
  821. {v0/relationalai/early_access/dsl/snow → relationalai/semantics/tests}/__init__.py +0 -0
  822. {v0/relationalai → relationalai}/semantics/tests/logging.py +0 -0
  823. {v0/relationalai → relationalai}/std/aggregates.py +0 -0
  824. {v0/relationalai → relationalai}/std/dates.py +0 -0
  825. {v0/relationalai → relationalai}/std/graphs.py +0 -0
  826. {v0/relationalai → relationalai}/std/inspect.py +0 -0
  827. {v0/relationalai → relationalai}/std/math.py +0 -0
  828. {v0/relationalai → relationalai}/std/re.py +0 -0
  829. {v0/relationalai → relationalai}/std/strings.py +0 -0
  830. {v0/relationalai/loaders → relationalai/tools}/__init__.py +0 -0
  831. {v0/relationalai → relationalai}/tools/cleanup_snapshots.py +0 -0
  832. {v0/relationalai → relationalai}/tools/constants.py +0 -0
  833. {v0/relationalai → relationalai}/tools/query_utils.py +0 -0
  834. {v0/relationalai → relationalai}/tools/snapshot_viewer.py +0 -0
  835. {v0/relationalai → relationalai}/util/__init__.py +0 -0
  836. {v0/relationalai → relationalai}/util/constants.py +0 -0
  837. {v0/relationalai → relationalai}/util/graph.py +0 -0
  838. {v0/relationalai → relationalai}/util/timeout.py +0 -0
@@ -0,0 +1,2503 @@
1
+ from __future__ import annotations
2
+
3
+ import datetime
4
+ import logging
5
+ from collections import defaultdict
6
+ from dataclasses import dataclass, field
7
+ from functools import partial
8
+ from itertools import chain
9
+ from typing import Tuple, cast, Optional, Union
10
+ from decimal import Decimal as PyDecimal
11
+
12
+ import math
13
+
14
+ from relationalai.semantics.metamodel.rewrite import (Flatten, ExtractNestedLogicals, DNFUnionSplitter,
15
+ DischargeConstraints)
16
+ from relationalai.semantics.metamodel.visitor import ReadWriteVisitor
17
+ from relationalai.util.graph import topological_sort
18
+ from relationalai.semantics.metamodel import ir, compiler as c, visitor as v, builtins, types, helpers
19
+ from relationalai.semantics.metamodel.typer import Checker, InferTypes, typer
20
+ from relationalai.semantics.metamodel.builtins import from_cdc_annotation, concept_relation_annotation
21
+ from relationalai.semantics.metamodel.types import (Hash, String, Number, Int64, Int128, Bool, Date, DateTime, Float,
22
+ RowId, UInt128)
23
+ from relationalai.semantics.metamodel.util import (FrozenOrderedSet, OrderedSet, frozen, ordered_set, filter_by_type,
24
+ NameCache)
25
+ from relationalai.semantics.sql import sql, rewrite
26
+
27
+
28
class Compiler(c.Compiler):
    """ Metamodel-to-SQL compiler: runs the rewrite passes, then renders the model with ModelToSQL. """

    def __init__(self, skip_denormalization:bool=False):
        """
        Assemble the rewrite pipeline and the SQL generator.

        :param skip_denormalization: when True, the `rewrite.Denormalize` pass is left out
            of the pipeline; every other pass is always installed.
        """
        rewrites = [
            DischargeConstraints(),
            Checker(),
            ExtractNestedLogicals(), # before InferTypes to avoid extracting casts
            InferTypes(),
            DNFUnionSplitter(),
            Flatten(use_sql=True),
            rewrite.RecursiveUnion(),
            rewrite.DoubleNegation(),
            rewrite.SortOutputQuery()
        ]
        if not skip_denormalization:
            # group updates, compute SCCs, use Sequence to denote their order
            rewrites.append(rewrite.Denormalize())
        super().__init__(rewrites)
        self.model_to_sql = ModelToSQL()

    def do_compile(self, model: ir.Model, options:Optional[dict]=None) -> tuple[str, ir.Model]:
        """
        Compile the rewritten model into a SQL string. Returns the SQL string together
        with the rewritten model (after the rewrite passes, before translating to sql).

        :param model: the model to translate.
        :param options: compilation options forwarded to `ModelToSQL.to_sql`; omitting it
            (or passing None) is the same as passing an empty dict.
        """
        # A `{}` default would be a single dict object shared by every call; build a
        # fresh one per call instead so no state can leak between compilations.
        if options is None:
            options = {}
        return str(self.model_to_sql.to_sql(model, options)), model
53
+
54
@dataclass(frozen=True)
class OutputVar:
    """ Immutable description of one value to emit from a generated select. """
    # the IR value to output
    value: ir.Value
    # optional name to expose the value under (e.g. a relation column name)
    alias: Optional[str] = None
    # optional type string for the value (e.g. the result of converting a field type)
    value_type: Optional[str] = None
    # optional IR task associated with the value
    # NOTE(review): how `task` is consumed is not visible in this part of the file
    task: Optional[ir.Task] = None
60
+
61
@dataclass
class RelationInfo:
    """ Mutable per-relation bookkeeping gathered while SQL statements are generated. """
    # usage flag, False until something marks the relation
    # NOTE(review): presumably set by `mark_used` - confirm against its definition
    used: bool = False
    # selects that are emitted as a view for the relation
    view_selects: list[sql.Select] = field(default_factory=list)
    # selects that are inserted into the relation's table
    table_selects: list[sql.Select] = field(default_factory=list)
    # selects that are emitted as a dynamic table for the relation
    dynamic_table_selects: list[sql.Select] = field(default_factory=list)
67
+
68
@dataclass
class ImportSpec:
    """ One Python import, rendered either as `import X` or as `from M import X`. """
    # the name being imported
    value: str
    # module to import `value` from, e.g. "scipy.special"; leave unset for a plain import
    module: Optional[str] = None

    def render(self) -> str:
        """ Return the import statement as Python source text. """
        if self.module:
            return f"from {self.module} import {self.value}"
        # no (or empty) module: plain `import` form
        return f"import {self.value}"
75
+
76
+
77
@dataclass
class UDFConfig:
    """ Settings describing one Python user-defined function to be created. """
    # name of the entry-point function inside `code` (e.g. "compute")
    handler: str
    # Python source text of the function body
    code: str
    # imports the code relies on; empty by default
    imports: list[ImportSpec] = field(default_factory=list)
    # package names the function depends on; empty by default
    # (entries used in this file are already quoted, e.g. "'scipy'")
    packages: list[str] = field(default_factory=list)
83
+
84
+ @dataclass
85
+ class ModelToSQL:
86
+ """ Generates SQL from an IR Model, assuming the compiler rewrites were done. """
87
+
88
+ _is_duck_db: bool = False
89
+ _warehouse: str = 'MAIN_WH'
90
+ _query_compilation: bool = False
91
+ _default_dynamic_table_target_lag: str = '5 minutes'
92
+ relation_name_cache: NameCache = field(default_factory=NameCache)
93
+ relation_arg_name_cache: NameCache = field(default_factory=NameCache)
94
+ relation_infos: dict[ir.Relation, RelationInfo] = field(default_factory=dict)
95
+ _error_relation_names: set[str] = field(default_factory=lambda: {'Error', 'pyrel_error_attrs'})
96
+
97
def to_sql(self, model: ir.Model, options:dict) -> sql.Program:
    """
    Translate `model` into a sql.Program.

    Recognised `options` keys: "is_duck_db", "warehouse", "query_compilation" and
    "default_dynamic_table_target_lag". The two flags fall back to False when absent;
    the warehouse and target lag keep their current value when the option is missing
    or falsy.
    """
    # relation infos collected by a previous call must not leak into this one
    self.relation_infos.clear()

    self._is_duck_db = options.get("is_duck_db", False)

    warehouse = options.get("warehouse")
    if warehouse:
        self._warehouse = warehouse

    self._query_compilation = options.get("query_compilation", False)

    target_lag = options.get("default_dynamic_table_target_lag")
    if target_lag:
        self._default_dynamic_table_target_lag = target_lag

    # generate -> union the output selects -> order by dependencies -> wrap in a program
    statements = self._generate_statements(model)
    statements = self._union_output_selects(statements)
    return sql.Program(self._sort_dependencies(statements))
105
+
106
def _generate_statements(self, model: ir.Model) -> list[sql.Node]:
    """
    Build the list of SQL statements for `model`, in generation order.

    The statements are appended in this sequence: statements for the children of the
    root logical, then one statement per relation that collected selects (insert, view
    or dynamic table), then table creations, and finally - unless targeting DuckDB -
    the user-defined functions for the builtins in use.
    """
    table_relations, used_builtins = self._get_relations(model)

    self._register_relation_args(table_relations)
    self._register_external_relations(model)

    result: list[sql.Node] = []

    # Children of the root: logicals may yield several statements, a union becomes a
    # recursive view; any other kind of child is ignored here.
    for task in cast(ir.Logical, model.root).body:
        if isinstance(task, ir.Logical):
            result.extend(self._create_statement(task))
        elif isinstance(task, ir.Union):
            result.append(self._create_recursive_view(task))

    # Snapshot the selects gathered per relation (dynamic table selects first, then view
    # selects, then table selects); relations without any select are left out.
    pending = {}
    for rel, rel_info in self.relation_infos.items():
        gathered = rel_info.dynamic_table_selects + rel_info.view_selects + rel_info.table_selects
        if gathered:
            pending[rel] = gathered

    # One statement per relation; table selects take priority over view selects, which
    # take priority over dynamic table selects.
    for rel, selects in pending.items():
        name = self._relation_name(rel)
        rel_info = self._get_relation_info(rel)

        if rel_info.view_selects and not rel_info.table_selects:
            result.append(sql.CreateView(name, selects))
            continue

        # the insert and the dynamic table both need the relation's column names
        column_names = [self._var_name(rel.id, f) for f in rel.fields]

        if rel_info.table_selects:
            # Relation is a table -> insert into it; several selects are combined in a CTE
            if len(selects) == 1:
                source = selects[0]
            else:
                source = sql.CTE(False, f"{name}_cte", column_names, selects, True)
            result.append(sql.Insert(name, column_names, [], source))
            continue

        # Snowflake currently has issues when using DISTINCT together with UNION in a Dynamic Table.
        # As a workaround, we generate a CTE without DISTINCT, using UNION ALL.
        # Then, we create a dynamic table with `SELECT DISTINCT * FROM CTE` to remove duplicates.
        union_all_cte = sql.CTE(False, f"{name}_cte", column_names, selects, True)
        result.append(
            sql.CreateDynamicTable(
                name,
                union_all_cte,
                self._default_dynamic_table_target_lag,
                self._warehouse
            )
        )

    # Physical tables: created for table relations that either have no info recorded at
    # all or that have table selects.
    for rel in table_relations:
        rel_info = self.relation_infos.get(rel)
        if rel_info is None or rel_info.table_selects:
            result.append(self._create_table(rel))

    # Snowflake user-defined functions (not emitted for DuckDB)
    if not self._is_duck_db:
        result.extend(self._create_user_defined_functions(used_builtins))

    return result
167
+
168
+ #--------------------------------------------------
169
+ # SQL Generation
170
+ #--------------------------------------------------
171
def _create_table(self, r: ir.Relation) -> sql.Node:
    """ Build the create-table node for relation `r`, with one typed column per relation field. """
    table_name = self._relation_name(r)
    columns = [
        sql.Column(self._var_name(r.id, f), self._convert_type(f.type))
        for f in r.fields
    ]
    # the query-compilation flag is handed to the node unchanged
    return sql.CreateTable(sql.Table(table_name, columns), self._query_compilation)
176
+
177
def _create_recursive_view(self, union: ir.Union) -> sql.Node:
    """
    Build a view backed by a recursive CTE from the cases of `union`.

    Every task of the union must be an `ir.Logical` and there must be at least two of
    them; each task contributes one select to the CTE. The view is named after the
    relation updated inside the union, the CTE after that name with a `_rec` suffix.
    """
    task_count = len(union.tasks)
    assert task_count >= 2, f"Recursive CTE requires at least 2 tasks (anchor + recursive), but got {task_count}."

    non_logical = [type(t).__name__ for t in union.tasks if not isinstance(t, ir.Logical)]
    assert not non_logical, (
        "All tasks in a recursive CTE must be of type `ir.Logical`. "
        f"Invalid types: {non_logical}"
    )

    def select_for_case(case: ir.Logical):
        # TODO - assuming a single update per case
        case_update = v.collect_by_type(ir.Update, case).some()

        # Lookups of the updated relation are redirected to a `<name>_rec` twin of it
        target = case_update.relation
        rec_target = ir.Relation(f"{target.name}_rec", target.fields, frozen(), frozen())
        rewritten = RecursiveLookupsRewriter(target, rec_target).walk(case)

        # TODO - improve the typing info to avoid these casts
        body = rewritten.body
        nots = cast(list[ir.Not], filter_by_type(body, ir.Not))
        unions = cast(list[ir.Union], filter_by_type(body, ir.Union))
        lookups = cast(list[ir.Lookup], filter_by_type(body, ir.Lookup))
        constructs = cast(list[ir.Construct], filter_by_type(body, ir.Construct))

        # one output per update argument, named and typed after the relation field at
        # the same position
        outputs = []
        for position, arg in enumerate(case_update.args):
            fld = target.fields[position]
            sql_type = self._convert_type(fld.type)
            outputs.append(OutputVar(arg, self._var_name(target.id, fld), value_type=sql_type))

        return self._make_select(lookups, outputs, nots, unions, constructs)

    # any update found in the union serves as the representative for the target relation
    relation = v.collect_by_type(ir.Update, union).some().relation
    self.mark_used(relation)

    view_name = self._relation_name(relation)
    cte_name = f"{self._relation_name(relation)}_rec"
    column_names = [self._var_name(relation.id, f) for f in relation.fields]
    case_selects = [select_for_case(cast(ir.Logical, t)) for t in union.tasks]

    return sql.CreateView(view_name, sql.CTE(True, cte_name, column_names, case_selects))
225
+
226
def _create_user_defined_functions(self, relations: list[ir.Relation]) -> list[sql.CreateFunction]:
    """
    Build CREATE FUNCTION nodes for the given builtin relations that are backed by a Python UDF.

    Relations whose name has no entry in the UDF table below are skipped. For the others, the
    fields flagged as `input` become the function parameters and the remaining field supplies
    the return type (if several fields are not inputs, the last one wins).
    """
    # Central UDF metadata configuration
    udf_configs: dict[str, UDFConfig] = {
        builtins.acot.name: UDFConfig(
            handler="compute",
            imports=[ImportSpec("math")],
            code="""def compute(x): return math.atan(1 / x) if x != 0 else math.copysign(math.pi / 2, x)"""
        ),
        builtins.erf.name: UDFConfig(
            handler="compute",
            imports=[ImportSpec("math")],
            code="""def compute(x): return math.erf(x)"""
        ),
        builtins.erfinv.name: UDFConfig(
            handler="compute",
            imports=[ImportSpec("erfinv", module="scipy.special")],
            packages=["'scipy'"],
            code="""def compute(x): return erfinv(x)"""
        )
    }

    functions: list[sql.CreateFunction] = []

    for r in relations:
        config = udf_configs.get(r.name)
        if not config:
            continue

        # Split relation fields into inputs and return type
        # We expect a single return argument per builtin relation
        return_type = None
        parameters: list[sql.Column] = []
        for field in r.fields:
            if not field.input:
                return_type = self._convert_type(field.type)
                continue
            parameters.append(sql.Column(self._var_name(r.id, field), self._convert_type(field.type)))

        # Build a full code block (imports + code), leaving out empty parts
        rendered_imports = "\n".join(spec.render() for spec in config.imports)
        body = "\n".join([part for part in (rendered_imports, config.code) if part])

        assert return_type, f"No return type found for relation '{r.name}'"
        functions.append(
            sql.CreateFunction(
                name=r.name,
                inputs=parameters,
                return_type=return_type,
                handler=config.handler,
                body=body,
                packages=config.packages
            )
        )

    return functions
281
+
282
def _create_statement(self, task: ir.Logical):
    """
    Compile one `ir.Logical` task into SQL.

    The direct children of `task.body` are bucketed by type and one of these paths is taken:

    1. Only updates (no lookups, nots, aggregates, nested logicals or unions): raw SQL sources are
       appended to the returned statements; any other update registers one static-value SELECT
       per tuple through `add_table_select`.
    2. Lookups, outputs, nots, aggregates or updates are present: updates register a select for
       their relation (table, dynamic table or view), while outputs produce a SELECT that is
       appended to the returned statements.
    3. Only nested logicals: each one is compiled recursively.
    4. Nothing to query or define: an empty list is returned.

    Returns the list of statements produced directly by this task; selects registered through
    the `add_*_select` helpers are not part of the returned list.
    """
    # TODO - improve the typing info to avoid these casts
    body = task.body
    nots = cast(list[ir.Not], filter_by_type(body, ir.Not))
    lookups = cast(list[ir.Lookup], filter_by_type(body, ir.Lookup))
    updates = cast(list[ir.Update], filter_by_type(body, ir.Update))
    outputs = cast(list[ir.Output], filter_by_type(body, ir.Output))
    logicals = cast(list[ir.Logical], filter_by_type(body, ir.Logical))
    constructs = cast(list[ir.Construct], filter_by_type(body, ir.Construct))
    ranks = cast(list[ir.Rank], filter_by_type(body, ir.Rank))
    aggs = cast(list[ir.Aggregate], filter_by_type(body, ir.Aggregate))
    unions = cast(list[ir.Union], filter_by_type(body, ir.Union))

    var_to_construct = {construct.id_var: construct for construct in constructs} if constructs else {}

    statements = []
    if updates and not (lookups or nots or aggs or logicals or unions):
        for update in updates:
            relation = update.relation
            if relation == builtins.raw_source:
                lang, src = update.args[0], update.args[1]
                if not (isinstance(lang, str) and lang.lower() == "sql"):
                    logging.warning(f"Unsupported language for RawSource: {lang}")
                    continue
                if not isinstance(src, str):
                    raise Exception(f"Expected SQL source to be a string, got: {type(src).__name__}")
                statements.append(sql.RawSource(src))
                continue

            # Generate select with static values: SELECT hash(V1, ...), V2, V3
            # `SELECT` is used instead of `VALUES` because Snowflake parses and restricts certain
            # expressions in VALUES(...); built-in functions like HASH() or MD5() are often
            # rejected unless used in SELECT.
            for values in self._get_tuples(task, update):
                static_columns = [
                    sql.VarRef(str(value), alias=self._var_name(relation.id, field))
                    for field, value in zip(relation.fields, values)
                ]
                self.add_table_select(relation, sql.Select(False, static_columns))
    elif lookups or outputs or nots or aggs or updates:
        # Some lookup relations are wrapped into a nested logical and have to be pulled out for
        # the SQL compilation. For example QB `decimal(0)` in IR will look like this:
        #   Logical ^[res]
        #       Exists(vDecimal128)
        #           Logical
        #               cast(Decimal128, 0, vDecimal128)
        #               decimal128(vDecimal128, res)
        if logicals:
            unions = self._extract_all_of_type_from_logical(task, ir.Union)
            all_lookups = self._extract_all_of_type_from_logical(task, ir.Lookup)
        else:
            all_lookups = lookups

        # Map every variable used by a lookup that is a direct task of a union to that union.
        var_to_union = {
            arg: union
            for union in unions
            for union_task in union.tasks
            if isinstance(union_task, ir.Lookup)
            for arg in union_task.args
            if isinstance(arg, ir.Var)
        } if unions else {}

        if updates:
            # insert values that match a query: INSERT INTO ... SELECT ... FROM ... WHERE ...
            for update in updates:
                relation = update.relation
                if self._is_error_relation(relation):
                    # TODO: revisit this during `RAI-39124`. For now we filter out all error relations.
                    continue
                # We shouldn't create or populate tables for value types that can be directly
                # sourced from existing Snowflake tables.
                if self._is_value_type_population_relation(relation):
                    continue

                if all_lookups and all(builtins.is_builtin(lookup.relation) for lookup in all_lookups):
                    # Assuming static values insert when you have only builtin lookups
                    # (like `cast`, etc.) and you do not have table lookups.
                    aliases = self._get_update_aliases(update, var_to_construct, var_to_union, True)
                    select = self._make_select(all_lookups, aliases, nots, unions, constructs)
                    self.add_table_select(relation, select)
                    continue

                drv = DerivedRelationsVisitor()
                task.accept(drv)
                if aggs:
                    # After flatten it can be only one aggregation per rule.
                    select = self._make_agg_select(update, all_lookups, aggs[0], nots, unions, constructs)
                elif ranks:
                    # After flatten it can be only one rank per rule.
                    select = self._make_rank_select(update, all_lookups, ranks[0], nots, unions, constructs)
                else:
                    # Snowflake currently has issues when using DISTINCT together with UNION in a
                    # Dynamic Table. That is why statements are generated without DISTINCT and
                    # duplicates are removed later by using CTE + DISTINCT to declare the
                    # Dynamic Tables.
                    distinct = bool(self._is_duck_db or not drv.is_derived())
                    aliases = self._get_update_aliases(update, var_to_construct, var_to_union)

                    if not unions:
                        select = self._make_select(all_lookups, aliases, nots, unions, constructs, distinct)
                    elif lookups:
                        select = self._make_match_select(all_lookups, aliases, unions, nots, constructs, distinct)
                    else:
                        select = self._make_full_outer_join_select(aliases, unions, constructs, distinct)

                if drv.is_derived() and not self._is_duck_db:
                    self.add_dynamic_table_select(relation, select)
                else:
                    self.add_view_select(relation, select)
        elif outputs:
            # output a query: SELECT ... FROM ... WHERE ...
            distinct = any(output.keys is None for output in outputs)
            aliases = [
                self._get_alias(key, arg, None, var_to_construct, var_to_union)
                for output in outputs
                for key, arg in output.aliases
            ]

            if not unions:
                if all(builtins.is_builtin(lookup.relation) for lookup in all_lookups):
                    # Example:
                    #   QB: select(1).where(Foo(1) == Bar(1))
                    #   IR:
                    #       Logical
                    #           1::Foo = 1::Bar
                    #           -> output(1 as 'v')
                    select = self._make_select(all_lookups, aliases, nots, unions, constructs, distinct, True)
                else:
                    select = self._make_left_outer_join_select(task, all_lookups, aliases, nots, constructs, distinct)
            elif lookups:
                select = self._make_match_select(all_lookups, aliases, unions, nots, constructs, distinct, True)
            else:
                select = self._make_full_outer_join_select(aliases, unions, constructs, distinct, True)

            statements.append(select)
    elif logicals:
        for nested in logicals:
            statements.extend(self._create_statement(nested))
    elif not updates and not outputs:
        # Example:
        #   QB:
        #       (
        #           where(Person.age >= 65).define(Senior(Person)) |
        #           where(Person.age >= 18).define(Adult(Person)) |
        #           define(Child(Person))
        #       )
        #   After `flatten` IR will look like this:
        #       Logical
        #           Union
        #               _match_7(person_7)
        #               _match_8(person_7)
        #               _match_9(person_7)
        #
        # Nothing to query or define, we need to skip this task.
        return statements
    else:
        raise Exception(f"Cannot create SQL statement for:\n{task}")
    return statements
430
+
431
def _make_agg_select(self, update: ir.Update, lookups: list[ir.Lookup], agg: ir.Aggregate,
                     nots: Optional[list[ir.Not]] = None, unions: Optional[list[ir.Union]] = None,
                     constructs: Optional[list[ir.Construct]] = None) -> sql.Select:
    """
    Compile an aggregation into a grouped outer SELECT over a DISTINCT sub-select.

    Example output:
        SELECT
            department, count(v) AS v
        FROM (
            SELECT DISTINCT
                v0.department, v0.employees AS v
            FROM
                department_employees AS v0,
                Department AS v1
            WHERE
                v0.department = v1.department
        ) GROUP BY department;

    Rationale:
        The IR does not always make explicit whether the aggregate ranges over distinct rows.
        The sub-select projects every aggregation argument under DISTINCT, so `count(...)` and
        `count(distinct ...)` are both handled by the same pattern.

    Compare:
        QB: select(count(Person.name))
        IR:
            Logical
                Logical ^[name=None, person_4=None]
                    Person(person_4)
                    name(person_4, name)
                    count([person_4, name], [], [v])
                -> derive _aggregate_1(v)

        QB: select(count(distinct Person.name))
        IR:
            Logical
                Logical ^[name=None]
                    Person(person_4)
                    name(person_4, name)
                    count([name], [], [v])
                -> derive _aggregate_1(v)

    In the `distinct` case the grouping variable `person_4` is missing from the projection;
    projecting all aggregation arguments in the sub-select covers both cases.
    """
    seen = set()
    outer_outputs: list[Union[sql.VarRef, sql.RowNumberVar, int]] = []
    inner_outputs: list[OutputVar] = []
    # Group across all non-aggregated variables.
    group_by: list[sql.VarRef] = []

    relation = update.relation
    is_count = agg.aggregation == builtins.count
    agg_var = agg.args[0] if is_count else agg.args[1]

    for position, arg in enumerate(update.args):
        if arg in seen:
            continue
        field = relation.fields[position]
        sql_type = self._convert_type(field.type)
        column = self._var_name(relation.id, field)
        if isinstance(arg, ir.Var) and arg == agg_var:
            # The aggregated value: wrapped in the aggregate function by the outer select.
            outer_outputs.append(sql.VarRef(f"{agg.aggregation.name}({column})", alias=column, type=sql_type))
            inner_outputs.append(OutputVar(arg, column, task=agg))
        else:
            group_by.append(sql.VarRef(column))
            outer_outputs.append(sql.VarRef(column, alias=column, type=sql_type))
            inner_outputs.append(OutputVar(arg, column))
        seen.add(arg)

    # Project the remaining aggregation arguments in the sub-select only.
    for arg in agg.projection:
        if arg in seen:
            continue
        if is_count and arg == agg.projection[-1]:
            continue
        inner_outputs.append(OutputVar(value=arg))
        seen.add(arg)

    sub_select = self._make_select(lookups, inner_outputs, nots, unions, constructs, True)

    return sql.Select(False, outer_outputs, sub_select, group_by=group_by)
514
+
515
def _make_rank_select(self, update: ir.Update, lookups: list[ir.Lookup], rank: ir.Rank,
                      nots: Optional[list[ir.Not]] = None, unions: Optional[list[ir.Union]] = None,
                      constructs: Optional[list[ir.Construct]] = None):
    """
    Compile a rank into an outer SELECT with a row-number column over a DISTINCT sub-select.

    Example output:
        SELECT
            cat, name, ROW_NUMBER() OVER ( ORDER BY name ASC ) as v
        FROM (
            SELECT DISTINCT
                v0.cat, v1.name
            FROM
                Cat AS v0, cat_name AS v1
            WHERE
                v0.cat = v1.cat
        ) ORDER BY v LIMIT 10;

    Rationale:
        The IR does not always make explicit whether the rank ranges over distinct rows.
        The sub-select projects every rank argument under DISTINCT, so `rank(...)` and
        `rank(distinct ...)` are both handled by the same pattern.

    Compare:
        QB: select(rank(Cat.name))
        IR:
            Logical
                Cat(cat_5)
                name(cat_5, name)
                rank([cat_5], [], [name ascending], v)
                -> derive _rank_1(cat_5, name, v)

        QB: select(rank(distinct(Cat.name)))
        IR:
            Logical
                Cat(cat_5)
                name(cat_5, name)
                rank([], [], [name ascending], v)
                -> derive _rank_1(name, v)

    In the `distinct` case the grouping variable `cat_5` is missing from the projection;
    projecting all rank arguments in the sub-select covers both cases.

    Fails an assertion when `rank.result` is not among `update.args`.
    """
    seen = set()
    outer_outputs: list[Union[sql.VarRef, sql.RowNumberVar, int]] = []
    inner_outputs: list[OutputVar] = []

    order_by_vars = [
        sql.OrderByVar(arg.name, is_ascending)
        for arg, is_ascending in zip(rank.args, rank.arg_is_ascending)
    ]
    partition_by_vars = [arg.name for arg in rank.group] if rank.group else []

    relation = update.relation

    rank_column = None
    for position, arg in enumerate(update.args):
        if arg in seen:
            continue
        field = relation.fields[position]
        sql_type = self._convert_type(field.type)
        column = self._var_name(relation.id, field)
        if isinstance(arg, ir.Var) and arg == rank.result:
            # The rank result exists only in the outer select, as the row-number column.
            rank_column = column
            outer_outputs.append(sql.RowNumberVar(order_by_vars, partition_by_vars, column, sql_type))
        else:
            outer_outputs.append(sql.VarRef(column, alias=column, type=sql_type))
            inner_outputs.append(OutputVar(arg, column))
        seen.add(arg)

    # Project the remaining rank arguments in the sub-select only.
    for arg in rank.projection:
        if arg in seen:
            continue
        inner_outputs.append(OutputVar(value=arg))
        seen.add(arg)

    sub_select = self._make_select(lookups, inner_outputs, nots, unions, constructs, True)

    assert rank_column is not None, "Rank result variable not found in update.args."
    return sql.Select(False, outer_outputs, sub_select, order_by=[sql.VarRef(rank_column)], limit=rank.limit)
596
+
597
def _make_match_select(self, lookups: list[ir.Lookup], outputs: list[OutputVar], unions: list[ir.Union],
                       nots: Optional[list[ir.Not]] = None, constructs: Optional[list[ir.Construct]] = None,
                       distinct: bool = False, is_output: bool = False):
    """
    Build the SELECT for a match: table lookups joined together plus LEFT OUTER JOINs for union lookups.

    Example output:
        SELECT
            COALESCE(v2.v0, v3.v0) as v0, v0.name, COALESCE(v4.v0, v5.v0) as v02
        FROM
            person_name AS v0
            JOIN Person AS v1 ON v0.person = v1.person
            LEFT OUTER JOIN _match_3 AS v2 ON v1.person = v2.person
            LEFT OUTER JOIN _match_4 AS v3 ON v1.person = v3.person
            LEFT OUTER JOIN _match_5 AS v4 ON v1.person = v4.person
            LEFT OUTER JOIN _match_6 AS v5 ON v1.person = v5.person
        WHERE
            ( v2.person IS NOT NULL OR v3.person IS NOT NULL ) AND
            ( v4.person IS NOT NULL OR v5.person IS NOT NULL );

    Explanation:
        - INNER JOINs are used for mandatory relations (e.g. `Person`, `person_name`).
        - LEFT OUTER JOINs bring in the optional match sets from the auxiliary `_match_*` tables.
        - `COALESCE(expr1, expr2, ...)` returns the first non-null argument (or NULL if all are
          null), which flattens the results of union-style matches into one output column while
          preserving partial matches.
    """
    var_to_construct = {construct.id_var: construct for construct in constructs} if constructs else {}

    union_lookups: dict[ir.Union, OrderedSet[ir.Lookup]] = self._extract_all_lookups_per_union(unions)

    # Table lookups drive the FROM/JOIN structure.
    table_lookups = OrderedSet.from_iterable(
        lookup for lookup in lookups if not builtins.is_builtin(lookup.relation))
    froms, joins, wheres, sql_vars, var_column, var_lookups = (
        self._extract_match_lookups_metadata(table_lookups, union_lookups))

    # Builtin lookups contribute variables, WHERE conditions and table expressions.
    builtin_lookups = OrderedSet.from_iterable(
        lookup for lookup in lookups if builtins.is_builtin(lookup.relation))
    builtin_vars, builtin_wheres, builtin_table_expressions = self._resolve_builtins(
        builtin_lookups, var_lookups, var_column, sql_vars, var_to_construct, outputs)

    froms.extend(self._process_builtin_table_expressions(builtin_table_expressions))
    wheres.extend(builtin_wheres)

    wheres.extend(self._process_constructs(table_lookups, var_lookups, var_column, sql_vars, builtin_vars,
                                           var_to_construct))

    not_null_vars, select_vars = self._generate_select_output(outputs, builtin_vars, sql_vars, var_column,
                                                              var_lookups, var_to_construct)
    if not_null_vars:
        wheres.extend(sql.NotNull(var) for var in not_null_vars)

    not_exists, _ = self._generate_select_nots(nots, var_lookups, sql_vars, var_column, len(sql_vars))
    wheres.extend(not_exists)

    where = self._process_wheres_clauses(wheres)

    return sql.Select(distinct, select_vars, froms, where, joins, is_output=is_output)
659
+
660
def _make_full_outer_join_select(self, outputs: list[OutputVar], unions: list[ir.Union],
                                 constructs: Optional[list[ir.Construct]] = None, distinct: bool = False,
                                 is_output: bool = False):
    """
    Build the SELECT for a match that consists only of unions, combined with FULL OUTER JOINs.

    Used when the IR has no table lookups, only `Union` operations grouped under a `Logical`.
    All values of every union input are preserved and the matching fields are aligned through
    outer joins.

    IR Example:
        Logical
            Logical ^[v0=None]
                Union ^[v0]
                    _match_10(v0)
                    _match_11(v0)
            Logical ^[v0_2=None]
                Union ^[v0_2]
                    _match_12(v0_2)
                    _match_13(v0_2)
            -> output(v0, v0_2 as 'v02')

    The output has two fields: `v0` (from `_match_10` / `_match_11`) and `v02`
    (from `_match_12` / `_match_13`).

    Example output:
        SELECT DISTINCT
            COALESCE(v0.v0, v1.v0) as v0, COALESCE(v2.v0, v3.v0) as v02
        FROM
            _match_10 AS v0
            FULL OUTER JOIN _match_11 AS v1 ON TRUE
            FULL OUTER JOIN _match_12 AS v2 ON TRUE
            FULL OUTER JOIN _match_13 AS v3 ON TRUE;

    Explanation:
        - Each `Union` refers to one or more relations (e.g. `_match_10`, `_match_11`) that may
          hold disjoint subsets of the data.
        - `FULL OUTER JOIN` keeps every value from each side, including `NULL`s.
        - `COALESCE()` merges the joined columns into a single column per output field.
    """
    var_to_construct = {construct.id_var: construct for construct in constructs} if constructs else {}

    union_lookups: dict[ir.Union, OrderedSet[ir.Lookup]] = self._extract_all_lookups_per_union(unions)
    froms, joins, wheres, sql_vars, var_column, var_lookups = (
        self._extract_union_lookups_metadata(union_lookups))

    # There are no builtin lookups on this path, hence the empty builtin-vars mapping.
    not_null_vars, select_vars = self._generate_select_output(outputs, {}, sql_vars, var_column, var_lookups,
                                                              var_to_construct)
    if not_null_vars:
        wheres.extend(sql.NotNull(var) for var in not_null_vars)

    where = self._process_wheres_clauses(wheres)

    return sql.Select(distinct, select_vars, froms, where, joins, is_output=is_output)
719
+
720
def _make_left_outer_join_select(self, task: ir.Logical, lookups: list[ir.Lookup], outputs: list[OutputVar],
                                 nots: Optional[list[ir.Not]] = None, constructs: Optional[list[ir.Construct]] = None,
                                 distinct: bool = False) -> sql.Select:
    """
    Build the SELECT for an output query, mixing INNER JOIN and LEFT OUTER JOIN according to the IR.

    ### JOIN Rules:

    1. **Top-level lookups** (direct children of the root `Logical`) always use **INNER JOIN**.

    2. **LEFT OUTER JOIN** is used for a lookup that appears inside a nested `Logical` which
       hoists the corresponding variable with a `None` value.
       - Example: `id(student, id)` becomes a LEFT OUTER JOIN if the `Logical` hoists `id=None`.

    3. A variable hoisted with `None` in one `Logical` but also used by a lookup hoisted without
       `None` elsewhere gets an **INNER JOIN**.

    ---

    ### IR Example 1 (with LEFT OUTER JOIN):

        IR:
            Logical
                Logical
                    Student(student)
                    goes_at(student, school)
                    subject(school, subject)
                    desc(subject, desc)
                    desc = "English"
                Logical ^[id=None]
                    id(student, id)
                Logical ^[name=None, course=None]
                    attends(student, course)
                    instructor(course, instructor)
                    name(instructor, name)
                -> output[student, course, subject](id, name, desc)

        SQL Output:
            SELECT
                v0.id, v3.name, v7.desc
            FROM
                Student AS v4
                JOIN student_goes_at AS v5 ON v4.student = v5.student
                JOIN school_subject AS v6 ON v5.school = v6.school
                JOIN subject_desc AS v7 ON v6.subject = v7.subject
                LEFT OUTER JOIN student_id AS v0 ON v5.student = v0.student
                LEFT OUTER JOIN student_attends AS v1 ON v5.student = v1.student
                LEFT OUTER JOIN course_instructor AS v2 ON v1.course = v2.course
                LEFT OUTER JOIN instructor_name AS v3 ON v2.instructor = v3.instructor
            WHERE
                v7.desc = 'English';

    ---

    ### IR Example 2 (with NOT EXISTS):

        IR:
            Logical
                Not
                    Logical
                        Logical ^[person, age]
                            _union_1(person, age)
                Person(person)
                Logical ^[name=None]
                    name(person, name)
                Logical ^[age=None]
                    age(person, age)
                -> output[person](name, age)

        `age` is hoisted with `None`, but it is also used in `_union_1`, which is hoisted without
        `None` (`^[person, age]`), so `age(person, age)` is compiled as an INNER JOIN.

        SQL Output:
            SELECT
                v0.name,
                v1.age
            FROM
                Person AS v2
                JOIN person_name AS v0 ON v2.person = v0.person
                JOIN person_age AS v1 ON v2.person = v1.person
            WHERE
                NOT EXISTS ( SELECT 1 FROM _union_1 AS v3 WHERE v3.person = v0.person AND v3.age = v1.age );

    ---
    """
    var_to_construct = {construct.id_var: construct for construct in constructs} if constructs else {}

    table_lookups = OrderedSet.from_iterable(
        lookup for lookup in lookups if not builtins.is_builtin(lookup.relation))
    froms, joins, wheres, sql_vars, var_column, var_lookups = (
        self._extract_left_outer_joins_lookups_metadata(task, table_lookups, nots))

    builtin_lookups = OrderedSet.from_iterable(
        lookup for lookup in lookups if builtins.is_builtin(lookup.relation))
    builtin_vars, builtin_wheres, builtin_table_expressions = self._resolve_builtins(
        builtin_lookups, var_lookups, var_column, sql_vars, var_to_construct, outputs)

    # Two join classes are needed: with `LEFT OUTER JOIN` and `ARRAY_GENERATE_RANGE`, SF doesn't
    # allow usage of `ON TRUE`, whereas for DuckDB it is mandatory.
    join_cls = sql.Join if self._is_duck_db else sql.JoinWithoutCondition
    joins.extend(join_cls(expr, alias) for alias, expr in builtin_table_expressions.items())

    wheres.extend(builtin_wheres)

    wheres.extend(self._process_constructs(table_lookups, var_lookups, var_column, sql_vars, builtin_vars,
                                           var_to_construct))

    # The not-null variables are deliberately not turned into WHERE conditions on this path.
    _, select_vars = self._generate_select_output(outputs, builtin_vars, sql_vars, var_column, var_lookups,
                                                  var_to_construct)

    not_exists, _ = self._generate_select_nots(nots, var_lookups, sql_vars, var_column, len(sql_vars))
    wheres.extend(not_exists)

    where = self._process_wheres_clauses(wheres)

    return sql.Select(distinct, select_vars, froms, where, joins, is_output=True)
839
+
840
def _make_select(self, lookups: list[ir.Lookup], outputs: list[OutputVar], nots: Optional[list[ir.Not]] = None,
                 unions: Optional[list[ir.Union]] = None, constructs: Optional[list[ir.Construct]] = None,
                 distinct: bool = False, is_output: bool = False) -> sql.Select:
    """
    Build a plain SELECT (comma-separated FROM list, conditions in WHERE) for the given lookups.

    The lookups found in `unions` are added to `lookups`. Non-builtin lookups become FROM entries;
    builtin lookups, constructs, shared variables, not-null outputs and `nots` all contribute
    WHERE conditions.
    """
    var_to_construct = {construct.id_var: construct for construct in constructs} if constructs else {}

    union_lookups: dict[ir.Union, OrderedSet[ir.Lookup]] = self._extract_all_lookups_per_union(unions)
    combined_lookups = lookups + list(chain.from_iterable(union_lookups.values()))

    # Table lookups become the FROM entries.
    table_lookups = OrderedSet.from_iterable(
        lookup for lookup in combined_lookups if not builtins.is_builtin(lookup.relation))
    froms, wheres, sql_vars, var_column, var_lookups = self._extract_lookups_metadata(table_lookups)

    # Builtin lookups contribute variables, WHERE conditions and table expressions.
    builtin_lookups = OrderedSet.from_iterable(
        lookup for lookup in combined_lookups if builtins.is_builtin(lookup.relation))
    builtin_vars, builtin_wheres, builtin_table_expressions = self._resolve_builtins(
        builtin_lookups, var_lookups, var_column, sql_vars, var_to_construct, outputs)

    froms.extend(self._process_builtin_table_expressions(builtin_table_expressions))
    wheres.extend(builtin_wheres)

    wheres.extend(self._process_constructs(table_lookups, var_lookups, var_column, sql_vars, builtin_vars,
                                           var_to_construct))

    wheres.extend(self._generate_where_clauses(var_lookups, var_column, sql_vars, union_lookups))

    not_null_vars, select_vars = self._generate_select_output(outputs, builtin_vars, sql_vars, var_column,
                                                              var_lookups, var_to_construct)
    if not_null_vars:
        wheres.extend(sql.NotNull(var) for var in not_null_vars)

    not_exists, _ = self._generate_select_nots(nots, var_lookups, sql_vars, var_column, len(sql_vars))
    wheres.extend(not_exists)

    where = self._process_wheres_clauses(wheres)

    return sql.Select(distinct, select_vars, froms, where, is_output=is_output)
878
+
879
def _extract_lookups_metadata(self, lookups: OrderedSet[ir.Lookup], start_index: int = 0):
    """
    Assign a SQL alias (`v<index>`, counting from `start_index`) to every lookup and collect its metadata.

    Returns a tuple `(froms, wheres, sql_vars, var_column, var_lookups)`: one FROM entry per
    lookup, plus the WHERE conditions and the helper mappings filled in by `_process_lookup_args`.
    """
    froms: list[sql.From] = []
    wheres: list[sql.Expr] = []
    sql_vars: dict[ir.Lookup, str] = {}  # one var per table lookup
    var_column: dict[Tuple[ir.Var, ir.Lookup], ir.Field] = {}
    var_lookups: dict[ir.Var, OrderedSet[ir.Lookup]] = defaultdict(OrderedSet)

    for index, lookup in enumerate(lookups, start_index):
        alias = f"v{index}"
        froms.append(sql.From(self._relation_name(lookup.relation), alias))
        sql_vars[lookup] = alias
        self._process_lookup_args(lookup, sql_vars, var_column, var_lookups, wheres)

    return froms, wheres, sql_vars, var_column, var_lookups
895
+
896
def _extract_match_lookups_metadata(self, lookups: OrderedSet[ir.Lookup],
                                    union_lookups: dict[ir.Union, OrderedSet[ir.Lookup]], start_index: int = 0):
    """
    Assign SQL aliases to the table and union lookups of a match and build its FROM/JOIN structure.

    The first entry of `lookups` becomes the root FROM. Every other table lookup is attached with
    an INNER JOIN and every union lookup with a LEFT OUTER JOIN, each joined on the variables it
    shares with the most recently joined lookup that uses them. For each union, an OR over the
    "is not null" conditions of its left-joined lookups is added to the WHERE conditions.

    Args:
        lookups: non-builtin table lookups; must not be empty.
        union_lookups: lookups grouped by the union they belong to.
        start_index: number used for the first alias (`v<start_index>`).

    Returns:
        A tuple `(froms, joins, wheres, sql_vars, var_column, var_lookups)`.

    Raises:
        ValueError: if `lookups` is empty (there is no table to use as the root FROM), or if a
            lookup shares no variable with any previously joined lookup.
    """
    wheres: list[sql.Expr] = []
    sql_vars: dict[ir.Lookup, str] = dict()  # one var per table lookup
    var_column: dict[Tuple[ir.Var, ir.Lookup], ir.Field] = dict()
    var_lookups: dict[ir.Var, OrderedSet[ir.Lookup]] = defaultdict(OrderedSet)
    i = start_index

    def process_lookups(lookup_set: OrderedSet[ir.Lookup]):
        nonlocal i
        for lookup in lookup_set:
            sql_vars[lookup] = f"v{i}"
            self._process_lookup_args(lookup, sql_vars, var_column, var_lookups, wheres)
            i += 1

    # Step 1: assign aliases and populate helper mappings
    process_lookups(lookups)
    for values in union_lookups.values():
        process_lookups(values)

    # Step 2: build joins
    used_lookups = ordered_set()
    # Use a default instead of a bare `next(...)`: a leaked StopIteration would be swallowed by
    # any iterator-driven caller and silently end its iteration.
    first_lookup = next(iter(lookups), None)
    if first_lookup is None:
        raise ValueError("Cannot build a match select: no table lookup available for the root FROM")
    used_lookups.add(first_lookup)
    froms: list[sql.From] = []
    joins: list[sql.Join] = []

    # Start with the first table as the root FROM
    froms.append(sql.From(self._relation_name(first_lookup.relation), sql_vars[first_lookup]))

    def _process_joins(lookup: ir.Lookup, is_left_join: bool = False):
        # Try to find a shared variable with any *latest* used lookup
        join_conditions = []
        lookup_not_null_conditions = []

        # `used_lookups` is not modified while scanning the arguments, so snapshot it once.
        # Reversed: prioritize the most recent join.
        recent_first = list(reversed(list(used_lookups)))

        for arg in lookup.args:
            if isinstance(arg, ir.Var) and arg in var_lookups:
                for other_lookup in recent_first:
                    if other_lookup in var_lookups[arg]:
                        left_alias = sql_vars[other_lookup]
                        right_alias = sql_vars[lookup]

                        left_field = self._var_name(other_lookup.relation.id, var_column[(arg, other_lookup)])
                        right_field = self._var_name(lookup.relation.id, var_column[(arg, lookup)])

                        left_var = f"{left_alias}.{left_field}"
                        right_var = f"{right_alias}.{right_field}"

                        join_conditions.append(sql.Terminal(f"{left_var} = {right_var}"))

                        if is_left_join:
                            lookup_not_null_conditions.append(sql.NotNull(right_var))

                        break  # stop on first recent match

        if not join_conditions:
            raise ValueError(f"No join condition found for lookup: {lookup}")

        on = sql.And(join_conditions) if len(join_conditions) > 1 else join_conditions[0]
        if is_left_join:
            join = sql.LeftOuterJoin(self._relation_name(lookup.relation), sql_vars[lookup], on)
        else:
            join = sql.Join(self._relation_name(lookup.relation), sql_vars[lookup], on)
        joins.append(join)

        if is_left_join:
            # Left-joined lookups are not added to `used_lookups`; the caller gets the condition
            # stating that this lookup actually matched.
            if len(lookup_not_null_conditions) > 1:
                return sql.And(lookup_not_null_conditions)
            return lookup_not_null_conditions[0]

        used_lookups.add(lookup)
        return None

    # Add JOINs based on shared variables
    for lookup in lookups:
        if lookup not in used_lookups:
            _process_joins(lookup)

    # Add LEFT JOINs based on shared variables
    for values in union_lookups.values():
        not_null_conditions = []
        for lookup in values:
            if lookup not in used_lookups:
                lookup_condition = _process_joins(lookup, is_left_join=True)
                not_null_conditions.append(lookup_condition)

        # At least one lookup of the union has to match.
        if not_null_conditions:
            wheres.append(sql.Or(not_null_conditions))

    return froms, joins, wheres, sql_vars, var_column, var_lookups
982
+
983
def _extract_union_lookups_metadata(self, lookups: dict[ir.Union, OrderedSet[ir.Lookup]], start_index: int = 0):
    """Alias every lookup that belongs to a union and chain them with FULL OUTER JOINs.

    Lookups are visited in the iteration order of ``lookups`` and, inside each union,
    in the order of its lookup set. Each one receives the alias ``v<i>`` (``i`` counts
    up from ``start_index``) and is handed to ``_process_lookup_args``. The first lookup
    visited becomes the root ``FROM``; every later one is attached with a
    ``sql.FullOuterJoin`` built from just the relation name and the alias.

    Args:
        lookups: lookups grouped by the union they belong to.
        start_index: number used for the first generated alias.

    Returns:
        The tuple ``(froms, joins, wheres, sql_vars, var_column, var_lookups)``.
        With no lookups at all, every element is empty.
    """
    wheres: list[sql.Expr] = []
    sql_vars: dict[ir.Lookup, str] = {}
    var_column: dict[Tuple[ir.Var, ir.Lookup], ir.Field] = {}
    var_lookups: dict[ir.Var, OrderedSet[ir.Lookup]] = defaultdict(OrderedSet)
    froms: list[sql.From] = []
    joins: list[sql.Join] = []

    alias_index = start_index
    for union_members in lookups.values():
        for lookup in union_members:
            alias = f"v{alias_index}"
            alias_index += 1
            sql_vars[lookup] = alias
            self._process_lookup_args(lookup, sql_vars, var_column, var_lookups, wheres)

            relation_name = self._relation_name(lookup.relation)
            if not froms:
                # Nothing selected yet: this lookup is the base FROM.
                froms.append(sql.From(relation_name, alias))
            else:
                # Join the rest.
                joins.append(sql.FullOuterJoin(relation_name, alias))

    return froms, joins, wheres, sql_vars, var_column, var_lookups
1011
+
1012
def _extract_left_outer_joins_lookups_metadata(self, task: ir.Logical, lookups: OrderedSet[ir.Lookup],
                                               nots: Optional[list[ir.Not]] = None, start_index: int = 0):
    """Build FROM/JOIN metadata for ``lookups``, choosing INNER or LEFT OUTER per lookup.

    Every lookup gets the alias ``v<i>`` (counting up from ``start_index``). The first
    lookup becomes the root ``FROM``. ``task`` is then walked recursively; each
    non-builtin lookup found in it (other than the root) is recorded with an ``ON``
    expression built from the variables it shares with previously seen lookups, and is
    finally emitted as ``sql.Join`` or ``sql.LeftOuterJoin``.

    Args:
        task: the logical whose body (including nested logicals) is walked.
        lookups: lookups to alias; the first one in iteration order is the root table.
            NOTE(review): every non-builtin lookup reachable from ``task`` is looked up
            in ``sql_vars``, so it presumably must also be in ``lookups`` - confirm.
        nots: forwarded to ``_extract_all_not_null_vars_from_nots``; a lookup that has
            any argument in the returned set is forced to an inner join.
        start_index: number used for the first generated alias.

    Returns:
        The tuple ``(froms, joins, wheres, sql_vars, var_column, var_lookups)``.

    Raises:
        StopIteration: if ``lookups`` is empty (from ``next(iter(lookups))``).
    """
    wheres: list[sql.Expr] = []
    sql_vars: dict[ir.Lookup, str] = dict()  # one var per table lookup
    var_column: dict[Tuple[ir.Var, ir.Lookup], ir.Field] = dict()
    var_lookups: dict[ir.Var, OrderedSet[ir.Lookup]] = defaultdict(OrderedSet)

    # Step 1: assign aliases and populate helper mappings
    i = start_index
    for lookup in lookups:
        sql_vars[lookup] = f"v{i}"
        self._process_lookup_args(lookup, sql_vars, var_column, var_lookups, wheres)
        i += 1

    froms: list[sql.From] = []
    joins: list[sql.Join] = []
    # Every lookup that has been placed so far (root first), across all nesting levels.
    full_context = ordered_set()

    # Choose a root FROM table
    first_lookup = next(iter(lookups))
    froms.append(sql.From(self._relation_name(first_lookup.relation), sql_vars[first_lookup]))
    full_context.add(first_lookup)

    @dataclass(frozen=True)
    class JoinMetadata:
        # ON expression for the join, or None when no shared variable was found.
        on: Optional[sql.Expr] = None
        # True -> emit sql.Join, False -> emit sql.LeftOuterJoin.
        inner_join: bool = False

    # Keyed by lookup; insertion order decides the order of the emitted joins.
    joins_metadata: dict[ir.Lookup, JoinMetadata] = {}
    not_null_vars: set[ir.Var] = self._extract_all_not_null_vars_from_nots(nots)

    def _process_joins(lookup: ir.Lookup, context: OrderedSet[ir.Lookup], inner_join: bool = True):
        """Record (or upgrade) the join metadata for ``lookup`` against earlier lookups."""
        join_conditions = []
        # Guards against adding the same "left = right" equality twice.
        seen_pairs = set()

        # We want most recent joins first from context, then from full_context
        search_context = list(reversed(context)) + [
            lk for lk in reversed(full_context) if lk not in context
        ]

        for arg in lookup.args:
            # Evaluated for every argument: one not-null variable is enough to force an inner join.
            inner_join = arg in not_null_vars or inner_join
            if isinstance(arg, ir.Var) and arg in var_lookups:
                for other_lookup in search_context:
                    if other_lookup in var_lookups[arg]:
                        right_alias = sql_vars[lookup]
                        left_alias = sql_vars[other_lookup]

                        right_field = self._var_name(lookup.relation.id, var_column[(arg, lookup)])
                        left_field = self._var_name(other_lookup.relation.id, var_column[(arg, other_lookup)])

                        pair = (left_alias, left_field, right_alias, right_field)
                        if pair not in seen_pairs:
                            seen_pairs.add(pair)
                            join_conditions.append(sql.Terminal(f"{left_alias}.{left_field} = {right_alias}.{right_field}"))
                        break  # stop at first matching lookup

        # A single condition is used as-is; several are combined with AND.
        on = None
        if join_conditions:
            on = sql.And(join_conditions) if len(join_conditions) > 1 else join_conditions[0]

        join_metadata = joins_metadata.get(lookup)

        # JoinMetadata defines no __bool__/__len__, so this is effectively an "already recorded" check.
        if join_metadata:
            # Upgrade to inner join only if previously marked as left outer join
            if inner_join and not join_metadata.inner_join:
                joins_metadata[lookup] = JoinMetadata(on, inner_join)
        else:
            joins_metadata[lookup] = JoinMetadata(on, inner_join)

    def _process_logical(logical: ir.Logical, parent_context: Optional[OrderedSet[ir.Lookup]] = None):
        """Walk ``logical`` depth-first, registering joins for the lookups in its body."""
        # Step 1: Prepare null variables from hoisted defaults
        null_vars = {
            v.var for v in (logical.hoisted or [])
            if isinstance(v, ir.Default) and v.value is None
        }

        # Step 2: Create a working context from parent_context
        # (a copy, so additions made at this level do not leak into the parent's set)
        context = OrderedSet.from_iterable(parent_context) if parent_context else ordered_set()

        # Step 3: Process all sub-tasks
        for sub_task in logical.body:
            if isinstance(sub_task, ir.Logical):
                _process_logical(sub_task, context)
            elif isinstance(sub_task, ir.Lookup):
                lookup = cast(ir.Lookup, sub_task)
                if lookup != first_lookup and not builtins.is_builtin(lookup.relation):
                    # A logical that hoists a None default starts its lookups as LEFT OUTER joins.
                    inner_join = False if null_vars else True
                    _process_joins(lookup, context, inner_join)
                    context.add(lookup)
                    full_context.add(lookup)

    _process_logical(task, full_context)

    # Emit the joins in the order the lookups were first recorded.
    for lookup, metadata in joins_metadata.items():
        if metadata.inner_join:
            joins.append(sql.Join(self._relation_name(lookup.relation), sql_vars[lookup], metadata.on))
        else:
            joins.append(sql.LeftOuterJoin(self._relation_name(lookup.relation), sql_vars[lookup], metadata.on))

    return froms, joins, wheres, sql_vars, var_column, var_lookups
1113
+
1114
def _process_lookup_args(self, lookup: ir.Lookup, sql_vars: dict[ir.Lookup, str],
                         var_column: dict[Tuple[ir.Var, ir.Lookup], ir.Field],
                         var_lookups: dict[ir.Var, OrderedSet[ir.Lookup]], wheres: list[sql.Expr]):
    """Index the arguments of ``lookup`` into the shared helper mappings.

    Arguments are matched positionally with ``lookup.relation.fields``:

    * a variable is recorded in ``var_column`` (variable + lookup -> field) and the
      lookup is added to ``var_lookups[variable]``;
    * a literal (``int``, ``str``, ``float``, ``bool`` or ``ir.Literal``) appends an
      ``<alias>.<column> = <value>`` filter to ``wheres``;
    * anything else is ignored.

    All results are written into the passed-in containers; nothing is returned.
    """
    rel = lookup.relation
    for position, argument in enumerate(lookup.args):
        column = rel.fields[position]

        if isinstance(argument, ir.Var):
            var_column[argument, lookup] = column
            var_lookups[argument].add(lookup)
            continue

        if not isinstance(argument, (int, str, float, bool, ir.Literal)):
            continue

        # A literal used as a relation argument, e.g. `test(1, x)`, pins that column to the value.
        qualified_column = f"{sql_vars[lookup]}.{self._var_name(rel.id, column)}"
        wheres.append(sql.Terminal(f"{qualified_column} = {self._convert_value(argument)}"))
1127
+
1128
def _var_reference(self, var_lookups: dict[ir.Var, OrderedSet[ir.Lookup]], sql_vars: dict[ir.Lookup, str],
                   var_column: dict[Tuple[ir.Var, ir.Lookup], ir.Field], v):
    """Render ``v`` as SQL text.

    A variable becomes ``<alias>.<column>``, using one of the lookups registered for it
    in ``var_lookups``. A string is wrapped in single quotes; any other value is
    converted with ``str``.
    """
    if not isinstance(v, ir.Var):
        if isinstance(v, str):
            # NOTE(review): the value is quoted verbatim; an embedded single quote is not escaped here.
            return f"'{v}'"
        return str(v)

    # TODO - assuming the built-in reference was grounded elsewhere
    source_lookup = var_lookups[v].some()
    alias = sql_vars[source_lookup]
    column = self._var_name(source_lookup.relation.id, var_column[(v, source_lookup)])
    return f"{alias}.{column}"
1135
+
1136
def _resolve_builtin_var(self, builtin_vars: dict[ir.Var, ir.Value|str|int], var):
    """Follow ``builtin_vars`` from ``var`` until something other than a mapped variable is reached.

    The lookup is recursive because a value may be reachable only through a chain of
    more than two builtin lookups. For example the QB expression `a != decimal(0)`
    looks like this in the IR:

        Logical ^[res]
            Exists(vDecimal128)
                Logical
                    cast(Decimal128, 0, vDecimal128)
                    decimal128(vDecimal128, res)
        a != res

    while the SQL we need is simply `a != 0`.

    Returns ``var`` unchanged when it is not a variable or has no entry in ``builtin_vars``.
    """
    if not isinstance(var, ir.Var) or var not in builtin_vars:
        return var

    mapped = builtin_vars[var]
    if isinstance(mapped, ir.Var):
        # Still a variable: keep following the chain.
        return self._resolve_builtin_var(builtin_vars, mapped)
    return mapped
1150
+
1151
+ def _build_hash_expression(self, reference, resolve_builtin_var, var_to_construct, values):
1152
+ """Generate hash expression like hash(`x`, `y`, TABLE_ALIAS.COLUMN_NAME)."""
1153
+ elements = []
1154
+ for val in values:
1155
+ resolved_val = resolve_builtin_var(val)
1156
+ if val != resolved_val and isinstance(resolved_val, str):
1157
+ # In case we parsed builtin into some expression, we may add it as an element.
1158
+ # For example, `TO_DATE('1990-1-1', 'Y-m-d')` or `(v1.value + 5)`.
1159
+ elements.append(f"{resolved_val}")
1160
+ continue
1161
+ if isinstance(resolved_val, ir.Var):
1162
+ if resolved_val in var_to_construct:
1163
+ elements.append(self._resolve_construct_var(reference, resolve_builtin_var, var_to_construct, var_to_construct[resolved_val]))
1164
+ else:
1165
+ elements.append(reference(resolved_val))
1166
+ else:
1167
+ elements.append(str(self._convert_value(resolved_val)))
1168
+ return f"hash({', '.join(elements)})"
1169
+
1170
def _resolve_construct_var(self, reference, resolve_builtin_var, var_to_construct, construct: ir.Construct):
    """Return the ``hash(...)`` expression built from the values of ``construct``."""
    construct_values = construct.values
    return self._build_hash_expression(reference, resolve_builtin_var, var_to_construct, construct_values)
1172
+
1173
def _resolve_hash_var(self, reference, resolve_builtin_var, var_to_construct, arg: Union[ir.ListType, ir.Value]):
    """Return the ``hash(...)`` expression for the argument of a `hash` builtin.

    A tuple argument contributes one hash element per item; any other argument is
    hashed as a single element.
    """
    # Check against the builtin `tuple` (as the `join` builtin handling does) rather than the typing alias.
    values = arg if isinstance(arg, tuple) else [arg]
    return self._build_hash_expression(reference, resolve_builtin_var, var_to_construct, values)
1177
+
1178
+ def _resolve_builtins(self, builtin_lookups: OrderedSet[ir.Lookup], var_lookups: dict[ir.Var, OrderedSet[ir.Lookup]],
1179
+ var_column: dict[Tuple[ir.Var, ir.Lookup], ir.Field], sql_vars: dict[ir.Lookup, str],
1180
+ var_to_construct: dict[ir.Var, ir.Construct],
1181
+ outputs: Optional[list[OutputVar]] = None):
1182
+
1183
+ wheres: list[sql.Expr] = []
1184
+ # We need to maintain a mapping of these builtin expressions because they generate a new table, which must be
1185
+ # referenced in the FROM clause as part of a JOIN. Structure is `SQL table variable` -> `generated expression`
1186
+ table_expressions: dict[str, str] = {}
1187
+ builtin_vars: dict[ir.Var, ir.Value|str|int] = {}
1188
+ # TODO: remove this when we introduce date periods in builtins
1189
+ date_period_var_type: dict[ir.Var, str] = {}
1190
+
1191
+ output_vars = {
1192
+ output.value
1193
+ for output in outputs or []
1194
+ if isinstance(output.value, ir.Var)
1195
+ }
1196
+
1197
+ intermediate_builtin_vars: set[ir.Var] = {
1198
+ arg for lookup in builtin_lookups
1199
+ for arg in lookup.args
1200
+ if isinstance(arg, ir.Var) and arg not in var_lookups
1201
+ }
1202
+
1203
+ reference = partial(self._var_reference, var_lookups, sql_vars, var_column)
1204
+ resolve_builtin_var = partial(self._resolve_builtin_var, builtin_vars)
1205
+
1206
+ for lookup in self._sort_builtin_lookups(list(builtin_lookups), output_vars):
1207
+ args = lookup.args
1208
+ relation = lookup.relation
1209
+ relation_name = self._relation_name(relation)
1210
+
1211
+ if relation == builtins.substring:
1212
+ assert len(args) == 4, f"Expected 4 args for `strings.substring`, got {len(args)}: {args}"
1213
+
1214
+ # Unpack and process arguments
1215
+ lhs_raw, from_idx_raw, to_idx_raw, output = args
1216
+ assert isinstance(output, ir.Var), "Fourth argument (output) must be a variable"
1217
+ from_idx = self._convert_value(from_idx_raw)
1218
+ to_idx = self._convert_value(to_idx_raw)
1219
+
1220
+ # Resolve the left-hand side expression
1221
+ left = self._var_to_expr(lhs_raw, reference, resolve_builtin_var, var_to_construct)
1222
+
1223
+ # Calculate substring length: SQL is 1-based and end-inclusive
1224
+ substring_len = int(to_idx) - int(from_idx) + 1
1225
+ assert substring_len >= 0, f"Invalid substring range: from {from_idx} to {to_idx}"
1226
+
1227
+ expr = f"substring({left}, {from_idx}, {substring_len})"
1228
+ builtin_vars[output] = expr
1229
+ elif relation == builtins.replace:
1230
+ assert len(args) == 4, f"Expected 4 args for `replace`, got {len(args)}: {args}"
1231
+ subject_raw, pattern_raw, replacement_raw, output = args
1232
+ subject = self._var_to_expr(subject_raw, reference, resolve_builtin_var, var_to_construct)
1233
+ pattern = self._var_to_expr(pattern_raw, reference, resolve_builtin_var, var_to_construct)
1234
+ replacement = self._var_to_expr(replacement_raw, reference, resolve_builtin_var, var_to_construct)
1235
+ assert isinstance(output, ir.Var), "Fourth argument (output) must be a variable"
1236
+ builtin_vars[output] = f"replace({subject}, {pattern}, {replacement})"
1237
+ elif relation == builtins.split_part:
1238
+ assert len(args) == 4, f"Expected 4 args for `split_part`, got {len(args)}: {args}"
1239
+ separator_raw, s_raw, idx_raw, output = args
1240
+ separator = self._var_to_expr(separator_raw, reference, resolve_builtin_var, var_to_construct)
1241
+ s = self._var_to_expr(s_raw, reference, resolve_builtin_var, var_to_construct)
1242
+ idx = self._var_to_expr(idx_raw, reference, resolve_builtin_var, var_to_construct)
1243
+ assert isinstance(output, ir.Var)
1244
+ builtin_vars[output] = f"split_part({s}, {separator}, {idx})"
1245
+ elif relation == builtins.split:
1246
+ assert len(args) == 4, f"Expected 4 args for `split`, got {len(args)}: {args}"
1247
+ separator_raw, value_raw, index, part = args
1248
+ value = self._var_to_expr(value_raw, reference, resolve_builtin_var, var_to_construct)
1249
+ separator = self._var_to_expr(separator_raw, reference, resolve_builtin_var, var_to_construct)
1250
+ table_sql_var = f"v{len(sql_vars)}"
1251
+ sql_vars[lookup] = table_sql_var
1252
+ if self._is_duck_db:
1253
+ table_alias = f"{table_sql_var}(data)"
1254
+ table_expressions[table_alias] = f"VALUES(string_split({value}, {separator}))"
1255
+
1256
+ part_expr = f"unnest({table_sql_var}.data)"
1257
+ index_expr = f"generate_subscripts({table_sql_var}.data, 1)"
1258
+ else:
1259
+ table_expressions[table_sql_var] = f"LATERAL FLATTEN(input => SPLIT({value}, {separator}))"
1260
+
1261
+ # SF returns values in `""` and to avoid this, we need to cast it to `TEXT` type
1262
+ part_expr = f"cast({table_sql_var}.value as TEXT)"
1263
+ index_expr = f"({table_sql_var}.index + 1)" # SF is 0-based internally, adjust to it back
1264
+ assert isinstance(index, ir.Var) and isinstance(part, ir.Var), "Third and fourth arguments (index, part) must be variables"
1265
+ builtin_vars[part] = part_expr
1266
+ builtin_vars[index] = index_expr
1267
+ elif relation == builtins.range or relation in builtins.range.overloads:
1268
+ assert len(args) == 4, f"Expected 4 args for `range`, got {len(args)}: {args}"
1269
+ start_raw, stop_raw, step_raw, result = args
1270
+ start = self._var_to_expr(start_raw, reference, resolve_builtin_var, var_to_construct)
1271
+ stop = self._var_to_expr(stop_raw, reference, resolve_builtin_var, var_to_construct)
1272
+ step = self._var_to_expr(step_raw, reference, resolve_builtin_var, var_to_construct)
1273
+ table_sql_var = f"v{len(sql_vars)}"
1274
+ sql_vars[lookup] = table_sql_var
1275
+ # In SQL range is 1...stop exclusive, and because we did `-1` in PyRel v1 we need to return it here
1276
+ if self._is_duck_db:
1277
+ table_expr = f"LATERAL range(cast({start} as bigint), cast(({stop} + 1) as bigint), cast({step} as bigint))"
1278
+ expr = f"{table_sql_var}.range"
1279
+ else:
1280
+ table_expr = f"LATERAL FLATTEN(input => ARRAY_GENERATE_RANGE({start}, ({stop} + 1), {step}))"
1281
+ expr = f"{table_sql_var}.value"
1282
+ table_expressions[table_sql_var] = table_expr
1283
+ assert isinstance(result, ir.Var), "Fourth argument (result) must be a variable"
1284
+ builtin_vars[result] = f"{expr}"
1285
+ elif relation == builtins.cast:
1286
+ assert len(args) == 3, f"Expected 3 args for `cast`, got {len(args)}: {args}"
1287
+
1288
+ _, original_raw, result = args
1289
+ assert isinstance(result, ir.Var), "Third argument (result) must be a variable"
1290
+
1291
+ builtin_vars[result] = original_raw
1292
+ elif relation in {builtins.isnan, builtins.isinf}:
1293
+ arg_expr = self._var_to_expr(args[0], reference, resolve_builtin_var, var_to_construct)
1294
+ expr = "cast('NaN' AS DOUBLE)" if relation == builtins.isnan else "cast('Infinity' AS DOUBLE)"
1295
+ wheres.append(sql.Terminal(f"{arg_expr} = {expr}"))
1296
+ elif relation == builtins.construct_date:
1297
+ assert len(args) == 4, f"Expected 4 args for `construct_date`, got {len(args)}: {args}"
1298
+ year_raw, month_raw, day_raw, result = args
1299
+ year = self._var_to_expr(year_raw, reference, resolve_builtin_var, var_to_construct)
1300
+ month = self._var_to_expr(month_raw, reference, resolve_builtin_var, var_to_construct)
1301
+ day = self._var_to_expr(day_raw, reference, resolve_builtin_var, var_to_construct)
1302
+
1303
+ assert isinstance(result, ir.Var), "Fourth argument (result) must be a variable."
1304
+ if self._is_duck_db:
1305
+ expr = f"make_date(cast({year} as bigint), cast({month} as bigint), cast({day} as bigint))"
1306
+ else:
1307
+ expr = f"date_from_parts({year}, {month}, {day})"
1308
+ builtin_vars[result] = expr
1309
+ elif relation == builtins.construct_datetime_ms_tz:
1310
+ assert len(args) == 9, f"Expected 9 args for `construct_datetime_ms_tz`, got {len(args)}: {args}"
1311
+
1312
+ year_raw, month_raw, day_raw, hour_raw, minute_raw, second_raw, millisecond_raw, tz_raw, result = args
1313
+ assert isinstance(result, ir.Var), "Ninth argument (result) must be a variable."
1314
+
1315
+ year = self._var_to_expr(year_raw, reference, resolve_builtin_var, var_to_construct)
1316
+ month = self._var_to_expr(month_raw, reference, resolve_builtin_var, var_to_construct)
1317
+ day = self._var_to_expr(day_raw, reference, resolve_builtin_var, var_to_construct)
1318
+ hour = self._var_to_expr(hour_raw, reference, resolve_builtin_var, var_to_construct)
1319
+ minute = self._var_to_expr(minute_raw, reference, resolve_builtin_var, var_to_construct)
1320
+ second = self._var_to_expr(second_raw, reference, resolve_builtin_var, var_to_construct)
1321
+ millisecond = self._var_to_expr(millisecond_raw, reference, resolve_builtin_var, var_to_construct)
1322
+ tz = self._var_to_expr(tz_raw, reference, resolve_builtin_var, var_to_construct)
1323
+
1324
+ if self._is_duck_db:
1325
+ sub_expr = (f"make_timestamp(cast({year} as bigint), cast({month} as bigint), cast({day} as bigint), "
1326
+ f"cast({hour} as bigint), cast({minute} as bigint), cast({second} as bigint) + {millisecond}/1000.0)")
1327
+ if tz.lower() != "'utc'":
1328
+ sub_expr = f"(({sub_expr} at time zone {tz}) at time zone 'UTC')"
1329
+ else:
1330
+ sub_expr = (f"to_timestamp_ntz(lpad({year}, 4, '0') || '-' || lpad({month}, 2, '0') || '-' || "
1331
+ f"lpad({day}, 2, '0') || ' ' || lpad({hour}, 2, '0') || ':' || "
1332
+ f"lpad({minute}, 2, '0') || ':' || lpad({second}, 2, '0') || '.' || "
1333
+ f"lpad({millisecond}, 3, '0'), 'YYYY-MM-DD HH24:MI:SS.FF3')")
1334
+ if tz.lower() != "'utc'":
1335
+ sub_expr = f"convert_timezone({tz}, 'UTC', {sub_expr})"
1336
+ builtin_vars[result] = f"cast({sub_expr} as DATETIME)"
1337
+ elif relation == builtins.infomap:
1338
+ raise NotImplementedError("`infomap` is not supported in SQL")
1339
+ elif relation == builtins.louvain:
1340
+ raise NotImplementedError("`louvain` is not supported in SQL")
1341
+ elif relation == builtins.label_propagation:
1342
+ raise NotImplementedError("`label_propagation` is not supported in SQL")
1343
+ else:
1344
+ # Assuming infix binary or ternary operators here
1345
+ lhs, rhs = args[0], args[1]
1346
+ if relation in builtins.string_binary_builtins:
1347
+ left = self._var_to_expr(lhs, reference, resolve_builtin_var, var_to_construct)
1348
+ if relation == builtins.num_chars and isinstance(rhs, ir.Var):
1349
+ builtin_vars[rhs] = f"length({left})"
1350
+ elif relation == builtins.lower and isinstance(rhs, ir.Var):
1351
+ builtin_vars[rhs] = f"lower({left})"
1352
+ elif relation == builtins.upper and isinstance(rhs, ir.Var):
1353
+ builtin_vars[rhs] = f"upper({left})"
1354
+ elif relation == builtins.strip and isinstance(rhs, ir.Var):
1355
+ builtin_vars[rhs] = f"trim({left})"
1356
+ elif relation == builtins.regex_match:
1357
+ right = self._var_to_expr(rhs, reference, resolve_builtin_var, var_to_construct)
1358
+ # swap left and right for SQL
1359
+ wheres.append(sql.RegexLike(right, left))
1360
+ else:
1361
+ right = self._var_to_expr(rhs, reference, resolve_builtin_var, var_to_construct, False)
1362
+ if relation == builtins.starts_with:
1363
+ expr = f"concat({right}, '%')" if isinstance(rhs, ir.Var) else f"'{right}%'"
1364
+ elif relation == builtins.ends_with:
1365
+ expr = f"concat('%', {right})" if isinstance(rhs, ir.Var) else f"'%{right}'"
1366
+ elif relation == builtins.like_match:
1367
+ expr = right if isinstance(rhs, ir.Var) else f"'{right}'"
1368
+ elif relation == builtins.contains:
1369
+ expr = f"concat('%', {right}, '%')" if isinstance(rhs, ir.Var) else f"'%{right}%'"
1370
+ else:
1371
+ raise Exception(f"Unsupported string builtin relation: {relation}")
1372
+ wheres.append(sql.Like(left, expr))
1373
+ elif relation == builtins.levenshtein:
1374
+ assert len(args) == 3, f"Expected 3 args for `levenshtein`, got {len(args)}: {args}"
1375
+ left = self._var_to_expr(lhs, reference, resolve_builtin_var, var_to_construct)
1376
+ right = self._var_to_expr(rhs, reference, resolve_builtin_var, var_to_construct)
1377
+ function = "levenshtein" if self._is_duck_db else "editdistance"
1378
+ assert isinstance(args[2], ir.Var)
1379
+ builtin_vars[args[2]] = f"{function}({left}, {right})"
1380
+ elif relation == builtins.concat:
1381
+ assert len(args) == 3, f"Expected 3 args for `concat`, got {len(args)}: {args}"
1382
+ left = self._var_to_expr(lhs, reference, resolve_builtin_var, var_to_construct)
1383
+ right = self._var_to_expr(rhs, reference, resolve_builtin_var, var_to_construct)
1384
+ assert isinstance(args[2], ir.Var)
1385
+ builtin_vars[args[2]] = f"concat({left}, {right})"
1386
+ elif relation == builtins.join:
1387
+ assert len(args) == 3, f"Expected 3 args for `join`, got {len(args)}: {args}"
1388
+ assert isinstance(lhs, tuple)
1389
+ f_args = [
1390
+ self._var_to_expr(item, reference, resolve_builtin_var, var_to_construct)
1391
+ for item in lhs
1392
+ ]
1393
+ right = self._var_to_expr(rhs, reference, resolve_builtin_var, var_to_construct)
1394
+ assert isinstance(args[2], ir.Var)
1395
+ builtin_vars[args[2]] = f"concat_ws({right}, {', '.join(f_args)})"
1396
+ elif relation == builtins.hash and isinstance(rhs, ir.Var):
1397
+ builtin_vars[rhs] = self._resolve_hash_var(reference, resolve_builtin_var, var_to_construct, lhs)
1398
+ elif relation == builtins.string and isinstance(rhs, ir.Var):
1399
+ if isinstance(lhs, ir.Var) and typer.to_base_primitive(lhs.type) == DateTime:
1400
+ lhs = self._var_to_expr(lhs, reference, resolve_builtin_var, var_to_construct)
1401
+ # Convert DateTime to string in the ISO 8601 format.
1402
+ if self._is_duck_db:
1403
+ builtin_vars[rhs] = f"""strftime({lhs}, '%Y-%m-%dT%H:%M:%S.%f')"""
1404
+ else:
1405
+ builtin_vars[rhs] = f"""to_varchar({lhs}, 'YYYY-MM-DD"T"HH24:MI:SS.FF3')"""
1406
+ else:
1407
+ builtin_vars[rhs] = lhs
1408
+ elif relation == builtins.parse_float and isinstance(rhs, ir.Var):
1409
+ left = self._var_to_expr(lhs, reference, resolve_builtin_var, var_to_construct)
1410
+ builtin_vars[rhs] = f"cast({left} AS DOUBLE)"
1411
+ elif relation == builtins.parse_date:
1412
+ if self._is_duck_db:
1413
+ raise Exception("DuckDB: unsupported builtin relation 'parse_date'.")
1414
+ assert len(args) == 3, f"Expected 3 args for `parse_date`, got {len(args)}: {args}"
1415
+ left = self._var_to_expr(lhs, reference, resolve_builtin_var, var_to_construct)
1416
+ right = self._var_to_expr(rhs, reference, resolve_builtin_var, var_to_construct)
1417
+ assert isinstance(args[2], ir.Var)
1418
+ builtin_vars[args[2]] = f"to_date({left}, {right})"
1419
+ elif relation == builtins.parse_datetime:
1420
+ assert len(args) == 3, f"Expected 3 args for `parse_datetime`, got {len(args)}: {args}"
1421
+ left = self._var_to_expr(lhs, reference, resolve_builtin_var, var_to_construct)
1422
+ right = self._var_to_expr(rhs, reference, resolve_builtin_var, var_to_construct)
1423
+ sub_expr = left
1424
+ if 'z' in right: # this means that out datetime formatter includes timezone, and we need to convert first.
1425
+ if self._is_duck_db:
1426
+ sub_expr = f"({left} AT TIME ZONE 'UTC')"
1427
+ else:
1428
+ sub_expr = f"convert_timezone('UTC', to_timestamp_tz({left}))"
1429
+ assert isinstance(args[2], ir.Var)
1430
+ builtin_vars[args[2]] = f"cast({sub_expr} as DATETIME)"
1431
+ elif relation in builtins.date_periods and isinstance(rhs, ir.Var):
1432
+ builtin_vars[rhs] = lhs
1433
+ date_period_var_type[rhs] = relation.name
1434
+ elif relation in builtins.date_builtins:
1435
+ if relation in {builtins.date_add, builtins.date_subtract, builtins.datetime_add,
1436
+ builtins.datetime_subtract}:
1437
+ assert len(args) == 3, f"Expected 3 args for {relation}, got {len(args)}: {args}"
1438
+ assert isinstance(rhs, ir.Var), f"Period variable must be `ir.Var`, got: {rhs}"
1439
+ period = date_period_var_type[rhs]
1440
+ period_val = self._var_to_expr(rhs, reference, resolve_builtin_var, var_to_construct)
1441
+
1442
+ left = self._var_to_expr(lhs, reference, resolve_builtin_var, var_to_construct)
1443
+
1444
+ if self._is_duck_db:
1445
+ op = "+" if relation in {builtins.date_add, builtins.datetime_add} else "-"
1446
+ expr = f"({left} {op} {period_val} * interval 1 {period})"
1447
+ else:
1448
+ sign = 1 if relation in {builtins.date_add, builtins.datetime_add} else -1
1449
+ expr = f"dateadd({period}, ({sign} * {period_val}), {left})"
1450
+
1451
+ result_var = args[2]
1452
+ assert isinstance(result_var, ir.Var), (
1453
+ f"Expected `ir.Var` type for the result of `{relation}`, "
1454
+ f"but got `{type(result_var).__name__}`: {result_var}"
1455
+ )
1456
+ builtin_vars[result_var] = expr
1457
+ # handle binary cases
1458
+ elif len(args) == 2:
1459
+ assert isinstance(rhs, ir.Var), f"Resulting variable must be `ir.Var`, got: {rhs}"
1460
+ expr_map = {
1461
+ builtins.date_year: "year",
1462
+ builtins.date_quarter: "quarter",
1463
+ builtins.date_month: "month",
1464
+ builtins.date_week: "week",
1465
+ builtins.date_day: "day",
1466
+ builtins.date_dayofyear: "dayofyear",
1467
+ builtins.date_weekday: "isodow" if self._is_duck_db else "dayofweekiso",
1468
+ builtins.datetime_second: "second",
1469
+ }
1470
+ expr = expr_map.get(relation)
1471
+ lhs = self._var_to_expr(lhs, reference, resolve_builtin_var, var_to_construct)
1472
+ builtin_vars[rhs] = f"{expr}({lhs})"
1473
+ elif len(args) == 3:
1474
+ result_var = args[2]
1475
+ assert isinstance(result_var, ir.Var), f"Resulting variable must be `ir.Var`, got: {result_var}"
1476
+ expr_map = {
1477
+ builtins.datetime_year: "year",
1478
+ builtins.datetime_quarter: "quarter",
1479
+ builtins.datetime_month: "month",
1480
+ builtins.datetime_week: "week",
1481
+ builtins.datetime_day: "day",
1482
+ builtins.datetime_dayofyear: "dayofyear",
1483
+ builtins.datetime_hour: "hour",
1484
+ builtins.datetime_minute: "minute",
1485
+ builtins.datetime_weekday: "isodow" if self._is_duck_db else "dayofweekiso",
1486
+ builtins.dates_period_days: "date_diff" if self._is_duck_db else "datediff",
1487
+ builtins.datetimes_period_milliseconds: "date_diff" if self._is_duck_db else "datediff"
1488
+ }
1489
+ expr = expr_map.get(relation)
1490
+ lhs = self._var_to_expr(lhs, reference, resolve_builtin_var, var_to_construct)
1491
+ rhs = self._var_to_expr(rhs, reference, resolve_builtin_var, var_to_construct)
1492
+ if relation == builtins.dates_period_days:
1493
+ sub_expr = f"'day', {lhs}, {rhs}" if self._is_duck_db else f"day, {lhs}, {rhs}"
1494
+ elif relation == builtins.datetimes_period_milliseconds:
1495
+ sub_expr = f"'millisecond', {lhs}, {rhs}" if self._is_duck_db else f"millisecond, {lhs}, {rhs}"
1496
+ else:
1497
+ sub_expr = self._convert_timezone(lhs, rhs)
1498
+ builtin_vars[result_var] = f"{expr}({sub_expr})"
1499
+ else:
1500
+ raise NotImplementedError("Unsupported number of arguments for date builtin (3+).")
1501
+ elif relation == builtins.construct_date_from_datetime:
1502
+ assert len(args) == 3, f"Expected 3 args for `construct_date_from_datetime`, got {len(args)}: {args}"
1503
+ dt_raw, tz, result = args
1504
+ tz = self._convert_value(tz)
1505
+
1506
+ assert isinstance(tz, str), "Timezone argument (tz) must be a string."
1507
+ assert isinstance(result, ir.Var), "Third argument (result) must be a variable."
1508
+
1509
+ # Note that the order of utc and dt is swapped in construct_date_from_datetime and construct_datetime,
1510
+ # because datetime->date (this case) ensures "the datetime is converted to the specified
1511
+ # timezone or offset string before extracting the date", while date->datetime (next case below)
1512
+ # ensures "the datetime is converted to UTC from the specified timezone or offset string."
1513
+ # (quotes are from pyrel0 docs for fromdate and fromdatetime).
1514
+ dt = self._var_to_expr(dt_raw, reference, resolve_builtin_var, var_to_construct)
1515
+ sub_expr = self._convert_timezone(dt, tz)
1516
+ sub_expr = f"cast({sub_expr} AS DATE)"
1517
+ builtin_vars[result] = sub_expr
1518
+ elif relation in builtins.math_builtins:
1519
+ result_var = rhs
1520
+ rel_name = relation.name
1521
+ left = self._var_to_expr(lhs, reference, resolve_builtin_var, var_to_construct)
1522
+ if relation in builtins.math_unary_builtins:
1523
+ method = "ln" if rel_name == builtins.natural_log.name else rel_name
1524
+ sub_expr = left
1525
+ if rel_name == builtins.factorial.name and self._is_duck_db:
1526
+ # Factorial requires an integer operand in DuckDB
1527
+ sub_expr = f"{left}::INTEGER"
1528
+ elif rel_name == builtins.log10.name:
1529
+ # log10 is not supported, so we use log with base 10
1530
+ sub_expr = f"10, {left}"
1531
+ method = "log"
1532
+ expr = f"{method}({sub_expr})"
1533
+ elif rel_name in {builtins.minimum.name, builtins.maximum.name, builtins.trunc_div.name,
1534
+ builtins.power.name, builtins.mod.name, builtins.pow.name,
1535
+ builtins.log.name}:
1536
+ assert len(args) == 3, f"Expected 3 args for {relation}, got {len(args)}: {args}"
1537
+
1538
+ result_var = args[2]
1539
+ right = self._var_to_expr(rhs, reference, resolve_builtin_var, var_to_construct)
1540
+
1541
+ if rel_name == builtins.minimum.name:
1542
+ expr = f"least({left}, {right})"
1543
+ elif rel_name == builtins.maximum.name:
1544
+ expr = f"greatest({left}, {right})"
1545
+ elif rel_name == builtins.trunc_div.name:
1546
+ expr = f"trunc({left} / {right})"
1547
+ elif rel_name == builtins.power.name or rel_name == builtins.pow.name:
1548
+ expr = f"power({left}, {right})"
1549
+ elif rel_name == builtins.log.name:
1550
+ expr = f"log({left}, {right})"
1551
+ else:
1552
+ expr = f"mod({left}, {right})"
1553
+ else:
1554
+ raise Exception(f"Unsupported math builtin relation: {relation}")
1555
+ assert isinstance(result_var, ir.Var), (
1556
+ f"Expected `ir.Var` type for the result of `{relation}`, "
1557
+ f"but got `{type(result_var).__name__}`: {result_var}"
1558
+ )
1559
+ builtin_vars[result_var] = expr
1560
+ elif relation in {builtins.parse_int64, builtins.parse_int128} and isinstance(rhs, ir.Var):
1561
+ builtin_vars[rhs] = self._var_to_expr(lhs, reference, resolve_builtin_var, var_to_construct, False)
1562
+ elif helpers.is_from_cast(lookup) and isinstance(rhs, ir.Var):
1563
+ # For the `from cast` relations we keep the raw var, and we will ground it later.
1564
+ builtin_vars[rhs] = lhs
1565
+ elif isinstance(lhs, ir.Var) and lhs in intermediate_builtin_vars and lhs not in (builtin_vars | var_to_construct):
1566
+ # Example IR:
1567
+ # Logical
1568
+ # Logical ^[v0]
1569
+ # int = 2
1570
+ # Logical ^[res=None]
1571
+ # Logical ^[res]
1572
+ # cast(Float, int, int_Float)
1573
+ # res = 2.1 * int_Float
1574
+ # v0 = res
1575
+ # -> derive _match_1(v0)
1576
+ #
1577
+ # In this example, the `int` variable is an intermediate result produced by the `=` (assignment) builtin.
1578
+ # We must retain this value in the `builtin_vars` mapping so it can be used when compiling the `cast`.
1579
+ # Ultimately, this allows us to fully resolve the final expression: `v0 = 2.1 * 2`.
1580
+ builtin_vars[lhs] = self._var_to_expr(rhs, reference, resolve_builtin_var, var_to_construct)
1581
+ elif isinstance(rhs, ir.Var) and rhs in intermediate_builtin_vars and rhs not in (builtin_vars | var_to_construct):
1582
+ # Please see the example above but in this case it will be `2 = int` builtin lookup instead of `int = 2`.
1583
+ builtin_vars[rhs] = self._var_to_expr(lhs, reference, resolve_builtin_var, var_to_construct)
1584
+ else:
1585
+ left = self._var_to_expr(lhs, reference, resolve_builtin_var, var_to_construct)
1586
+ right = self._var_to_expr(rhs, reference, resolve_builtin_var, var_to_construct)
1587
+
1588
+ if len(args) == 3:
1589
+ out_var = args[2]
1590
+ if isinstance(out_var, ir.Var):
1591
+ out_var = resolve_builtin_var(out_var)
1592
+ expr = f"({left} {relation_name} {right})"
1593
+ if isinstance(out_var, ir.Var):
1594
+ # For example, when this is an intermediate result
1595
+ # example: c = a - b in the IR is (a - b = d) and (d = c)
1596
+ builtin_vars[out_var] = expr
1597
+ else:
1598
+ # This means that var was already grounded, and we can add a WHERE clause.
1599
+ wheres.append(sql.Terminal(f"{expr} = {out_var}"))
1600
+ else:
1601
+ raise Exception(
1602
+ f"Expected `ir.Var` type for the relation `{relation}` output but got `{type(out_var).__name__}`: {out_var}"
1603
+ )
1604
+ else:
1605
+ # Replace intermediate vars with disjoined expressions
1606
+ expr = f"{left} {relation_name} {right}"
1607
+ wheres.append(sql.Terminal(expr))
1608
+
1609
+ # After handling all builtins we need to generate where statements for args with single lookup.
1610
+ for arg, lookup_set in var_lookups.items():
1611
+ if len(lookup_set) == 1:
1612
+ lookup = lookup_set[0]
1613
+ column = var_column[cast(ir.Var, arg), lookup]
1614
+ column_name = self._var_name(lookup.relation.id, column)
1615
+ ref = f"{sql_vars[lookup]}.{column_name}"
1616
+ # case when we have a builtin operation as a relation argument
1617
+ # example: `test(a - 1, b)` and we are handling here `a - 1` arg.
1618
+ if arg in builtin_vars:
1619
+ rhs_ref = resolve_builtin_var(arg)
1620
+ if isinstance(rhs_ref, ir.Var):
1621
+ rhs = reference(rhs_ref) if rhs_ref in var_lookups else rhs_ref.name
1622
+ elif isinstance(rhs_ref, ir.Literal):
1623
+ rhs = self._convert_value(rhs_ref.value)
1624
+ else:
1625
+ rhs = str(rhs_ref)
1626
+ wheres.append(sql.Terminal(f"{ref} = {rhs}"))
1627
+
1628
+ return builtin_vars, wheres, table_expressions
1629
+
1630
+ def _convert_timezone(self, dt: str, tz: str) -> str:
1631
+ if tz.lower() != "'utc'":
1632
+ if self._is_duck_db:
1633
+ return f"({dt} at time zone 'UTC') at time zone {tz}"
1634
+ else:
1635
+ return f"convert_timezone('UTC', {tz}, {dt})"
1636
+ return dt
1637
+
1638
+ def _process_builtin_table_expressions(self, builtin_table_expressions: dict[str, str]):
1639
+ """Convert builtin table expressions into SQL FROM clauses."""
1640
+ return [
1641
+ sql.From(expr, alias)
1642
+ for alias, expr in builtin_table_expressions.items()
1643
+ ]
1644
+
1645
def _process_constructs(self, lookups: OrderedSet[ir.Lookup], var_lookups: dict[ir.Var, OrderedSet[ir.Lookup]],
                        var_column: dict[Tuple[ir.Var, ir.Lookup], ir.Field], sql_vars: dict[ir.Lookup, str],
                        builtin_vars: dict[ir.Var, ir.Value|str|int], var_to_construct: dict[ir.Var, ir.Construct]) -> list[sql.Expr]:
    """
    Produce SQL `WHERE` conditions for lookup arguments that are bound to a construct
    (the `filter_by` case).

    Example:

        QB:
            Name = m.Concept('Name', extends=[str])
            Bank = m.Concept('Bank', identify_by={'name': Name})

            where(Bank.filter_by(name="Chase")).select(Bank)

        IR:
            construct(Bank, "name"::String, "Chase"::String, bank::Bank)
            Bank(bank::Bank)

        SQL:
            ... FROM Bank v0
            WHERE v0.bank = hash('Bank', 'name', 'Chase')

    Each construct-bound variable yields exactly one condition, attached to the first
    lookup argument (in iteration order) where it appears.
    """
    reference = partial(self._var_reference, var_lookups, sql_vars, var_column)
    resolve_builtin_var = partial(self._resolve_builtin_var, builtin_vars)

    conditions: list[sql.Expr] = []
    handled: set[ir.Var] = set()

    for lookup in lookups:
        relation = lookup.relation
        for position, arg in enumerate(lookup.args):
            if not isinstance(arg, ir.Var):
                continue
            # Only construct-bound variables matter here, and each one only once.
            if arg not in var_to_construct or arg in handled:
                continue
            handled.add(arg)

            rel_field = relation.fields[position]
            column_ref = f"{sql_vars[lookup]}.{self._var_name(relation.id, rel_field)}"
            construct_expr = self._resolve_construct_var(
                reference, resolve_builtin_var, var_to_construct, var_to_construct[arg]
            )
            conditions.append(sql.Terminal(f"{column_ref} = {construct_expr}"))

    return conditions
1692
+
1693
def _generate_where_clauses(self, var_lookups: dict[ir.Var, OrderedSet[ir.Lookup]],
                            var_column: dict[Tuple[ir.Var, ir.Lookup], ir.Field], sql_vars: dict[ir.Lookup, str],
                            union_lookups: dict[ir.Union, OrderedSet[ir.Lookup]]):
    """
    Build the join conditions for variables that appear in more than one lookup.

    Lookups that are not part of any union ("plain" lookups) are chained pairwise with
    equality conditions. Lookups that belong to a union contribute to a single `sql.Or`
    whose groups (one per union-member alias) compare the member's column with the last
    plain reference of the same variable.

    Returns:
        A list of `sql.Expr`: first all plain equality terminals, then, if any union
        group was produced, one `sql.Or` over those groups.
    """
    # Reverse mapping: lookup -> union
    lookup_to_union: dict[ir.Lookup, ir.Union] = {}
    for union, lookups in union_lookups.items():
        for lu in lookups:
            lookup_to_union[lu] = union

    wheres: list[sql.Expr] = []
    # var -> column references coming from lookups outside of any union
    plain_refs_by_var: dict[ir.Var, list[str]] = defaultdict(list)
    # SQL alias of a union member -> {var -> column reference on that member}
    all_union_members: dict[str, dict[ir.Var, str]] = defaultdict(dict)
    for arg, lookup_set in var_lookups.items():
        # if there are 2 lookups for the same variable, we need a join
        if len(lookup_set) > 1:
            # Step 1: Collect all lookups by union member or plain
            for lu in lookup_set:
                col = var_column[arg, lu]
                col_name = self._var_name(lu.relation.id, col)

                matched_union = lookup_to_union.get(lu)
                if matched_union:
                    # The column name computed for `lu` is reused for every member of the union.
                    # NOTE(review): presumably all union members expose the variable under the
                    # same column name; confirm.
                    for u_lu in union_lookups[matched_union]:
                        u_ref = f"{sql_vars[u_lu]}.{col_name}"
                        all_union_members[sql_vars[u_lu]][arg] = u_ref
                else:
                    ref = f"{sql_vars[lu]}.{col_name}"
                    plain_refs_by_var[arg].append(ref)

    # Step 2: Build AND chain of plain lookups
    and_clauses = []
    for refs in plain_refs_by_var.values():
        # join variable references pairwise (e.g. "x.id = y.id AND y.id = z.id")
        for lhs, rhs in zip(refs, refs[1:]):
            and_clauses.append(sql.Terminal(f"{lhs} = {rhs}"))

    # Step 3: Build one OR clause across union members
    or_groups: list[sql.Expr] = []
    for member_ref_map in all_union_members.values():
        expressions = []
        for arg_var, rhs in member_ref_map.items():
            plain_refs = plain_refs_by_var.get(arg_var)
            # Variables with no plain reference produce no condition for this member.
            if plain_refs:
                lhs = plain_refs[-1]  # last plain ref for that var
                expressions.append(sql.Terminal(f"{lhs} = {rhs}"))
        if expressions:
            # A single comparison is used as is; several are wrapped in an AND.
            or_groups.append(sql.And(expressions) if len(expressions) > 1 else expressions[0])

    wheres.extend(and_clauses)
    if or_groups:
        wheres.append(sql.Or(or_groups))

    return wheres
1746
+
1747
+ def _process_wheres_clauses(self, wheres: list[sql.Expr]) -> Optional[sql.Where]:
1748
+ # conjunction of not_wheres
1749
+ if len(wheres) == 0:
1750
+ where = None
1751
+ elif len(wheres) == 1:
1752
+ where = sql.Where(wheres[0])
1753
+ else:
1754
+ where = sql.Where(sql.And(wheres))
1755
+ return where
1756
+
1757
def _generate_select_output(self, outputs: list[OutputVar], builtin_vars: dict[ir.Var, ir.Value|str|int],
                            sql_vars: dict[ir.Lookup, str], var_column: dict[Tuple[ir.Var, ir.Lookup], ir.Field],
                            var_lookups: dict[ir.Var, OrderedSet[ir.Lookup]],
                            var_to_construct: dict[ir.Var, ir.Construct]):
    """
    Compute the projected columns of a SELECT for the given outputs.

    Each output is turned into a `sql.VarRef`, depending on what its value is bound to:
    a lookup column, a resolved builtin expression, a construct (`hash(...)`), an
    aggregate argument, a union (`COALESCE(...)`), or a plain value.

    Returns:
        A tuple `(not_null_vars, vars)` where `vars` is the list of `sql.VarRef` and
        `not_null_vars` is an ordered set of `alias.column` strings that were read from
        relations annotated with `from_cdc_annotation`.
    """

    reference = partial(self._var_reference, var_lookups, sql_vars, var_column)
    resolve_builtin_var = partial(self._resolve_builtin_var, builtin_vars)

    def handle_lookup_var(var, var_type, alias):
        # Project the column of one of the lookups that binds `var`.
        lookup = var_lookups[var].some()
        relation = lookup.relation
        var_name = sql_vars[lookup]
        column_name = self._var_name(relation.id, var_column[var, lookup])
        vars.append(sql.VarRef(var_name, column_name, alias, var_type))
        if from_cdc_annotation in relation.annotations:
            not_null_vars.add(f"{var_name}.{column_name}")

    def handle_construct(construct):
        # Generate constructions like hash(`x`, `y`, TABLE_ALIAS.COLUMN_NAME)
        elements = []
        for val in construct.values:
            if val in builtin_vars:
                val = resolve_builtin_var(val)
                if isinstance(val, str):
                    # In case we parsed builtin into some expression, we may add it as an element.
                    # For example, `TO_DATE('1990-1-1', 'Y-m-d')` or `(v1.value + 5)`.
                    elements.append(f"{val}")
                    continue
            if isinstance(val, ir.Var):
                if val in var_to_construct:
                    # Nested construct: recurse to get the inner hash(...) expression.
                    elements.append(handle_construct(var_to_construct[val]))
                else:
                    lookup = var_lookups[val].some()
                    column_name = self._var_name(lookup.relation.id, var_column[val, lookup])
                    lookup_var = f"{sql_vars[lookup]}.{column_name}"
                    elements.append(lookup_var)
                    if from_cdc_annotation in lookup.relation.annotations:
                        not_null_vars.add(lookup_var)
            else:
                elements.append(str(self._convert_value(val)))
        return f"hash({', '.join(elements)})"

    # finally, compute what the select will return
    # `vars` and `not_null_vars` are filled in by the helpers above (they close over them).
    vars = []
    not_null_vars = ordered_set()
    for output in outputs:
        alias, var, var_type, task = output.alias, output.value, output.value_type, output.task
        if isinstance(var, ir.Var):
            if var in var_lookups and not task:
                handle_lookup_var(var, var_type, alias)
            elif var in builtin_vars:
                var_ref = resolve_builtin_var(var)
                if var_ref in var_lookups:
                    # Case: result of `cast` variable
                    handle_lookup_var(var_ref, var_type, alias)
                elif isinstance(var_ref, ir.Literal):
                    # Case: literal value from `cast` relation, e.g. `decimal(0)`
                    vars.append(sql.VarRef(str(self._convert_value(var_ref.value)), alias=alias, type=var_type))
                else:
                    # Example: We may have `decimal(0)` in QB which turns in IR into:
                    #   (cast(Decimal128, 0, vDecimal128) and decimal128(vDecimal128, res_3))
                    #   and we need to make it `0` in SQL.
                    var_ref = var_ref.name if isinstance(var_ref, ir.Var) else str(var_ref)
                    vars.append(sql.VarRef(var_ref, alias=alias, type=var_type))
            elif task:
                if isinstance(task, ir.Construct):
                    # Generate constructions like hash(`x`, `y`, TABLE_ALIAS.COLUMN_NAME) as `alias`
                    vars.append(sql.VarRef(handle_construct(task), alias=alias, type=var_type))
                elif isinstance(task, ir.Aggregate):
                    # `count` projects the last projection variable; other aggregations their first argument.
                    result_arg = task.projection[-1] if task.aggregation == builtins.count else task.args[0]
                    result_arg = resolve_builtin_var(result_arg)
                    ref = reference(result_arg) if isinstance(result_arg, ir.Var) else str(result_arg)
                    vars.append(sql.VarRef(str(ref), alias=alias, type=var_type))
                elif isinstance(task, ir.Union):
                    # Handle `COALESCE` of all lookups of this var from the union
                    lookups = self._extract_all_lookups_from_union(task)
                    elements = []

                    for lu in lookups:
                        if any(isinstance(arg, ir.Var) and arg == var for arg in lu.args):
                            column_name = self._var_name(lu.relation.id, var_column[var, lu])
                            elements.append(f"{sql_vars[lu]}.{column_name}")

                    expr = "COALESCE(" + ", ".join(elements) + ")"
                    vars.append(sql.VarRef(expr, alias=alias, type=var_type))
        else:
            # TODO - abusing even more here, because var is a value!
            vars.append(sql.VarRef(str(self._convert_value(var)), alias=alias, type=var_type))
    return not_null_vars, vars
1846
+
1847
def _generate_select_nots(self, nots: Optional[list[ir.Not]], var_lookups: dict[ir.Var, OrderedSet[ir.Lookup]],
                          sql_vars: dict[ir.Lookup, str], var_column:dict[Tuple[ir.Var, ir.Lookup], ir.Field],
                          index: int) -> tuple[list[sql.NotExists], int]:
    """
    Compile every `ir.Not` into a `NOT EXISTS (SELECT 1 ...)` sub-select.

    Args:
        nots: negations to compile; `None` or empty yields no sub-selects.
        var_lookups, sql_vars, var_column: metadata of the enclosing (outer) select,
            used to correlate the sub-select with it.
        index: counter passed to `_extract_lookups_metadata`; it is advanced by the
            number of aliases each sub-select introduces.

    Returns:
        A tuple `(not_exists, index)` with the generated `sql.NotExists` expressions
        and the advanced counter.
    """
    not_exists = []
    if nots:
        for not_expr in nots:
            unions = []
            inner_nots = []
            constructs = []
            if isinstance(not_expr.task, ir.Lookup):
                all_lookups = [not_expr.task]
            else:
                logical = cast(ir.Logical, not_expr.task)
                all_lookups = cast(list[ir.Lookup], filter_by_type(logical.body, ir.Lookup))
                logicals = cast(list[ir.Logical], filter_by_type(logical.body, ir.Logical))
                inner_nots = cast(list[ir.Not], filter_by_type(logical.body, ir.Not))
                unions = cast(list[ir.Union], filter_by_type(logical.body, ir.Union))
                constructs = cast(list[ir.Construct], filter_by_type(logical.body, ir.Construct))

                # Some of the lookup relations we wrap into logical and we need to get them out for the SQL compilation.
                # For example QB `decimal(0)` in IR will look like this:
                #   Logical ^[res]
                #       Exists(vDecimal128)
                #           Logical
                #               cast(Decimal128, 0, vDecimal128)
                #               decimal128(vDecimal128, res)
                if logicals:
                    unions = self._extract_all_of_type_from_logicals(logicals, ir.Union) + unions
                    all_lookups = self._extract_all_of_type_from_logicals(logicals, ir.Lookup) + all_lookups

            union_lookups: dict[ir.Union, OrderedSet[ir.Lookup]] = self._extract_all_lookups_per_union(unions)
            all_lookups.extend(list(chain.from_iterable(union_lookups.values())))

            lookups = OrderedSet.from_iterable(t for t in all_lookups if not builtins.is_builtin(t.relation))
            froms, wheres, not_sql_vars, not_var_column, not_var_lookups = self._extract_lookups_metadata(lookups, index)
            index += len(not_sql_vars)

            # Merged view of the outer select and this sub-select; entries of the sub-select win on conflicts.
            all_sql_vars = {**sql_vars, **not_sql_vars}
            all_var_column = {**var_column, **not_var_column}
            all_var_lookups = {**var_lookups, **not_var_lookups}

            var_to_construct = {c.id_var: c for c in constructs} if constructs else {}
            builtin_lookups = OrderedSet.from_iterable(t for t in all_lookups if builtins.is_builtin(t.relation))
            builtin_vars, builtin_wheres, builtin_table_expressions = (
                self._resolve_builtins(builtin_lookups, all_var_lookups, all_var_column, all_sql_vars, var_to_construct))

            froms.extend(self._process_builtin_table_expressions(builtin_table_expressions))

            wheres.extend(builtin_wheres)

            # `lookups` are the lookups of this sub-select, so their aliases live in `not_sql_vars`.
            # Use the merged maps (as `_resolve_builtins` does above): with the outer maps alone,
            # `sql_vars[lookup]` fails for any construct-bound variable inside the negation.
            construct_wheres = self._process_constructs(lookups, all_var_lookups, all_var_column, all_sql_vars,
                                                        builtin_vars, var_to_construct)
            wheres.extend(construct_wheres)

            # We need to join the not exists select with the outside select query context
            for arg, lookup_set in not_var_lookups.items():
                if len(lookup_set) > 0:
                    lu = lookup_set[0]
                    column = not_var_column[cast(ir.Var, arg), lu]
                    column_name = self._var_name(lu.relation.id, column)
                    lhs = f"{not_sql_vars[lu]}.{column_name}"

                    # lookup the same var from the outside context to make the join
                    matching_lookup = next(
                        (lookup for (var, lookup) in var_column if var == arg),
                        None
                    )

                    if matching_lookup is not None:
                        matching_column = var_column[(arg, matching_lookup)]
                        matching_column_name = self._var_name(matching_lookup.relation.id, matching_column)
                        rhs = f"{sql_vars[matching_lookup]}.{matching_column_name}"
                        wheres.append(sql.Terminal(f"{lhs} = {rhs}"))

            wheres.extend(self._generate_where_clauses(not_var_lookups, not_var_column, not_sql_vars, union_lookups))

            # Nested negations are compiled recursively, with this sub-select as their outer context.
            inner_not_exists, index = self._generate_select_nots(inner_nots, not_var_lookups, not_sql_vars, not_var_column, index)
            wheres.extend(inner_not_exists)

            where = self._process_wheres_clauses(wheres)
            not_exists.append(sql.NotExists(sql.Select(False, [1], froms, where)))

    return not_exists, index
1930
+
1931
+ def _extract_all_of_type_from_logical(self, task: ir.Logical, target_type: type) -> list:
1932
+ """Recursively extract all instances of `target_type` from a Logical task."""
1933
+ return self._extract_all_of_type_from_logicals([task], target_type)
1934
+
1935
def _extract_all_of_type_from_logicals(self, logicals: list[ir.Logical], target_type: type) -> list:
    """
    Walk the given Logical tasks depth-first and gather every `target_type` instance.

    Nested `ir.Logical` nodes are descended into (never collected themselves); matches
    are de-duplicated through an ordered set. A falsy `logicals` yields an empty list.
    """

    def walk(node: ir.Logical):
        for child in node.body:
            if isinstance(child, ir.Logical):
                yield from walk(child)
            elif isinstance(child, target_type):
                yield child

    found = ordered_set()
    for root in logicals or []:
        for match in walk(root):
            found.add(match)

    return found.list or []
1950
+
1951
+ def _extract_all_lookups_per_union(self, unions: Optional[list[ir.Union]]) -> dict[ir.Union, OrderedSet[ir.Lookup]]:
1952
+ return {
1953
+ union: self._extract_all_lookups_from_union(union)
1954
+ for union in unions or []
1955
+ }
1956
+
1957
def _extract_all_lookups_from_union(self, union: ir.Union) -> OrderedSet[ir.Lookup]:
    """
    Gather the lookups of a union's tasks into an ordered set.

    Logical tasks are searched recursively for lookups, direct lookup tasks are added
    as they are, and any other task kind is ignored.
    """
    collected: OrderedSet[ir.Lookup] = OrderedSet()
    for member in union.tasks:
        if isinstance(member, ir.Logical):
            nested = self._extract_all_of_type_from_logicals([member], ir.Lookup)
            collected.update(nested)
            continue
        if isinstance(member, ir.Lookup):
            collected.add(cast(ir.Lookup, member))
    return collected
1965
+
1966
+ def _extract_all_not_null_vars_from_nots(self, nots: Optional[list[ir.Not]]) -> set[ir.Var]:
1967
+ vars: set[ir.Var] = set()
1968
+ null_vars: set[ir.Var] = set()
1969
+
1970
+ def visit(task):
1971
+ if isinstance(task, ir.Not):
1972
+ visit(task.task)
1973
+ elif isinstance(task, ir.Logical):
1974
+ for var in task.hoisted:
1975
+ if isinstance(var, ir.Var):
1976
+ vars.add(var)
1977
+ elif isinstance(var, ir.Default):
1978
+ (vars if var.value is not None else null_vars).add(var.var)
1979
+ for subtask in task.body:
1980
+ visit(subtask)
1981
+ elif isinstance(task, ir.Lookup):
1982
+ vars.update(arg for arg in task.args if isinstance(arg, ir.Var))
1983
+
1984
+ for not_task in nots or []:
1985
+ visit(not_task)
1986
+
1987
+ return vars - null_vars
1988
+
1989
def _var_to_expr(self, var, reference, resolve_builtin_var, var_to_construct: dict[ir.Var, ir.Construct],
                 quote_strings: bool = True):
    """
    Render `var` (a variable or a value) as a SQL expression string.

    Construct-bound variables are rendered through `_resolve_construct_var`. Anything else
    is first passed through `resolve_builtin_var`; a resulting variable becomes a column
    reference, a literal is converted with `_convert_value`, an int is stringified, and any
    other result is stringified and wrapped in single quotes only when `var` is not a
    variable and `quote_strings` is set.
    """
    is_var = isinstance(var, ir.Var)
    if is_var and var in var_to_construct:
        return self._resolve_construct_var(reference, resolve_builtin_var, var_to_construct, var_to_construct[var])

    resolved = resolve_builtin_var(var)
    if isinstance(resolved, ir.Var):
        return reference(resolved)
    if isinstance(resolved, ir.Literal):
        return str(self._convert_value(resolved, quote_strings=quote_strings))
    if isinstance(resolved, int):
        return str(resolved)
    if is_var or not quote_strings:
        return str(resolved)
    return f"'{resolved}'"
2005
+
2006
+ def _get_update_aliases(self, update: ir.Update, var_to_construct, var_to_union, skip_type:bool=False):
2007
+ relation = update.relation
2008
+ return [
2009
+ self._get_alias(
2010
+ self._var_name(relation.id, f),
2011
+ arg,
2012
+ self._convert_type(f.type) if not skip_type else None,
2013
+ var_to_construct,
2014
+ var_to_union,
2015
+ )
2016
+ for f, arg in zip(relation.fields, update.args)
2017
+ ]
2018
+
2019
def _get_alias(self, key, arg, arg_type, var_to_construct, var_to_union):
    """
    Wrap `arg` in an `OutputVar` named `key` with type `arg_type`.

    For variables, the construct bound to the variable (or, failing that, its union) is
    attached as the task; non-variable values get no task argument.
    """
    if isinstance(arg, ir.Var):
        task = var_to_construct.get(arg) or var_to_union.get(arg)
        return OutputVar(arg, key, arg_type, task)
    return OutputVar(arg, key, arg_type)
2024
+
2025
def _get_tuples(self, logical: ir.Logical, u: ir.Update):
    """
    Compute the tuples to insert for update `u`.

    The update arguments are expected to be static values or variables bound to a
    construct task in `logical.body`. Construct-bound variables become `hash(...)`
    expressions (with one level of nested constructs expanded); `FrozenOrderedSet`
    arguments are kept as sets of converted values so that `_product` can expand them
    into several tuples (fields whose role is multi).
    """
    # TODO - this only works if the variable is bound to a Construct task, we need a more general approach.
    convert = self._convert_value

    def construct_for(var):
        # First construct statement in the logical whose id variable is `var`, if any.
        return next(
            (stmt for stmt in logical.body if isinstance(stmt, ir.Construct) and stmt.id_var == var),
            None,
        )

    def hash_call(parts):
        return f"hash({', '.join(parts)})"

    def construct_element(val):
        # A value of a construct: either a nested construct (expanded one level) or a converted value.
        if isinstance(val, ir.Var):
            inner = construct_for(val)
            if inner:
                return hash_call([str(convert(x)) for x in inner.values])
        return str(convert(val))

    def resolve_value(arg):
        if isinstance(arg, ir.Var):
            construct = construct_for(arg)
            if not construct:
                return convert(arg)
            return hash_call([construct_element(val) for val in construct.values])
        if isinstance(arg, FrozenOrderedSet):
            return frozen(*[convert(v) for v in arg])
        return convert(arg)

    return self._product([resolve_value(a) for a in u.args])
2068
+
2069
+ def _product(self, values):
2070
+ """ Compute a cartesian product of values when the value is a FrozenOrderedSet. """
2071
+ # TODO - some pass needs to check that this is correct, i.e. that we are using a
2072
+ # FrozenOrderedSet only if the field is of role multi.
2073
+ tuples = [[]]
2074
+ for value in values:
2075
+ if isinstance(value, FrozenOrderedSet):
2076
+ tuples = [prev + [element] for prev in tuples for element in value]
2077
+ else:
2078
+ tuples = [prev + [value] for prev in tuples]
2079
+ return [tuple(t) for t in tuples]
2080
+
2081
+ def _convert_value(self, v, quote_strings:bool=True) -> str|int:
2082
+ """ Convert the literal value in v to a SQL value."""
2083
+ if isinstance(v, str):
2084
+ return f"'{v}'" if quote_strings else v
2085
+ if isinstance(v, PyDecimal):
2086
+ return str(v)
2087
+ if isinstance(v, ir.ScalarType):
2088
+ return f"'{v.name}'"
2089
+ if isinstance(v, ir.Literal):
2090
+ if v.type == types.Date:
2091
+ return f"cast('{v.value}' as date)"
2092
+ if v.type == types.DateTime:
2093
+ return f"cast('{v.value}' as datetime)"
2094
+ return self._convert_value(v.value, quote_strings)
2095
+ if isinstance(v, float):
2096
+ if math.isnan(v):
2097
+ return "cast('NaN' as DOUBLE)"
2098
+ elif v == float("inf"):
2099
+ return "cast('Infinity' as DOUBLE)"
2100
+ elif v == float("-inf"):
2101
+ return "cast('-Infinity' as DOUBLE)"
2102
+ return str(v)
2103
+ if isinstance(v, datetime.datetime):
2104
+ return f"cast('{v}' as datetime)"
2105
+ if isinstance(v, datetime.date):
2106
+ return f"cast('{v}' as date)"
2107
+ if isinstance(v, bool):
2108
+ return str(v).lower()
2109
+ if isinstance(v, int):
2110
+ return v
2111
+ return str(v)
2112
+
2113
# SQL column types shared by both target dialects, keyed by builtin type.
COMMON_CONVERSION = {
    Hash: "DECIMAL(38, 0)",
    String: "TEXT",
    Number: "DOUBLE",
    Bool: "BOOLEAN",
    Date: "DATE",
    DateTime: "DATETIME",
    Float: "FLOAT(53)",
    RowId: "NUMBER(38, 0)", # NUMBER(38,0) cannot hold the full UInt128 range — it can only go up to about 2¹²⁶. We need to find something better.
    UInt128: "NUMBER(38, 0)" # NUMBER(38,0) cannot hold the full UInt128 range — it can only go up to about 2¹²⁶. We need to find something better.
}
# Integer types are mapped differently per dialect.
SNOWFLAKE_OVERRIDES = {
    Int64: "NUMBER(19, 0)",
    Int128: "NUMBER(38, 0)",
}
DUCKDB_OVERRIDES = {
    Int64: "BIGINT",
    Int128: "HUGEINT",
}
# Final per-dialect tables: the common entries plus the dialect-specific overrides.
SF_BUILTIN_CONVERSION = {**COMMON_CONVERSION, **SNOWFLAKE_OVERRIDES}
DUCKDB_BUILTIN_CONVERSION = {**COMMON_CONVERSION, **DUCKDB_OVERRIDES}
2134
def _convert_type(self, t: ir.Type) -> str:
    """
    Return the SQL type that corresponds to the IR type `t`.

    Types that are neither builtin nor value types (entities) map to `DECIMAL(38, 0)`.
    Otherwise the base primitive of `t` is looked up in the dialect conversion tables
    (DuckDB table first when `self._is_duck_db`, then the Snowflake table); decimal
    types map to `DECIMAL(precision,scale)`.

    Raises:
        Exception: if no SQL type is known for `t`.
    """
    # entities become DECIMAL(38, 0)
    is_entity = not (types.is_builtin(t) or types.is_value_type(t))
    if is_entity:
        return "DECIMAL(38, 0)"

    # convert known builtins
    base_type = typer.to_base_primitive(t)
    if isinstance(base_type, ir.ScalarType):
        tables = []
        if self._is_duck_db:
            tables.append(self.DUCKDB_BUILTIN_CONVERSION)
        tables.append(self.SF_BUILTIN_CONVERSION)
        for table in tables:
            if base_type in table:
                return table[base_type]
    if isinstance(base_type, ir.DecimalType):
        return f"DECIMAL({base_type.precision},{base_type.scale})"
    raise Exception(f"Unknown built-in type: {t}")
2150
+
2151
def _get_relations(self, model: ir.Model) -> Tuple[list[ir.Relation], list[ir.Relation]]:
    """
    Split the relations used by the model into those needing a table and the builtins read.

    Returns:
        `(table_relations, used_builtins)`: the used relations for which
        `_is_table_creation_required` holds, and the builtin relations read from the root.
    """
    visitor = ReadWriteVisitor()
    model.accept(visitor)

    root = cast(ir.Logical, model.root)

    # For query compilation exclude read-only tables because we do not need to declare `CREATE TABLE` statements
    if self._query_compilation:
        used_relations = visitor.writes(root)
    else:
        used_relations = visitor.writes(root) | visitor.reads(root)

    # Filter only relations that require table creation
    table_relations = list(filter(self._is_table_creation_required, used_relations))
    used_builtins = list(filter(builtins.is_builtin, visitor.reads(root)))

    return table_relations, used_builtins
2172
+
2173
def _is_table_creation_required(self, r: ir.Relation) -> bool:
    """
    Tell whether relation `r` needs a SQL table to be created.

    No table is created for:
    - builtin relations and annotations
    - relations annotated with `from_cdc_annotation` (CDC relations)
    - the relation named "rank"
    - error relations
    - relations with `types.Any` fields that have overloads
    - ValueType population relations

    Raises:
        ValueError: if a field type is `types.Any` and the relation has no overloads.
    """
    if builtins.is_builtin(r) or builtins.is_annotation(r):
        return False
    if from_cdc_annotation in r.annotations or r.name == "rank":
        return False
    # TODO: revisit this during `RAI-39124`. For now we filter out all error relations.
    if self._is_error_relation(r):
        return False

    has_unresolved_field = any(relation_field.type == types.Any for relation_field in r.fields)
    if has_unresolved_field:
        if r.overloads:
            return False
        raise ValueError(f"Relation '{r.name}' has unresolved field types (`types.Any`) and no overloads.")

    return not self._is_value_type_population_relation(r)
2200
+
2201
+ def _is_error_relation(self, r: ir.Relation) -> bool:
2202
+ return r.name in self._error_relation_names or self._relation_name(r).startswith('error_')
2203
+
2204
+ @staticmethod
2205
+ def _is_value_type_population_relation(r: ir.Relation) -> bool:
2206
+ """
2207
+ Check if the relation is a ValueType population relation:
2208
+ - Has exactly one field
2209
+ - Field type is a value type
2210
+ - Annotated with concept_relation_annotation
2211
+ """
2212
+ if not r.fields or len(r.fields) != 1:
2213
+ return False
2214
+ return types.is_value_type(r.fields[0].type) and concept_relation_annotation in r.annotations
2215
+
2216
def _relation_name(self, relation: ir.Relation):
    """
    Return the SQL name of `relation`.

    External and builtin relations keep their own name; every other relation gets a
    name from `relation_name_cache`, based on its sanitized name and name prefix.
    """
    keeps_own_name = helpers.is_external(relation) or helpers.builtins.is_builtin(relation)
    if keeps_own_name:
        return relation.name
    sanitized = helpers.sanitize(relation.name)
    prefix = helpers.relation_name_prefix(relation)
    return self.relation_name_cache.get_name(relation.id, sanitized, prefix)
2220
+
2221
+ def _register_external_relations(self, model: ir.Model):
2222
+ # force all external relations to get a name in the cache, so that internal relations
2223
+ # cannot use those names in _relation_name
2224
+ for r in model.relations:
2225
+ if helpers.is_external(r):
2226
+ self.relation_name_cache.get_name(r.id, r.name)
2227
+
2228
+ def _get_relation_info(self, relation: ir.Relation) -> RelationInfo:
2229
+ if relation not in self.relation_infos:
2230
+ self.relation_infos[relation] = RelationInfo()
2231
+ return self.relation_infos[relation]
2232
+
2233
def mark_used(self, relation: ir.Relation):
    """Set the `used` flag on the info record of `relation`."""
    info = self._get_relation_info(relation)
    info.used = True
2235
+
2236
def add_table_select(self, relation: ir.Relation, select: sql.Select):
    """Append `select` to the table selects recorded for `relation`."""
    info = self._get_relation_info(relation)
    info.table_selects.append(select)
2238
+
2239
def add_view_select(self, relation: ir.Relation, select: sql.Select):
    """Append `select` to the view selects recorded for `relation`."""
    info = self._get_relation_info(relation)
    info.view_selects.append(select)
2241
+
2242
def add_dynamic_table_select(self, relation: ir.Relation, select: sql.Select):
    """Append `select` to the dynamic table selects recorded for `relation`."""
    info = self._get_relation_info(relation)
    info.dynamic_table_selects.append(select)
2244
+
2245
def _var_name(self, relation_id: int, arg: Union[ir.Var, ir.Field]):
    """
    Return the SQL column name for `arg` within the relation `relation_id`.

    The name comes from `relation_arg_name_cache` and is sanitized; the names `any` and
    `order` (compared case-insensitively) are wrapped in double quotes.
    """
    cache_key = (relation_id, arg.id)
    name = helpers.sanitize(self.relation_arg_name_cache.get_name(cache_key, arg.name))
    if name.lower() in {"any", "order"}:
        return f'"{name}"'
    return name
2248
+
2249
def _register_relation_args(self, relations: list[ir.Relation]):
    """
    Reset `relation_arg_name_cache` and register every field of the given relations in it.

    Registering all fields up front gives each one a unique name, which avoids name
    collisions during SQL compilation.
    """
    cache = NameCache()
    self.relation_arg_name_cache = cache
    for r in relations:
        for rel_field in r.fields:
            cache.get_name((r.id, rel_field.id), rel_field.name)
2258
+
2259
def _sort_builtin_lookups(self, lookups: list[ir.Lookup], output_vars: set[ir.Var]) -> list[ir.Lookup]:
    """
    Order builtin lookups for processing.

    Equality lookups that mention an output variable are moved to the end (in their
    original order) because they depend on other builtin lookups; all remaining lookups
    are topologically sorted by the edges from `_build_builtin_lookups_dependencies`.
    """
    # Process lookups with output vars at last because they depend on other builtin lookups.
    deferred = []
    for lookup in lookups:
        if builtins.is_eq(lookup.relation) and any(arg in output_vars for arg in lookup.args):
            deferred.append(lookup)
    remaining = [lookup for lookup in lookups if lookup not in deferred]

    edges = self._build_builtin_lookups_dependencies(remaining)
    return topological_sort(remaining, edges) + deferred
2268
+
2269
@staticmethod
def _build_builtin_lookups_dependencies(lookups: list[ir.Lookup]) -> list[Tuple[ir.Lookup, ir.Lookup]]:
    """
    Build the dependency edges handed to topological_sort for a list of builtin lookups.

    Every edge is appended as `(first, second)`, where `first` is meant to be ordered before
    `second`. The last argument of a lookup is treated as its output (the last two for `split`);
    every other argument is treated as an input.

    Rules applied to each lookup:
    1. Terminal comparisons (neq, gt, lt, gte, lte) come after every non-terminal lookup.
    2. Conditionals (starts_with, ends_with, contains, like_match) come after every lookup that
       is neither terminal nor conditional.
    3. eq with exactly one variable argument grounds that variable, so it comes before any
       lookup that uses the variable in a non-last position.
    4. eq with two variable arguments comes after any lookup whose last argument is one of them.
    5. A lookup whose output is used in a non-last position of another lookup comes first.
    6. A lookup with inputs (like range, concat, substring, etc.) comes after any non-terminal,
       non-conditional lookup whose last argument is one of those inputs.

    Terminal, conditional and one- or two-variable eq lookups stop at their own rule; rules 5
    and 6 are only applied to the remaining lookups.
    """

    edges = []
    arg_usages = defaultdict(list)  # arg -> List[(lookup, position)]

    terminal_relations = {
        builtins.neq.name, builtins.gt.name, builtins.lt.name, builtins.gte.name, builtins.lte.name
    }

    conditional_relations = {
        builtins.starts_with.name, builtins.ends_with.name, builtins.contains.name, builtins.like_match.name
    }

    # Step 1: Record, for every argument, each lookup that uses it and at which position
    for lookup in lookups:
        for idx, arg in enumerate(lookup.args):
            arg_usages[arg].append((lookup, idx))

    # Step 2: Add edges based on lookup semantics
    for lookup in lookups:
        relation_name = lookup.relation.name
        args = lookup.args

        # Rule 1: Terminal relations come after every non-terminal lookup
        if relation_name in terminal_relations:
            for other in lookups:
                other_name = other.relation.name
                if other is not lookup and other_name not in terminal_relations:
                    edges.append((other, lookup))
            continue  # skip rest of rules for terminal lookups

        # Rule 2: Conditional relations go before terminals, but after others
        if relation_name in conditional_relations:
            for other in lookups:
                if other is not lookup:
                    other_name = other.relation.name
                    if other_name not in terminal_relations and other_name not in conditional_relations:
                        edges.append((other, lookup))  # only non-conditional, non-terminal
            continue

        if relation_name == builtins.eq.name:
            var_args = [arg for arg in args if isinstance(arg, ir.Var)]

            # Rule 3: eq with exactly one variable argument comes first
            if len(var_args) == 1:
                # This lookup defines a var — should come before any that use this var non-terminally
                grounded_var = var_args[0]
                for other, pos in arg_usages[grounded_var]:
                    if other is not lookup:
                        # Only a use in a non-last (input) position creates a dependency
                        if pos != len(other.args) - 1:
                            edges.append((lookup, other))
                continue  # skip adding other edges among terminal assignments like a=2, b=2

            # Rule 4: eq with two vars must wait until one of them is grounded
            elif len(var_args) == 2:
                # eq(x, y): both are vars — lookup must come after those grounding either var
                for var in var_args:
                    for other, pos in arg_usages[var]:
                        if other is not lookup:
                            # `other` grounds the var when the var is its last argument
                            if other.args[-1] == var:
                                edges.append((other, lookup))
                continue

        # Builtins generally have a single output var, but `split` returns `index` and `part`
        num_outputs = 2 if lookup.relation == builtins.split else 1

        # Rule 5: last output args must go first if used elsewhere non-terminally
        for out_arg in args[-num_outputs:]:
            for other, pos in arg_usages.get(out_arg, []):
                if other is not lookup and pos != len(other.args) - 1:
                    edges.append((lookup, other))

        # Rule 6: builtins with multiple input args must wait until all input args are grounded,
        # for example, range(start, end, step, result)
        if len(args) > num_outputs:
            for input_arg in args[:-num_outputs]:
                for other, pos in arg_usages.get(input_arg, []):
                    if other is not lookup:
                        other_name = other.relation.name
                        if other_name not in terminal_relations and other_name not in conditional_relations:
                            # Ensure any lookup that defines this arg (as last) comes before
                            if other.args[-1] == input_arg:
                                edges.append((other, lookup))

    return edges
2365
+
2366
def _union_output_selects(self, statements: list[sql.Node]) -> list[sql.Node]:
    """
    Merge the sql.Select nodes of `statements` into a single sql.UnionAllSelect.

    Non-Select nodes keep their relative order and come first. Every Select node, wherever it
    appears in the input, is collected in order and emitted at the end: wrapped in one
    sql.UnionAllSelect when there are several, or passed through unchanged when there is one.
    """
    others: list[sql.Node] = []
    collected: list[sql.Select] = []

    for node in statements:
        bucket = collected if isinstance(node, sql.Select) else others
        bucket.append(node)

    if len(collected) > 1:
        others.append(sql.UnionAllSelect(collected))
    elif collected:
        # A lone SELECT needs no UNION ALL wrapper.
        others.append(collected[0])

    return others
2384
+
2385
def _sort_dependencies(self, statements: list[sql.Node]) -> list[sql.Node]:
    """
    Reorder SQL statements into execution order.

    The result is the concatenation of the following groups:
    1. CREATE FUNCTION statements
    2. CREATE TABLE statements
    3. INSERT, CREATE VIEW and CREATE DYNAMIC TABLE statements, ordered by
       `_sort_statements_dependency_graph`
    4. UPDATE statements
    5. Other statements except SELECT queries
    6. SELECT queries
    """
    functions: list = []
    tables: list = []
    update_stmts: list = []
    select_stmts: list = []
    others: list = []
    # Statements that need dependency sorting, grouped by the table/view they write to.
    by_target: dict[str, list[Union[sql.Insert, sql.CreateView, sql.CreateDynamicTable]]] = defaultdict(list)

    def bucket_for(stmt: sql.Node) -> list:
        # The checks run in a fixed order; the first matching type wins.
        if isinstance(stmt, sql.CreateTable):
            return tables
        if isinstance(stmt, sql.Insert):
            return by_target[stmt.table]
        if isinstance(stmt, (sql.CreateView, sql.CreateDynamicTable)):
            return by_target[stmt.name]
        if isinstance(stmt, sql.Update):
            return update_stmts
        if isinstance(stmt, sql.Select):
            return select_stmts
        if isinstance(stmt, sql.CreateFunction):
            return functions
        return others

    for stmt in statements:
        bucket_for(stmt).append(stmt)

    ordered = self._sort_statements_dependency_graph(by_target)

    return functions + tables + ordered + update_stmts + others + select_stmts
2422
+
2423
@staticmethod
def _sort_statements_dependency_graph(statements: dict[str, list[Union[sql.Insert, sql.CreateView, sql.CreateDynamicTable]]]) -> list[sql.Insert]:
    """
    Topologically sort INSERT, CREATE VIEW and CREATE DYNAMIC TABLE statements.

    `statements` maps a target table/view name to the statements writing to it. For every
    statement query, the tables named in its FROM and JOIN clauses, and in NOT EXISTS
    sub-selects reachable through AND/OR in its WHERE clause, become dependencies of the
    target. Tables are ordered with `topological_sort`, and the statements of each table
    present in `statements` are emitted in that order.
    """
    dependency_edges = ordered_set()
    table_nodes = OrderedSet.from_iterable(statements.keys())

    def link(source_table: str, target_table: str):
        # Record that `target_table` reads from `source_table`.
        dependency_edges.add((source_table, target_table))
        table_nodes.add(source_table)

    def walk_expr(expr: sql.Expr, target_table: str):
        # Only NOT EXISTS sub-selects (possibly nested under AND/OR) contribute dependencies.
        if isinstance(expr, sql.NotExists):
            walk_select(expr.expr, target_table)
        elif isinstance(expr, (sql.And, sql.Or)):
            for part in expr.expr:
                walk_expr(part, target_table)

    def walk_select(select: Optional[sql.Select], target_table: str):
        """Collect dependency edges from the FROM, JOIN and WHERE clauses of `select`."""
        if not select:
            return

        # FROM clause: either a single sub-select or a collection of table references.
        sources = select.froms
        if sources:
            if isinstance(sources, sql.Select):
                walk_select(sources, target_table)
            else:
                for source in sources:
                    link(source.table, target_table)

        # JOIN clause
        for join in select.joins or ():
            link(join.table, target_table)

        # WHERE clause
        if select.where and select.where.expression:
            walk_expr(select.where.expression, target_table)

    for target_table, table_statements in statements.items():
        for statement in table_statements:
            query = statement.query
            if not query:
                continue

            # Normalise the supported query shapes to a sequence of selects.
            if isinstance(query, list):
                selects = query
            elif isinstance(query, sql.Select):
                selects = [query]
            elif isinstance(query, sql.CTE):
                selects = query.selects
            else:
                selects = []

            for select in selects:
                walk_select(select, target_table)

    ordered_tables = topological_sort(list(table_nodes), list(dependency_edges))

    # Tables that only appear as sources have no statements of their own and are skipped.
    return [
        statement
        for table in ordered_tables
        if table in statements
        for statement in statements[table]
    ]
2483
+
2484
class RecursiveLookupsRewriter(v.Rewriter):
    """Rewriter that points every lookup of one relation at a replacement relation."""

    def __init__(self, recursive_relation: ir.Relation, new_recursive_relation: ir.Relation):
        super().__init__()
        # Relation to look for, and the relation substituted in its place.
        self._recursive_relation: ir.Relation = recursive_relation
        self._new_recursive_relation: ir.Relation = new_recursive_relation

    def handle_lookup(self, node: ir.Lookup, parent: ir.Node):
        """Return `node` rebuilt against the new relation if it targets the old one, else `node`."""
        targets_old_relation = node.relation == self._recursive_relation
        if not targets_old_relation:
            return node
        return node.reconstruct(
            node.engine,
            self._new_recursive_relation,
            node.args,
            node.annotations,
        )
2494
+
2495
+ class DerivedRelationsVisitor(v.Visitor):
2496
+ _is_derived: bool = True
2497
+
2498
+ def is_derived(self) -> bool:
2499
+ return self._is_derived
2500
+
2501
+ def visit_relation(self, node: ir.Relation, parent: Optional[ir.Node]):
2502
+ if self._is_derived and from_cdc_annotation in node.annotations:
2503
+ self._is_derived = False