relationalai 0.13.5__py3-none-any.whl → 1.0.0a2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (856)
  1. relationalai/__init__.py +1 -256
  2. relationalai/config/__init__.py +56 -0
  3. relationalai/config/config.py +289 -0
  4. relationalai/config/config_fields.py +86 -0
  5. relationalai/config/connections/__init__.py +46 -0
  6. relationalai/config/connections/base.py +23 -0
  7. relationalai/config/connections/duckdb.py +29 -0
  8. relationalai/config/connections/snowflake.py +243 -0
  9. relationalai/config/external/__init__.py +17 -0
  10. relationalai/config/external/dbt_converter.py +101 -0
  11. relationalai/config/external/dbt_models.py +93 -0
  12. relationalai/config/external/snowflake_converter.py +41 -0
  13. relationalai/config/external/snowflake_models.py +85 -0
  14. relationalai/config/external/utils.py +19 -0
  15. relationalai/config/shims.py +1 -0
  16. relationalai/semantics/__init__.py +146 -22
  17. relationalai/semantics/backends/lqp/annotations.py +11 -0
  18. relationalai/semantics/backends/sql/sql_compiler.py +327 -0
  19. relationalai/semantics/frontend/base.py +1719 -0
  20. relationalai/semantics/frontend/core.py +179 -0
  21. relationalai/semantics/frontend/front_compiler.py +1316 -0
  22. relationalai/semantics/frontend/pprint.py +408 -0
  23. relationalai/semantics/metamodel/__init__.py +6 -40
  24. relationalai/semantics/metamodel/builtins.py +206 -772
  25. relationalai/semantics/metamodel/metamodel.py +465 -0
  26. relationalai/semantics/metamodel/metamodel_analyzer.py +519 -0
  27. relationalai/semantics/metamodel/pprint.py +414 -0
  28. relationalai/semantics/metamodel/rewriter.py +266 -0
  29. relationalai/semantics/metamodel/typer.py +1213 -0
  30. relationalai/semantics/std/__init__.py +60 -40
  31. relationalai/semantics/std/aggregates.py +148 -0
  32. relationalai/semantics/std/common.py +44 -0
  33. relationalai/semantics/std/constraints.py +37 -43
  34. relationalai/semantics/std/datetime.py +249 -135
  35. relationalai/semantics/std/decimals.py +45 -52
  36. relationalai/semantics/std/floats.py +13 -5
  37. relationalai/semantics/std/integers.py +26 -11
  38. relationalai/semantics/std/math.py +183 -112
  39. relationalai/semantics/std/numbers.py +86 -0
  40. relationalai/semantics/std/re.py +80 -62
  41. relationalai/semantics/std/strings.py +101 -46
  42. relationalai/shims/executor.py +179 -0
  43. relationalai/shims/helpers.py +126 -0
  44. relationalai/shims/hoister.py +221 -0
  45. relationalai/shims/mm2v0.py +1394 -0
  46. relationalai/tools/cli/__init__.py +6 -0
  47. relationalai/tools/cli/cli.py +90 -0
  48. relationalai/tools/cli/components/__init__.py +5 -0
  49. relationalai/tools/cli/components/progress_reader.py +1524 -0
  50. relationalai/tools/cli/components/utils.py +58 -0
  51. relationalai/tools/cli/config_template.py +45 -0
  52. relationalai/tools/cli/dev.py +19 -0
  53. relationalai/tools/debugger.py +289 -183
  54. relationalai/tools/typer_debugger.py +93 -0
  55. relationalai/util/dataclasses.py +43 -0
  56. relationalai/util/docutils.py +40 -0
  57. relationalai/util/error.py +199 -0
  58. relationalai/util/format.py +48 -109
  59. relationalai/util/naming.py +145 -0
  60. relationalai/util/python.py +35 -0
  61. relationalai/util/runtime.py +156 -0
  62. relationalai/util/schema.py +197 -0
  63. relationalai/util/source.py +185 -0
  64. relationalai/util/structures.py +163 -0
  65. relationalai/util/tracing.py +261 -0
  66. relationalai-1.0.0a2.dist-info/METADATA +44 -0
  67. relationalai-1.0.0a2.dist-info/RECORD +489 -0
  68. relationalai-1.0.0a2.dist-info/WHEEL +5 -0
  69. relationalai-1.0.0a2.dist-info/entry_points.txt +3 -0
  70. relationalai-1.0.0a2.dist-info/top_level.txt +2 -0
  71. v0/relationalai/__init__.py +216 -0
  72. v0/relationalai/clients/__init__.py +5 -0
  73. v0/relationalai/clients/azure.py +477 -0
  74. v0/relationalai/clients/client.py +912 -0
  75. v0/relationalai/clients/config.py +673 -0
  76. v0/relationalai/clients/direct_access_client.py +118 -0
  77. v0/relationalai/clients/hash_util.py +31 -0
  78. v0/relationalai/clients/local.py +571 -0
  79. v0/relationalai/clients/profile_polling.py +73 -0
  80. v0/relationalai/clients/result_helpers.py +420 -0
  81. v0/relationalai/clients/snowflake.py +3869 -0
  82. v0/relationalai/clients/types.py +113 -0
  83. v0/relationalai/clients/use_index_poller.py +980 -0
  84. v0/relationalai/clients/util.py +356 -0
  85. v0/relationalai/debugging.py +389 -0
  86. v0/relationalai/dsl.py +1749 -0
  87. v0/relationalai/early_access/builder/__init__.py +30 -0
  88. v0/relationalai/early_access/builder/builder/__init__.py +35 -0
  89. v0/relationalai/early_access/builder/snowflake/__init__.py +12 -0
  90. v0/relationalai/early_access/builder/std/__init__.py +25 -0
  91. v0/relationalai/early_access/builder/std/decimals/__init__.py +12 -0
  92. v0/relationalai/early_access/builder/std/integers/__init__.py +12 -0
  93. v0/relationalai/early_access/builder/std/math/__init__.py +12 -0
  94. v0/relationalai/early_access/builder/std/strings/__init__.py +14 -0
  95. v0/relationalai/early_access/devtools/__init__.py +12 -0
  96. v0/relationalai/early_access/devtools/benchmark_lqp/__init__.py +12 -0
  97. v0/relationalai/early_access/devtools/extract_lqp/__init__.py +12 -0
  98. v0/relationalai/early_access/dsl/adapters/orm/adapter_qb.py +427 -0
  99. v0/relationalai/early_access/dsl/adapters/orm/parser.py +636 -0
  100. v0/relationalai/early_access/dsl/adapters/owl/adapter.py +176 -0
  101. v0/relationalai/early_access/dsl/adapters/owl/parser.py +160 -0
  102. v0/relationalai/early_access/dsl/bindings/common.py +402 -0
  103. v0/relationalai/early_access/dsl/bindings/csv.py +170 -0
  104. v0/relationalai/early_access/dsl/bindings/legacy/binding_models.py +143 -0
  105. v0/relationalai/early_access/dsl/bindings/snowflake.py +64 -0
  106. v0/relationalai/early_access/dsl/codegen/binder.py +411 -0
  107. v0/relationalai/early_access/dsl/codegen/common.py +79 -0
  108. v0/relationalai/early_access/dsl/codegen/helpers.py +23 -0
  109. v0/relationalai/early_access/dsl/codegen/relations.py +700 -0
  110. v0/relationalai/early_access/dsl/codegen/weaver.py +417 -0
  111. v0/relationalai/early_access/dsl/core/builders/__init__.py +47 -0
  112. v0/relationalai/early_access/dsl/core/builders/logic.py +19 -0
  113. v0/relationalai/early_access/dsl/core/builders/scalar_constraint.py +11 -0
  114. v0/relationalai/early_access/dsl/core/constraints/predicate/atomic.py +455 -0
  115. v0/relationalai/early_access/dsl/core/constraints/predicate/universal.py +73 -0
  116. v0/relationalai/early_access/dsl/core/constraints/scalar.py +310 -0
  117. v0/relationalai/early_access/dsl/core/context.py +13 -0
  118. v0/relationalai/early_access/dsl/core/cset.py +132 -0
  119. v0/relationalai/early_access/dsl/core/exprs/__init__.py +116 -0
  120. v0/relationalai/early_access/dsl/core/exprs/relational.py +18 -0
  121. v0/relationalai/early_access/dsl/core/exprs/scalar.py +412 -0
  122. v0/relationalai/early_access/dsl/core/instances.py +44 -0
  123. v0/relationalai/early_access/dsl/core/logic/__init__.py +193 -0
  124. v0/relationalai/early_access/dsl/core/logic/aggregation.py +98 -0
  125. v0/relationalai/early_access/dsl/core/logic/exists.py +223 -0
  126. v0/relationalai/early_access/dsl/core/logic/helper.py +163 -0
  127. v0/relationalai/early_access/dsl/core/namespaces.py +32 -0
  128. v0/relationalai/early_access/dsl/core/relations.py +276 -0
  129. v0/relationalai/early_access/dsl/core/rules.py +112 -0
  130. v0/relationalai/early_access/dsl/core/std/__init__.py +45 -0
  131. v0/relationalai/early_access/dsl/core/temporal/recall.py +6 -0
  132. v0/relationalai/early_access/dsl/core/types/__init__.py +270 -0
  133. v0/relationalai/early_access/dsl/core/types/concepts.py +128 -0
  134. v0/relationalai/early_access/dsl/core/types/constrained/__init__.py +267 -0
  135. v0/relationalai/early_access/dsl/core/types/constrained/nominal.py +143 -0
  136. v0/relationalai/early_access/dsl/core/types/constrained/subtype.py +124 -0
  137. v0/relationalai/early_access/dsl/core/types/standard.py +92 -0
  138. v0/relationalai/early_access/dsl/core/types/unconstrained.py +50 -0
  139. v0/relationalai/early_access/dsl/core/types/variables.py +203 -0
  140. v0/relationalai/early_access/dsl/ir/compiler.py +318 -0
  141. v0/relationalai/early_access/dsl/ir/executor.py +260 -0
  142. v0/relationalai/early_access/dsl/ontologies/constraints.py +88 -0
  143. v0/relationalai/early_access/dsl/ontologies/export.py +30 -0
  144. v0/relationalai/early_access/dsl/ontologies/models.py +453 -0
  145. v0/relationalai/early_access/dsl/ontologies/python_printer.py +303 -0
  146. v0/relationalai/early_access/dsl/ontologies/readings.py +60 -0
  147. v0/relationalai/early_access/dsl/ontologies/relationships.py +322 -0
  148. v0/relationalai/early_access/dsl/ontologies/roles.py +87 -0
  149. v0/relationalai/early_access/dsl/ontologies/subtyping.py +55 -0
  150. v0/relationalai/early_access/dsl/orm/constraints.py +438 -0
  151. v0/relationalai/early_access/dsl/orm/measures/dimensions.py +200 -0
  152. v0/relationalai/early_access/dsl/orm/measures/initializer.py +16 -0
  153. v0/relationalai/early_access/dsl/orm/measures/measure_rules.py +275 -0
  154. v0/relationalai/early_access/dsl/orm/measures/measures.py +299 -0
  155. v0/relationalai/early_access/dsl/orm/measures/role_exprs.py +268 -0
  156. v0/relationalai/early_access/dsl/orm/models.py +256 -0
  157. v0/relationalai/early_access/dsl/orm/object_oriented_printer.py +344 -0
  158. v0/relationalai/early_access/dsl/orm/printer.py +469 -0
  159. v0/relationalai/early_access/dsl/orm/reasoners.py +480 -0
  160. v0/relationalai/early_access/dsl/orm/relations.py +19 -0
  161. v0/relationalai/early_access/dsl/orm/relationships.py +251 -0
  162. v0/relationalai/early_access/dsl/orm/types.py +42 -0
  163. v0/relationalai/early_access/dsl/orm/utils.py +79 -0
  164. v0/relationalai/early_access/dsl/orm/verb.py +204 -0
  165. v0/relationalai/early_access/dsl/physical_metadata/tables.py +133 -0
  166. v0/relationalai/early_access/dsl/relations.py +170 -0
  167. v0/relationalai/early_access/dsl/rulesets.py +69 -0
  168. v0/relationalai/early_access/dsl/schemas/__init__.py +450 -0
  169. v0/relationalai/early_access/dsl/schemas/builder.py +48 -0
  170. v0/relationalai/early_access/dsl/schemas/comp_names.py +51 -0
  171. v0/relationalai/early_access/dsl/schemas/components.py +203 -0
  172. v0/relationalai/early_access/dsl/schemas/contexts.py +156 -0
  173. v0/relationalai/early_access/dsl/schemas/exprs.py +89 -0
  174. v0/relationalai/early_access/dsl/schemas/fragments.py +464 -0
  175. v0/relationalai/early_access/dsl/serialization.py +79 -0
  176. v0/relationalai/early_access/dsl/serialize/exporter.py +163 -0
  177. v0/relationalai/early_access/dsl/snow/api.py +104 -0
  178. v0/relationalai/early_access/dsl/snow/common.py +76 -0
  179. v0/relationalai/early_access/dsl/state_mgmt/__init__.py +129 -0
  180. v0/relationalai/early_access/dsl/state_mgmt/state_charts.py +125 -0
  181. v0/relationalai/early_access/dsl/state_mgmt/transitions.py +130 -0
  182. v0/relationalai/early_access/dsl/types/__init__.py +40 -0
  183. v0/relationalai/early_access/dsl/types/concepts.py +12 -0
  184. v0/relationalai/early_access/dsl/types/entities.py +135 -0
  185. v0/relationalai/early_access/dsl/types/values.py +17 -0
  186. v0/relationalai/early_access/dsl/utils.py +102 -0
  187. v0/relationalai/early_access/graphs/__init__.py +13 -0
  188. v0/relationalai/early_access/lqp/__init__.py +12 -0
  189. v0/relationalai/early_access/lqp/compiler/__init__.py +12 -0
  190. v0/relationalai/early_access/lqp/constructors/__init__.py +18 -0
  191. v0/relationalai/early_access/lqp/executor/__init__.py +12 -0
  192. v0/relationalai/early_access/lqp/ir/__init__.py +12 -0
  193. v0/relationalai/early_access/lqp/passes/__init__.py +12 -0
  194. v0/relationalai/early_access/lqp/pragmas/__init__.py +12 -0
  195. v0/relationalai/early_access/lqp/primitives/__init__.py +12 -0
  196. v0/relationalai/early_access/lqp/types/__init__.py +12 -0
  197. v0/relationalai/early_access/lqp/utils/__init__.py +12 -0
  198. v0/relationalai/early_access/lqp/validators/__init__.py +12 -0
  199. v0/relationalai/early_access/metamodel/__init__.py +58 -0
  200. v0/relationalai/early_access/metamodel/builtins/__init__.py +12 -0
  201. v0/relationalai/early_access/metamodel/compiler/__init__.py +12 -0
  202. v0/relationalai/early_access/metamodel/dependency/__init__.py +12 -0
  203. v0/relationalai/early_access/metamodel/factory/__init__.py +17 -0
  204. v0/relationalai/early_access/metamodel/helpers/__init__.py +12 -0
  205. v0/relationalai/early_access/metamodel/ir/__init__.py +14 -0
  206. v0/relationalai/early_access/metamodel/rewrite/__init__.py +7 -0
  207. v0/relationalai/early_access/metamodel/typer/__init__.py +3 -0
  208. v0/relationalai/early_access/metamodel/typer/typer/__init__.py +12 -0
  209. v0/relationalai/early_access/metamodel/types/__init__.py +15 -0
  210. v0/relationalai/early_access/metamodel/util/__init__.py +15 -0
  211. v0/relationalai/early_access/metamodel/visitor/__init__.py +12 -0
  212. v0/relationalai/early_access/rel/__init__.py +12 -0
  213. v0/relationalai/early_access/rel/executor/__init__.py +12 -0
  214. v0/relationalai/early_access/rel/rel_utils/__init__.py +12 -0
  215. v0/relationalai/early_access/rel/rewrite/__init__.py +7 -0
  216. v0/relationalai/early_access/solvers/__init__.py +19 -0
  217. v0/relationalai/early_access/sql/__init__.py +11 -0
  218. v0/relationalai/early_access/sql/executor/__init__.py +3 -0
  219. v0/relationalai/early_access/sql/rewrite/__init__.py +3 -0
  220. v0/relationalai/early_access/tests/logging/__init__.py +12 -0
  221. v0/relationalai/early_access/tests/test_snapshot_base/__init__.py +12 -0
  222. v0/relationalai/early_access/tests/utils/__init__.py +12 -0
  223. v0/relationalai/environments/__init__.py +35 -0
  224. v0/relationalai/environments/base.py +381 -0
  225. v0/relationalai/environments/colab.py +14 -0
  226. v0/relationalai/environments/generic.py +71 -0
  227. v0/relationalai/environments/ipython.py +68 -0
  228. v0/relationalai/environments/jupyter.py +9 -0
  229. v0/relationalai/environments/snowbook.py +169 -0
  230. v0/relationalai/errors.py +2478 -0
  231. v0/relationalai/experimental/SF.py +38 -0
  232. v0/relationalai/experimental/inspect.py +47 -0
  233. v0/relationalai/experimental/pathfinder/__init__.py +158 -0
  234. v0/relationalai/experimental/pathfinder/api.py +160 -0
  235. v0/relationalai/experimental/pathfinder/automaton.py +584 -0
  236. v0/relationalai/experimental/pathfinder/bridge.py +226 -0
  237. v0/relationalai/experimental/pathfinder/compiler.py +416 -0
  238. v0/relationalai/experimental/pathfinder/datalog.py +214 -0
  239. v0/relationalai/experimental/pathfinder/diagnostics.py +56 -0
  240. v0/relationalai/experimental/pathfinder/filter.py +236 -0
  241. v0/relationalai/experimental/pathfinder/glushkov.py +439 -0
  242. v0/relationalai/experimental/pathfinder/options.py +265 -0
  243. v0/relationalai/experimental/pathfinder/rpq.py +344 -0
  244. v0/relationalai/experimental/pathfinder/transition.py +200 -0
  245. v0/relationalai/experimental/pathfinder/utils.py +26 -0
  246. v0/relationalai/experimental/paths/api.py +143 -0
  247. v0/relationalai/experimental/paths/benchmarks/grid_graph.py +37 -0
  248. v0/relationalai/experimental/paths/examples/basic_example.py +40 -0
  249. v0/relationalai/experimental/paths/examples/minimal_engine_warmup.py +3 -0
  250. v0/relationalai/experimental/paths/examples/movie_example.py +77 -0
  251. v0/relationalai/experimental/paths/examples/paths_benchmark.py +115 -0
  252. v0/relationalai/experimental/paths/examples/paths_example.py +116 -0
  253. v0/relationalai/experimental/paths/examples/pattern_to_automaton.py +28 -0
  254. v0/relationalai/experimental/paths/find_paths_via_automaton.py +85 -0
  255. v0/relationalai/experimental/paths/graph.py +185 -0
  256. v0/relationalai/experimental/paths/path_algorithms/find_paths.py +280 -0
  257. v0/relationalai/experimental/paths/path_algorithms/one_sided_ball_repetition.py +26 -0
  258. v0/relationalai/experimental/paths/path_algorithms/one_sided_ball_upto.py +111 -0
  259. v0/relationalai/experimental/paths/path_algorithms/single.py +59 -0
  260. v0/relationalai/experimental/paths/path_algorithms/two_sided_balls_repetition.py +39 -0
  261. v0/relationalai/experimental/paths/path_algorithms/two_sided_balls_upto.py +103 -0
  262. v0/relationalai/experimental/paths/path_algorithms/usp-old.py +130 -0
  263. v0/relationalai/experimental/paths/path_algorithms/usp-tuple.py +183 -0
  264. v0/relationalai/experimental/paths/path_algorithms/usp.py +150 -0
  265. v0/relationalai/experimental/paths/product_graph.py +93 -0
  266. v0/relationalai/experimental/paths/rpq/automaton.py +584 -0
  267. v0/relationalai/experimental/paths/rpq/diagnostics.py +56 -0
  268. v0/relationalai/experimental/paths/rpq/rpq.py +378 -0
  269. v0/relationalai/experimental/paths/tests/tests_limit_sp_max_length.py +90 -0
  270. v0/relationalai/experimental/paths/tests/tests_limit_sp_multiple.py +119 -0
  271. v0/relationalai/experimental/paths/tests/tests_limit_sp_single.py +104 -0
  272. v0/relationalai/experimental/paths/tests/tests_limit_walks_multiple.py +113 -0
  273. v0/relationalai/experimental/paths/tests/tests_limit_walks_single.py +149 -0
  274. v0/relationalai/experimental/paths/tests/tests_one_sided_ball_repetition_multiple.py +70 -0
  275. v0/relationalai/experimental/paths/tests/tests_one_sided_ball_repetition_single.py +64 -0
  276. v0/relationalai/experimental/paths/tests/tests_one_sided_ball_upto_multiple.py +115 -0
  277. v0/relationalai/experimental/paths/tests/tests_one_sided_ball_upto_single.py +75 -0
  278. v0/relationalai/experimental/paths/tests/tests_single_paths.py +152 -0
  279. v0/relationalai/experimental/paths/tests/tests_single_walks.py +208 -0
  280. v0/relationalai/experimental/paths/tests/tests_single_walks_undirected.py +297 -0
  281. v0/relationalai/experimental/paths/tests/tests_two_sided_balls_repetition_multiple.py +107 -0
  282. v0/relationalai/experimental/paths/tests/tests_two_sided_balls_repetition_single.py +76 -0
  283. v0/relationalai/experimental/paths/tests/tests_two_sided_balls_upto_multiple.py +76 -0
  284. v0/relationalai/experimental/paths/tests/tests_two_sided_balls_upto_single.py +110 -0
  285. v0/relationalai/experimental/paths/tests/tests_usp_nsp_multiple.py +229 -0
  286. v0/relationalai/experimental/paths/tests/tests_usp_nsp_single.py +108 -0
  287. v0/relationalai/experimental/paths/tree_agg.py +168 -0
  288. v0/relationalai/experimental/paths/utilities/iterators.py +27 -0
  289. v0/relationalai/experimental/paths/utilities/prefix_sum.py +91 -0
  290. v0/relationalai/experimental/solvers.py +1087 -0
  291. v0/relationalai/loaders/csv.py +195 -0
  292. v0/relationalai/loaders/loader.py +177 -0
  293. v0/relationalai/loaders/types.py +23 -0
  294. v0/relationalai/rel_emitter.py +373 -0
  295. v0/relationalai/rel_utils.py +185 -0
  296. v0/relationalai/semantics/__init__.py +29 -0
  297. v0/relationalai/semantics/devtools/benchmark_lqp.py +536 -0
  298. v0/relationalai/semantics/devtools/compilation_manager.py +294 -0
  299. v0/relationalai/semantics/devtools/extract_lqp.py +110 -0
  300. v0/relationalai/semantics/internal/internal.py +3785 -0
  301. v0/relationalai/semantics/internal/snowflake.py +325 -0
  302. v0/relationalai/semantics/lqp/builtins.py +16 -0
  303. v0/relationalai/semantics/lqp/compiler.py +22 -0
  304. v0/relationalai/semantics/lqp/constructors.py +68 -0
  305. v0/relationalai/semantics/lqp/executor.py +474 -0
  306. v0/relationalai/semantics/lqp/intrinsics.py +24 -0
  307. v0/relationalai/semantics/lqp/ir.py +124 -0
  308. v0/relationalai/semantics/lqp/model2lqp.py +877 -0
  309. v0/relationalai/semantics/lqp/passes.py +680 -0
  310. v0/relationalai/semantics/lqp/primitives.py +252 -0
  311. v0/relationalai/semantics/lqp/result_helpers.py +202 -0
  312. v0/relationalai/semantics/lqp/rewrite/__init__.py +18 -0
  313. v0/relationalai/semantics/lqp/rewrite/annotate_constraints.py +57 -0
  314. v0/relationalai/semantics/lqp/rewrite/cdc.py +216 -0
  315. v0/relationalai/semantics/lqp/rewrite/extract_common.py +338 -0
  316. v0/relationalai/semantics/lqp/rewrite/extract_keys.py +490 -0
  317. v0/relationalai/semantics/lqp/rewrite/function_annotations.py +114 -0
  318. v0/relationalai/semantics/lqp/rewrite/functional_dependencies.py +314 -0
  319. v0/relationalai/semantics/lqp/rewrite/quantify_vars.py +296 -0
  320. v0/relationalai/semantics/lqp/rewrite/splinter.py +76 -0
  321. v0/relationalai/semantics/lqp/types.py +101 -0
  322. v0/relationalai/semantics/lqp/utils.py +160 -0
  323. v0/relationalai/semantics/lqp/validators.py +57 -0
  324. v0/relationalai/semantics/metamodel/__init__.py +40 -0
  325. v0/relationalai/semantics/metamodel/builtins.py +776 -0
  326. v0/relationalai/semantics/metamodel/compiler.py +133 -0
  327. v0/relationalai/semantics/metamodel/dependency.py +862 -0
  328. v0/relationalai/semantics/metamodel/executor.py +61 -0
  329. v0/relationalai/semantics/metamodel/factory.py +287 -0
  330. v0/relationalai/semantics/metamodel/helpers.py +361 -0
  331. v0/relationalai/semantics/metamodel/ir.py +923 -0
  332. v0/relationalai/semantics/metamodel/rewrite/__init__.py +7 -0
  333. v0/relationalai/semantics/metamodel/rewrite/discharge_constraints.py +39 -0
  334. v0/relationalai/semantics/metamodel/rewrite/dnf_union_splitter.py +210 -0
  335. v0/relationalai/semantics/metamodel/rewrite/extract_nested_logicals.py +78 -0
  336. v0/relationalai/semantics/metamodel/rewrite/flatten.py +554 -0
  337. v0/relationalai/semantics/metamodel/rewrite/format_outputs.py +165 -0
  338. v0/relationalai/semantics/metamodel/typer/checker.py +353 -0
  339. v0/relationalai/semantics/metamodel/typer/typer.py +1395 -0
  340. v0/relationalai/semantics/metamodel/util.py +505 -0
  341. v0/relationalai/semantics/metamodel/visitor.py +944 -0
  342. v0/relationalai/semantics/reasoners/__init__.py +10 -0
  343. v0/relationalai/semantics/reasoners/graph/__init__.py +37 -0
  344. v0/relationalai/semantics/reasoners/graph/core.py +9019 -0
  345. v0/relationalai/semantics/reasoners/optimization/__init__.py +68 -0
  346. v0/relationalai/semantics/reasoners/optimization/common.py +88 -0
  347. v0/relationalai/semantics/reasoners/optimization/solvers_dev.py +568 -0
  348. v0/relationalai/semantics/reasoners/optimization/solvers_pb.py +1163 -0
  349. v0/relationalai/semantics/rel/builtins.py +40 -0
  350. v0/relationalai/semantics/rel/compiler.py +989 -0
  351. v0/relationalai/semantics/rel/executor.py +359 -0
  352. v0/relationalai/semantics/rel/rel.py +482 -0
  353. v0/relationalai/semantics/rel/rel_utils.py +276 -0
  354. v0/relationalai/semantics/snowflake/__init__.py +3 -0
  355. v0/relationalai/semantics/sql/compiler.py +2503 -0
  356. v0/relationalai/semantics/sql/executor/duck_db.py +52 -0
  357. v0/relationalai/semantics/sql/executor/result_helpers.py +64 -0
  358. v0/relationalai/semantics/sql/executor/snowflake.py +145 -0
  359. v0/relationalai/semantics/sql/rewrite/denormalize.py +222 -0
  360. v0/relationalai/semantics/sql/rewrite/double_negation.py +49 -0
  361. v0/relationalai/semantics/sql/rewrite/recursive_union.py +127 -0
  362. v0/relationalai/semantics/sql/rewrite/sort_output_query.py +246 -0
  363. v0/relationalai/semantics/sql/sql.py +504 -0
  364. v0/relationalai/semantics/std/__init__.py +54 -0
  365. v0/relationalai/semantics/std/constraints.py +43 -0
  366. v0/relationalai/semantics/std/datetime.py +363 -0
  367. v0/relationalai/semantics/std/decimals.py +62 -0
  368. v0/relationalai/semantics/std/floats.py +7 -0
  369. v0/relationalai/semantics/std/integers.py +22 -0
  370. v0/relationalai/semantics/std/math.py +141 -0
  371. v0/relationalai/semantics/std/pragmas.py +11 -0
  372. v0/relationalai/semantics/std/re.py +83 -0
  373. v0/relationalai/semantics/std/std.py +14 -0
  374. v0/relationalai/semantics/std/strings.py +63 -0
  375. v0/relationalai/semantics/tests/__init__.py +0 -0
  376. v0/relationalai/semantics/tests/test_snapshot_abstract.py +143 -0
  377. v0/relationalai/semantics/tests/test_snapshot_base.py +9 -0
  378. v0/relationalai/semantics/tests/utils.py +46 -0
  379. v0/relationalai/std/__init__.py +70 -0
  380. v0/relationalai/tools/__init__.py +0 -0
  381. v0/relationalai/tools/cli.py +1940 -0
  382. v0/relationalai/tools/cli_controls.py +1826 -0
  383. v0/relationalai/tools/cli_helpers.py +390 -0
  384. v0/relationalai/tools/debugger.py +183 -0
  385. v0/relationalai/tools/debugger_client.py +109 -0
  386. v0/relationalai/tools/debugger_server.py +302 -0
  387. v0/relationalai/tools/dev.py +685 -0
  388. v0/relationalai/tools/qb_debugger.py +425 -0
  389. v0/relationalai/util/clean_up_databases.py +95 -0
  390. v0/relationalai/util/format.py +123 -0
  391. v0/relationalai/util/list_databases.py +9 -0
  392. v0/relationalai/util/otel_configuration.py +25 -0
  393. v0/relationalai/util/otel_handler.py +484 -0
  394. v0/relationalai/util/snowflake_handler.py +88 -0
  395. v0/relationalai/util/span_format_test.py +43 -0
  396. v0/relationalai/util/span_tracker.py +207 -0
  397. v0/relationalai/util/spans_file_handler.py +72 -0
  398. v0/relationalai/util/tracing_handler.py +34 -0
  399. frontend/debugger/dist/.gitignore +0 -2
  400. frontend/debugger/dist/assets/favicon-Dy0ZgA6N.png +0 -0
  401. frontend/debugger/dist/assets/index-Cssla-O7.js +0 -208
  402. frontend/debugger/dist/assets/index-DlHsYx1V.css +0 -9
  403. frontend/debugger/dist/index.html +0 -17
  404. relationalai/clients/__init__.py +0 -18
  405. relationalai/clients/client.py +0 -946
  406. relationalai/clients/config.py +0 -673
  407. relationalai/clients/direct_access_client.py +0 -118
  408. relationalai/clients/exec_txn_poller.py +0 -153
  409. relationalai/clients/hash_util.py +0 -31
  410. relationalai/clients/local.py +0 -594
  411. relationalai/clients/profile_polling.py +0 -73
  412. relationalai/clients/resources/__init__.py +0 -8
  413. relationalai/clients/resources/azure/azure.py +0 -502
  414. relationalai/clients/resources/snowflake/__init__.py +0 -20
  415. relationalai/clients/resources/snowflake/cli_resources.py +0 -98
  416. relationalai/clients/resources/snowflake/direct_access_resources.py +0 -739
  417. relationalai/clients/resources/snowflake/engine_service.py +0 -381
  418. relationalai/clients/resources/snowflake/engine_state_handlers.py +0 -315
  419. relationalai/clients/resources/snowflake/error_handlers.py +0 -240
  420. relationalai/clients/resources/snowflake/export_procedure.py.jinja +0 -249
  421. relationalai/clients/resources/snowflake/resources_factory.py +0 -99
  422. relationalai/clients/resources/snowflake/snowflake.py +0 -3193
  423. relationalai/clients/resources/snowflake/use_index_poller.py +0 -1019
  424. relationalai/clients/resources/snowflake/use_index_resources.py +0 -188
  425. relationalai/clients/resources/snowflake/util.py +0 -387
  426. relationalai/clients/result_helpers.py +0 -420
  427. relationalai/clients/types.py +0 -118
  428. relationalai/clients/util.py +0 -356
  429. relationalai/debugging.py +0 -389
  430. relationalai/dsl.py +0 -1749
  431. relationalai/early_access/builder/__init__.py +0 -30
  432. relationalai/early_access/builder/builder/__init__.py +0 -35
  433. relationalai/early_access/builder/snowflake/__init__.py +0 -12
  434. relationalai/early_access/builder/std/__init__.py +0 -25
  435. relationalai/early_access/builder/std/decimals/__init__.py +0 -12
  436. relationalai/early_access/builder/std/integers/__init__.py +0 -12
  437. relationalai/early_access/builder/std/math/__init__.py +0 -12
  438. relationalai/early_access/builder/std/strings/__init__.py +0 -14
  439. relationalai/early_access/devtools/__init__.py +0 -12
  440. relationalai/early_access/devtools/benchmark_lqp/__init__.py +0 -12
  441. relationalai/early_access/devtools/extract_lqp/__init__.py +0 -12
  442. relationalai/early_access/dsl/adapters/orm/adapter_qb.py +0 -427
  443. relationalai/early_access/dsl/adapters/orm/parser.py +0 -636
  444. relationalai/early_access/dsl/adapters/owl/adapter.py +0 -176
  445. relationalai/early_access/dsl/adapters/owl/parser.py +0 -160
  446. relationalai/early_access/dsl/bindings/common.py +0 -402
  447. relationalai/early_access/dsl/bindings/csv.py +0 -170
  448. relationalai/early_access/dsl/bindings/legacy/binding_models.py +0 -143
  449. relationalai/early_access/dsl/bindings/snowflake.py +0 -64
  450. relationalai/early_access/dsl/codegen/binder.py +0 -411
  451. relationalai/early_access/dsl/codegen/common.py +0 -79
  452. relationalai/early_access/dsl/codegen/helpers.py +0 -23
  453. relationalai/early_access/dsl/codegen/relations.py +0 -700
  454. relationalai/early_access/dsl/codegen/weaver.py +0 -417
  455. relationalai/early_access/dsl/core/builders/__init__.py +0 -47
  456. relationalai/early_access/dsl/core/builders/logic.py +0 -19
  457. relationalai/early_access/dsl/core/builders/scalar_constraint.py +0 -11
  458. relationalai/early_access/dsl/core/constraints/predicate/atomic.py +0 -455
  459. relationalai/early_access/dsl/core/constraints/predicate/universal.py +0 -73
  460. relationalai/early_access/dsl/core/constraints/scalar.py +0 -310
  461. relationalai/early_access/dsl/core/context.py +0 -13
  462. relationalai/early_access/dsl/core/cset.py +0 -132
  463. relationalai/early_access/dsl/core/exprs/__init__.py +0 -116
  464. relationalai/early_access/dsl/core/exprs/relational.py +0 -18
  465. relationalai/early_access/dsl/core/exprs/scalar.py +0 -412
  466. relationalai/early_access/dsl/core/instances.py +0 -44
  467. relationalai/early_access/dsl/core/logic/__init__.py +0 -193
  468. relationalai/early_access/dsl/core/logic/aggregation.py +0 -98
  469. relationalai/early_access/dsl/core/logic/exists.py +0 -223
  470. relationalai/early_access/dsl/core/logic/helper.py +0 -163
  471. relationalai/early_access/dsl/core/namespaces.py +0 -32
  472. relationalai/early_access/dsl/core/relations.py +0 -276
  473. relationalai/early_access/dsl/core/rules.py +0 -112
  474. relationalai/early_access/dsl/core/std/__init__.py +0 -45
  475. relationalai/early_access/dsl/core/temporal/recall.py +0 -6
  476. relationalai/early_access/dsl/core/types/__init__.py +0 -270
  477. relationalai/early_access/dsl/core/types/concepts.py +0 -128
  478. relationalai/early_access/dsl/core/types/constrained/__init__.py +0 -267
  479. relationalai/early_access/dsl/core/types/constrained/nominal.py +0 -143
  480. relationalai/early_access/dsl/core/types/constrained/subtype.py +0 -124
  481. relationalai/early_access/dsl/core/types/standard.py +0 -92
  482. relationalai/early_access/dsl/core/types/unconstrained.py +0 -50
  483. relationalai/early_access/dsl/core/types/variables.py +0 -203
  484. relationalai/early_access/dsl/ir/compiler.py +0 -318
  485. relationalai/early_access/dsl/ir/executor.py +0 -260
  486. relationalai/early_access/dsl/ontologies/constraints.py +0 -88
  487. relationalai/early_access/dsl/ontologies/export.py +0 -30
  488. relationalai/early_access/dsl/ontologies/models.py +0 -453
  489. relationalai/early_access/dsl/ontologies/python_printer.py +0 -303
  490. relationalai/early_access/dsl/ontologies/readings.py +0 -60
  491. relationalai/early_access/dsl/ontologies/relationships.py +0 -322
  492. relationalai/early_access/dsl/ontologies/roles.py +0 -87
  493. relationalai/early_access/dsl/ontologies/subtyping.py +0 -55
  494. relationalai/early_access/dsl/orm/constraints.py +0 -438
  495. relationalai/early_access/dsl/orm/measures/dimensions.py +0 -200
  496. relationalai/early_access/dsl/orm/measures/initializer.py +0 -16
  497. relationalai/early_access/dsl/orm/measures/measure_rules.py +0 -275
  498. relationalai/early_access/dsl/orm/measures/measures.py +0 -299
  499. relationalai/early_access/dsl/orm/measures/role_exprs.py +0 -268
  500. relationalai/early_access/dsl/orm/models.py +0 -256
  501. relationalai/early_access/dsl/orm/object_oriented_printer.py +0 -344
  502. relationalai/early_access/dsl/orm/printer.py +0 -469
  503. relationalai/early_access/dsl/orm/reasoners.py +0 -480
  504. relationalai/early_access/dsl/orm/relations.py +0 -19
  505. relationalai/early_access/dsl/orm/relationships.py +0 -251
  506. relationalai/early_access/dsl/orm/types.py +0 -42
  507. relationalai/early_access/dsl/orm/utils.py +0 -79
  508. relationalai/early_access/dsl/orm/verb.py +0 -204
  509. relationalai/early_access/dsl/physical_metadata/tables.py +0 -133
  510. relationalai/early_access/dsl/relations.py +0 -170
  511. relationalai/early_access/dsl/rulesets.py +0 -69
  512. relationalai/early_access/dsl/schemas/__init__.py +0 -450
  513. relationalai/early_access/dsl/schemas/builder.py +0 -48
  514. relationalai/early_access/dsl/schemas/comp_names.py +0 -51
  515. relationalai/early_access/dsl/schemas/components.py +0 -203
  516. relationalai/early_access/dsl/schemas/contexts.py +0 -156
  517. relationalai/early_access/dsl/schemas/exprs.py +0 -89
  518. relationalai/early_access/dsl/schemas/fragments.py +0 -464
  519. relationalai/early_access/dsl/serialization.py +0 -79
  520. relationalai/early_access/dsl/serialize/exporter.py +0 -163
  521. relationalai/early_access/dsl/snow/api.py +0 -105
  522. relationalai/early_access/dsl/snow/common.py +0 -76
  523. relationalai/early_access/dsl/state_mgmt/__init__.py +0 -129
  524. relationalai/early_access/dsl/state_mgmt/state_charts.py +0 -125
  525. relationalai/early_access/dsl/state_mgmt/transitions.py +0 -130
  526. relationalai/early_access/dsl/types/__init__.py +0 -40
  527. relationalai/early_access/dsl/types/concepts.py +0 -12
  528. relationalai/early_access/dsl/types/entities.py +0 -135
  529. relationalai/early_access/dsl/types/values.py +0 -17
  530. relationalai/early_access/dsl/utils.py +0 -102
  531. relationalai/early_access/graphs/__init__.py +0 -13
  532. relationalai/early_access/lqp/__init__.py +0 -12
  533. relationalai/early_access/lqp/compiler/__init__.py +0 -12
  534. relationalai/early_access/lqp/constructors/__init__.py +0 -18
  535. relationalai/early_access/lqp/executor/__init__.py +0 -12
  536. relationalai/early_access/lqp/ir/__init__.py +0 -12
  537. relationalai/early_access/lqp/passes/__init__.py +0 -12
  538. relationalai/early_access/lqp/pragmas/__init__.py +0 -12
  539. relationalai/early_access/lqp/primitives/__init__.py +0 -12
  540. relationalai/early_access/lqp/types/__init__.py +0 -12
  541. relationalai/early_access/lqp/utils/__init__.py +0 -12
  542. relationalai/early_access/lqp/validators/__init__.py +0 -12
  543. relationalai/early_access/metamodel/__init__.py +0 -58
  544. relationalai/early_access/metamodel/builtins/__init__.py +0 -12
  545. relationalai/early_access/metamodel/compiler/__init__.py +0 -12
  546. relationalai/early_access/metamodel/dependency/__init__.py +0 -12
  547. relationalai/early_access/metamodel/factory/__init__.py +0 -17
  548. relationalai/early_access/metamodel/helpers/__init__.py +0 -12
  549. relationalai/early_access/metamodel/ir/__init__.py +0 -14
  550. relationalai/early_access/metamodel/rewrite/__init__.py +0 -7
  551. relationalai/early_access/metamodel/typer/__init__.py +0 -3
  552. relationalai/early_access/metamodel/typer/typer/__init__.py +0 -12
  553. relationalai/early_access/metamodel/types/__init__.py +0 -15
  554. relationalai/early_access/metamodel/util/__init__.py +0 -15
  555. relationalai/early_access/metamodel/visitor/__init__.py +0 -12
  556. relationalai/early_access/rel/__init__.py +0 -12
  557. relationalai/early_access/rel/executor/__init__.py +0 -12
  558. relationalai/early_access/rel/rel_utils/__init__.py +0 -12
  559. relationalai/early_access/rel/rewrite/__init__.py +0 -7
  560. relationalai/early_access/solvers/__init__.py +0 -19
  561. relationalai/early_access/sql/__init__.py +0 -11
  562. relationalai/early_access/sql/executor/__init__.py +0 -3
  563. relationalai/early_access/sql/rewrite/__init__.py +0 -3
  564. relationalai/early_access/tests/logging/__init__.py +0 -12
  565. relationalai/early_access/tests/test_snapshot_base/__init__.py +0 -12
  566. relationalai/early_access/tests/utils/__init__.py +0 -12
  567. relationalai/environments/__init__.py +0 -35
  568. relationalai/environments/base.py +0 -381
  569. relationalai/environments/colab.py +0 -14
  570. relationalai/environments/generic.py +0 -71
  571. relationalai/environments/ipython.py +0 -68
  572. relationalai/environments/jupyter.py +0 -9
  573. relationalai/environments/snowbook.py +0 -169
  574. relationalai/errors.py +0 -2496
  575. relationalai/experimental/SF.py +0 -38
  576. relationalai/experimental/inspect.py +0 -47
  577. relationalai/experimental/pathfinder/__init__.py +0 -158
  578. relationalai/experimental/pathfinder/api.py +0 -160
  579. relationalai/experimental/pathfinder/automaton.py +0 -584
  580. relationalai/experimental/pathfinder/bridge.py +0 -226
  581. relationalai/experimental/pathfinder/compiler.py +0 -416
  582. relationalai/experimental/pathfinder/datalog.py +0 -214
  583. relationalai/experimental/pathfinder/diagnostics.py +0 -56
  584. relationalai/experimental/pathfinder/filter.py +0 -236
  585. relationalai/experimental/pathfinder/glushkov.py +0 -439
  586. relationalai/experimental/pathfinder/options.py +0 -265
  587. relationalai/experimental/pathfinder/pathfinder-v0.7.0.rel +0 -1951
  588. relationalai/experimental/pathfinder/rpq.py +0 -344
  589. relationalai/experimental/pathfinder/transition.py +0 -200
  590. relationalai/experimental/pathfinder/utils.py +0 -26
  591. relationalai/experimental/paths/README.md +0 -107
  592. relationalai/experimental/paths/api.py +0 -143
  593. relationalai/experimental/paths/benchmarks/grid_graph.py +0 -37
  594. relationalai/experimental/paths/code_organization.md +0 -2
  595. relationalai/experimental/paths/examples/Movies.ipynb +0 -16328
  596. relationalai/experimental/paths/examples/basic_example.py +0 -40
  597. relationalai/experimental/paths/examples/minimal_engine_warmup.py +0 -3
  598. relationalai/experimental/paths/examples/movie_example.py +0 -77
  599. relationalai/experimental/paths/examples/movies_data/actedin.csv +0 -193
  600. relationalai/experimental/paths/examples/movies_data/directed.csv +0 -45
  601. relationalai/experimental/paths/examples/movies_data/follows.csv +0 -7
  602. relationalai/experimental/paths/examples/movies_data/movies.csv +0 -39
  603. relationalai/experimental/paths/examples/movies_data/person.csv +0 -134
  604. relationalai/experimental/paths/examples/movies_data/produced.csv +0 -16
  605. relationalai/experimental/paths/examples/movies_data/ratings.csv +0 -10
  606. relationalai/experimental/paths/examples/movies_data/wrote.csv +0 -11
  607. relationalai/experimental/paths/examples/paths_benchmark.py +0 -115
  608. relationalai/experimental/paths/examples/paths_example.py +0 -116
  609. relationalai/experimental/paths/examples/pattern_to_automaton.py +0 -28
  610. relationalai/experimental/paths/find_paths_via_automaton.py +0 -85
  611. relationalai/experimental/paths/graph.py +0 -185
  612. relationalai/experimental/paths/path_algorithms/find_paths.py +0 -280
  613. relationalai/experimental/paths/path_algorithms/one_sided_ball_repetition.py +0 -26
  614. relationalai/experimental/paths/path_algorithms/one_sided_ball_upto.py +0 -111
  615. relationalai/experimental/paths/path_algorithms/single.py +0 -59
  616. relationalai/experimental/paths/path_algorithms/two_sided_balls_repetition.py +0 -39
  617. relationalai/experimental/paths/path_algorithms/two_sided_balls_upto.py +0 -103
  618. relationalai/experimental/paths/path_algorithms/usp-old.py +0 -130
  619. relationalai/experimental/paths/path_algorithms/usp-tuple.py +0 -183
  620. relationalai/experimental/paths/path_algorithms/usp.py +0 -150
  621. relationalai/experimental/paths/product_graph.py +0 -93
  622. relationalai/experimental/paths/rpq/automaton.py +0 -584
  623. relationalai/experimental/paths/rpq/diagnostics.py +0 -56
  624. relationalai/experimental/paths/rpq/rpq.py +0 -378
  625. relationalai/experimental/paths/tests/tests_limit_sp_max_length.py +0 -90
  626. relationalai/experimental/paths/tests/tests_limit_sp_multiple.py +0 -119
  627. relationalai/experimental/paths/tests/tests_limit_sp_single.py +0 -104
  628. relationalai/experimental/paths/tests/tests_limit_walks_multiple.py +0 -113
  629. relationalai/experimental/paths/tests/tests_limit_walks_single.py +0 -149
  630. relationalai/experimental/paths/tests/tests_one_sided_ball_repetition_multiple.py +0 -70
  631. relationalai/experimental/paths/tests/tests_one_sided_ball_repetition_single.py +0 -64
  632. relationalai/experimental/paths/tests/tests_one_sided_ball_upto_multiple.py +0 -115
  633. relationalai/experimental/paths/tests/tests_one_sided_ball_upto_single.py +0 -75
  634. relationalai/experimental/paths/tests/tests_single_paths.py +0 -152
  635. relationalai/experimental/paths/tests/tests_single_walks.py +0 -208
  636. relationalai/experimental/paths/tests/tests_single_walks_undirected.py +0 -297
  637. relationalai/experimental/paths/tests/tests_two_sided_balls_repetition_multiple.py +0 -107
  638. relationalai/experimental/paths/tests/tests_two_sided_balls_repetition_single.py +0 -76
  639. relationalai/experimental/paths/tests/tests_two_sided_balls_upto_multiple.py +0 -76
  640. relationalai/experimental/paths/tests/tests_two_sided_balls_upto_single.py +0 -110
  641. relationalai/experimental/paths/tests/tests_usp_nsp_multiple.py +0 -229
  642. relationalai/experimental/paths/tests/tests_usp_nsp_single.py +0 -108
  643. relationalai/experimental/paths/tree_agg.py +0 -168
  644. relationalai/experimental/paths/utilities/iterators.py +0 -27
  645. relationalai/experimental/paths/utilities/prefix_sum.py +0 -91
  646. relationalai/experimental/solvers.py +0 -1095
  647. relationalai/loaders/csv.py +0 -195
  648. relationalai/loaders/loader.py +0 -177
  649. relationalai/loaders/types.py +0 -23
  650. relationalai/rel_emitter.py +0 -373
  651. relationalai/rel_utils.py +0 -185
  652. relationalai/semantics/designs/query_builder/identify_by.md +0 -106
  653. relationalai/semantics/devtools/benchmark_lqp.py +0 -535
  654. relationalai/semantics/devtools/compilation_manager.py +0 -294
  655. relationalai/semantics/devtools/extract_lqp.py +0 -110
  656. relationalai/semantics/internal/internal.py +0 -3785
  657. relationalai/semantics/internal/snowflake.py +0 -329
  658. relationalai/semantics/lqp/README.md +0 -34
  659. relationalai/semantics/lqp/algorithms.py +0 -173
  660. relationalai/semantics/lqp/builtins.py +0 -213
  661. relationalai/semantics/lqp/compiler.py +0 -22
  662. relationalai/semantics/lqp/constructors.py +0 -68
  663. relationalai/semantics/lqp/executor.py +0 -518
  664. relationalai/semantics/lqp/export_rewriter.py +0 -40
  665. relationalai/semantics/lqp/intrinsics.py +0 -24
  666. relationalai/semantics/lqp/ir.py +0 -150
  667. relationalai/semantics/lqp/model2lqp.py +0 -1056
  668. relationalai/semantics/lqp/passes.py +0 -38
  669. relationalai/semantics/lqp/primitives.py +0 -252
  670. relationalai/semantics/lqp/result_helpers.py +0 -266
  671. relationalai/semantics/lqp/rewrite/__init__.py +0 -32
  672. relationalai/semantics/lqp/rewrite/algorithm.py +0 -385
  673. relationalai/semantics/lqp/rewrite/annotate_constraints.py +0 -69
  674. relationalai/semantics/lqp/rewrite/cdc.py +0 -216
  675. relationalai/semantics/lqp/rewrite/constants_to_vars.py +0 -70
  676. relationalai/semantics/lqp/rewrite/deduplicate_vars.py +0 -104
  677. relationalai/semantics/lqp/rewrite/eliminate_data.py +0 -108
  678. relationalai/semantics/lqp/rewrite/extract_common.py +0 -340
  679. relationalai/semantics/lqp/rewrite/extract_keys.py +0 -577
  680. relationalai/semantics/lqp/rewrite/flatten_script.py +0 -301
  681. relationalai/semantics/lqp/rewrite/function_annotations.py +0 -114
  682. relationalai/semantics/lqp/rewrite/functional_dependencies.py +0 -348
  683. relationalai/semantics/lqp/rewrite/period_math.py +0 -77
  684. relationalai/semantics/lqp/rewrite/quantify_vars.py +0 -339
  685. relationalai/semantics/lqp/rewrite/splinter.py +0 -76
  686. relationalai/semantics/lqp/rewrite/unify_definitions.py +0 -323
  687. relationalai/semantics/lqp/types.py +0 -101
  688. relationalai/semantics/lqp/utils.py +0 -170
  689. relationalai/semantics/lqp/validators.py +0 -70
  690. relationalai/semantics/metamodel/compiler.py +0 -134
  691. relationalai/semantics/metamodel/dependency.py +0 -880
  692. relationalai/semantics/metamodel/executor.py +0 -78
  693. relationalai/semantics/metamodel/factory.py +0 -287
  694. relationalai/semantics/metamodel/helpers.py +0 -368
  695. relationalai/semantics/metamodel/ir.py +0 -924
  696. relationalai/semantics/metamodel/rewrite/__init__.py +0 -8
  697. relationalai/semantics/metamodel/rewrite/discharge_constraints.py +0 -39
  698. relationalai/semantics/metamodel/rewrite/dnf_union_splitter.py +0 -220
  699. relationalai/semantics/metamodel/rewrite/extract_nested_logicals.py +0 -78
  700. relationalai/semantics/metamodel/rewrite/flatten.py +0 -590
  701. relationalai/semantics/metamodel/rewrite/format_outputs.py +0 -256
  702. relationalai/semantics/metamodel/rewrite/handle_aggregations_and_ranks.py +0 -237
  703. relationalai/semantics/metamodel/typer/checker.py +0 -355
  704. relationalai/semantics/metamodel/typer/typer.py +0 -1396
  705. relationalai/semantics/metamodel/util.py +0 -506
  706. relationalai/semantics/metamodel/visitor.py +0 -945
  707. relationalai/semantics/reasoners/__init__.py +0 -10
  708. relationalai/semantics/reasoners/graph/README.md +0 -620
  709. relationalai/semantics/reasoners/graph/__init__.py +0 -37
  710. relationalai/semantics/reasoners/graph/core.py +0 -9019
  711. relationalai/semantics/reasoners/graph/design/beyond_demand_transform.md +0 -797
  712. relationalai/semantics/reasoners/graph/tests/README.md +0 -21
  713. relationalai/semantics/reasoners/optimization/__init__.py +0 -68
  714. relationalai/semantics/reasoners/optimization/common.py +0 -88
  715. relationalai/semantics/reasoners/optimization/solvers_dev.py +0 -568
  716. relationalai/semantics/reasoners/optimization/solvers_pb.py +0 -1407
  717. relationalai/semantics/rel/builtins.py +0 -40
  718. relationalai/semantics/rel/compiler.py +0 -994
  719. relationalai/semantics/rel/executor.py +0 -363
  720. relationalai/semantics/rel/rel.py +0 -482
  721. relationalai/semantics/rel/rel_utils.py +0 -276
  722. relationalai/semantics/snowflake/__init__.py +0 -3
  723. relationalai/semantics/sql/compiler.py +0 -2503
  724. relationalai/semantics/sql/executor/duck_db.py +0 -52
  725. relationalai/semantics/sql/executor/result_helpers.py +0 -64
  726. relationalai/semantics/sql/executor/snowflake.py +0 -149
  727. relationalai/semantics/sql/rewrite/denormalize.py +0 -222
  728. relationalai/semantics/sql/rewrite/double_negation.py +0 -49
  729. relationalai/semantics/sql/rewrite/recursive_union.py +0 -127
  730. relationalai/semantics/sql/rewrite/sort_output_query.py +0 -246
  731. relationalai/semantics/sql/sql.py +0 -504
  732. relationalai/semantics/std/pragmas.py +0 -11
  733. relationalai/semantics/std/std.py +0 -14
  734. relationalai/semantics/tests/lqp/algorithms.py +0 -345
  735. relationalai/semantics/tests/test_snapshot_abstract.py +0 -144
  736. relationalai/semantics/tests/test_snapshot_base.py +0 -9
  737. relationalai/semantics/tests/utils.py +0 -46
  738. relationalai/std/__init__.py +0 -70
  739. relationalai/tools/cli.py +0 -2089
  740. relationalai/tools/cli_controls.py +0 -1975
  741. relationalai/tools/cli_helpers.py +0 -802
  742. relationalai/tools/debugger_client.py +0 -109
  743. relationalai/tools/debugger_server.py +0 -302
  744. relationalai/tools/dev.py +0 -685
  745. relationalai/tools/notes +0 -7
  746. relationalai/tools/qb_debugger.py +0 -425
  747. relationalai/tools/txn_progress.py +0 -188
  748. relationalai/util/clean_up_databases.py +0 -95
  749. relationalai/util/list_databases.py +0 -9
  750. relationalai/util/otel_configuration.py +0 -26
  751. relationalai/util/otel_handler.py +0 -484
  752. relationalai/util/snowflake_handler.py +0 -88
  753. relationalai/util/span_format_test.py +0 -43
  754. relationalai/util/span_tracker.py +0 -207
  755. relationalai/util/spans_file_handler.py +0 -72
  756. relationalai/util/tracing_handler.py +0 -34
  757. relationalai-0.13.5.dist-info/METADATA +0 -74
  758. relationalai-0.13.5.dist-info/RECORD +0 -473
  759. relationalai-0.13.5.dist-info/WHEEL +0 -4
  760. relationalai-0.13.5.dist-info/entry_points.txt +0 -3
  761. relationalai-0.13.5.dist-info/licenses/LICENSE +0 -202
  762. relationalai_test_util/__init__.py +0 -4
  763. relationalai_test_util/fixtures.py +0 -233
  764. relationalai_test_util/snapshot.py +0 -252
  765. relationalai_test_util/traceback.py +0 -118
  766. /relationalai/{analysis → semantics/frontend}/__init__.py +0 -0
  767. /relationalai/{auth/__init__.py → semantics/metamodel/metamodel_compiler.py} +0 -0
  768. /relationalai/{early_access → shims}/__init__.py +0 -0
  769. {relationalai/early_access/dsl/adapters → v0/relationalai/analysis}/__init__.py +0 -0
  770. {relationalai → v0/relationalai}/analysis/mechanistic.py +0 -0
  771. {relationalai → v0/relationalai}/analysis/whynot.py +0 -0
  772. {relationalai/early_access/dsl/adapters/orm → v0/relationalai/auth}/__init__.py +0 -0
  773. {relationalai → v0/relationalai}/auth/jwt_generator.py +0 -0
  774. {relationalai → v0/relationalai}/auth/oauth_callback_server.py +0 -0
  775. {relationalai → v0/relationalai}/auth/token_handler.py +0 -0
  776. {relationalai → v0/relationalai}/auth/util.py +0 -0
  777. {relationalai/clients/resources/snowflake → v0/relationalai/clients}/cache_store.py +0 -0
  778. {relationalai → v0/relationalai}/compiler.py +0 -0
  779. {relationalai → v0/relationalai}/dependencies.py +0 -0
  780. {relationalai → v0/relationalai}/docutils.py +0 -0
  781. {relationalai/early_access/dsl/adapters/owl → v0/relationalai/early_access}/__init__.py +0 -0
  782. {relationalai → v0/relationalai}/early_access/dsl/__init__.py +0 -0
  783. {relationalai/early_access/dsl/bindings → v0/relationalai/early_access/dsl/adapters}/__init__.py +0 -0
  784. {relationalai/early_access/dsl/bindings/legacy → v0/relationalai/early_access/dsl/adapters/orm}/__init__.py +0 -0
  785. {relationalai → v0/relationalai}/early_access/dsl/adapters/orm/model.py +0 -0
  786. {relationalai/early_access/dsl/codegen → v0/relationalai/early_access/dsl/adapters/owl}/__init__.py +0 -0
  787. {relationalai → v0/relationalai}/early_access/dsl/adapters/owl/model.py +0 -0
  788. {relationalai/early_access/dsl/core/temporal → v0/relationalai/early_access/dsl/bindings}/__init__.py +0 -0
  789. {relationalai/early_access/dsl/ir → v0/relationalai/early_access/dsl/bindings/legacy}/__init__.py +0 -0
  790. {relationalai/early_access/dsl/ontologies → v0/relationalai/early_access/dsl/codegen}/__init__.py +0 -0
  791. {relationalai → v0/relationalai}/early_access/dsl/constants.py +0 -0
  792. {relationalai → v0/relationalai}/early_access/dsl/core/__init__.py +0 -0
  793. {relationalai → v0/relationalai}/early_access/dsl/core/constraints/__init__.py +0 -0
  794. {relationalai → v0/relationalai}/early_access/dsl/core/constraints/predicate/__init__.py +0 -0
  795. {relationalai → v0/relationalai}/early_access/dsl/core/stack.py +0 -0
  796. {relationalai/early_access/dsl/orm → v0/relationalai/early_access/dsl/core/temporal}/__init__.py +0 -0
  797. {relationalai → v0/relationalai}/early_access/dsl/core/utils.py +0 -0
  798. {relationalai/early_access/dsl/orm/measures → v0/relationalai/early_access/dsl/ir}/__init__.py +0 -0
  799. {relationalai/early_access/dsl/physical_metadata → v0/relationalai/early_access/dsl/ontologies}/__init__.py +0 -0
  800. {relationalai → v0/relationalai}/early_access/dsl/ontologies/raw_source.py +0 -0
  801. {relationalai/early_access/dsl/serialize → v0/relationalai/early_access/dsl/orm}/__init__.py +0 -0
  802. {relationalai/early_access/dsl/snow → v0/relationalai/early_access/dsl/orm/measures}/__init__.py +0 -0
  803. {relationalai → v0/relationalai}/early_access/dsl/orm/reasoner_errors.py +0 -0
  804. {relationalai/loaders → v0/relationalai/early_access/dsl/physical_metadata}/__init__.py +0 -0
  805. {relationalai/semantics/tests → v0/relationalai/early_access/dsl/serialize}/__init__.py +0 -0
  806. {relationalai → v0/relationalai}/early_access/dsl/serialize/binding_model.py +0 -0
  807. {relationalai → v0/relationalai}/early_access/dsl/serialize/model.py +0 -0
  808. {relationalai/semantics/tests/lqp → v0/relationalai/early_access/dsl/snow}/__init__.py +0 -0
  809. {relationalai → v0/relationalai}/early_access/tests/__init__.py +0 -0
  810. {relationalai → v0/relationalai}/environments/ci.py +0 -0
  811. {relationalai → v0/relationalai}/environments/hex.py +0 -0
  812. {relationalai → v0/relationalai}/environments/terminal.py +0 -0
  813. {relationalai → v0/relationalai}/experimental/__init__.py +0 -0
  814. {relationalai → v0/relationalai}/experimental/graphs.py +0 -0
  815. {relationalai → v0/relationalai}/experimental/paths/__init__.py +0 -0
  816. {relationalai → v0/relationalai}/experimental/paths/benchmarks/__init__.py +0 -0
  817. {relationalai → v0/relationalai}/experimental/paths/path_algorithms/__init__.py +0 -0
  818. {relationalai → v0/relationalai}/experimental/paths/rpq/__init__.py +0 -0
  819. {relationalai → v0/relationalai}/experimental/paths/rpq/filter.py +0 -0
  820. {relationalai → v0/relationalai}/experimental/paths/rpq/glushkov.py +0 -0
  821. {relationalai → v0/relationalai}/experimental/paths/rpq/transition.py +0 -0
  822. {relationalai → v0/relationalai}/experimental/paths/utilities/__init__.py +0 -0
  823. {relationalai → v0/relationalai}/experimental/paths/utilities/utilities.py +0 -0
  824. {relationalai/tools → v0/relationalai/loaders}/__init__.py +0 -0
  825. {relationalai → v0/relationalai}/metagen.py +0 -0
  826. {relationalai → v0/relationalai}/metamodel.py +0 -0
  827. {relationalai → v0/relationalai}/rel.py +0 -0
  828. {relationalai → v0/relationalai}/semantics/devtools/__init__.py +0 -0
  829. {relationalai → v0/relationalai}/semantics/internal/__init__.py +0 -0
  830. {relationalai → v0/relationalai}/semantics/internal/annotations.py +0 -0
  831. {relationalai → v0/relationalai}/semantics/lqp/__init__.py +0 -0
  832. {relationalai → v0/relationalai}/semantics/lqp/pragmas.py +0 -0
  833. {relationalai → v0/relationalai}/semantics/metamodel/dataflow.py +0 -0
  834. {relationalai → v0/relationalai}/semantics/metamodel/typer/__init__.py +0 -0
  835. {relationalai → v0/relationalai}/semantics/metamodel/types.py +0 -0
  836. {relationalai → v0/relationalai}/semantics/reasoners/experimental/__init__.py +0 -0
  837. {relationalai → v0/relationalai}/semantics/rel/__init__.py +0 -0
  838. {relationalai → v0/relationalai}/semantics/sql/__init__.py +0 -0
  839. {relationalai → v0/relationalai}/semantics/sql/executor/__init__.py +0 -0
  840. {relationalai → v0/relationalai}/semantics/sql/rewrite/__init__.py +0 -0
  841. {relationalai → v0/relationalai}/semantics/tests/logging.py +0 -0
  842. {relationalai → v0/relationalai}/std/aggregates.py +0 -0
  843. {relationalai → v0/relationalai}/std/dates.py +0 -0
  844. {relationalai → v0/relationalai}/std/graphs.py +0 -0
  845. {relationalai → v0/relationalai}/std/inspect.py +0 -0
  846. {relationalai → v0/relationalai}/std/math.py +0 -0
  847. {relationalai → v0/relationalai}/std/re.py +0 -0
  848. {relationalai → v0/relationalai}/std/strings.py +0 -0
  849. {relationalai → v0/relationalai}/tools/cleanup_snapshots.py +0 -0
  850. {relationalai → v0/relationalai}/tools/constants.py +0 -0
  851. {relationalai → v0/relationalai}/tools/query_utils.py +0 -0
  852. {relationalai → v0/relationalai}/tools/snapshot_viewer.py +0 -0
  853. {relationalai → v0/relationalai}/util/__init__.py +0 -0
  854. {relationalai → v0/relationalai}/util/constants.py +0 -0
  855. {relationalai → v0/relationalai}/util/graph.py +0 -0
  856. {relationalai → v0/relationalai}/util/timeout.py +0 -0
@@ -1,3193 +0,0 @@
1
- # pyright: reportUnusedExpression=false
2
- from __future__ import annotations
3
- import base64
4
- import importlib.resources
5
- import io
6
- import re
7
- import json
8
- import time
9
- import textwrap
10
- import ast
11
- import uuid
12
- import warnings
13
- import atexit
14
- import hashlib
15
- from dataclasses import dataclass
16
-
17
- from ....auth.token_handler import TokenHandler
18
- from ....clients.exec_txn_poller import ExecTxnPoller
19
- import snowflake.snowpark
20
-
21
- from ....rel_utils import sanitize_identifier, to_fqn_relation_name
22
- from ....tools.constants import FIELD_PLACEHOLDER, SNOWFLAKE_AUTHS, USE_GRAPH_INDEX, DEFAULT_QUERY_TIMEOUT_MINS, WAIT_FOR_STREAM_SYNC, Generation
23
- from .... import std
24
- from collections import defaultdict
25
- import requests
26
- import snowflake.connector
27
- import pyarrow as pa
28
-
29
- from snowflake.snowpark import Session
30
- from snowflake.snowpark.context import get_active_session
31
- from ... import result_helpers
32
- from .... import debugging
33
- from typing import Any, Dict, Iterable, Tuple, List, Literal, cast
34
-
35
- from pandas import DataFrame
36
-
37
- from ....tools.cli_controls import Spinner
38
- from ...types import AvailableModel, EngineState, Import, ImportSource, ImportSourceTable, ImportsStatus, SourceInfo, TransactionAsyncResponse
39
- from ...config import Config
40
- from ...client import Client, ExportParams, ProviderBase, ResourcesBase
41
- from ...util import IdentityParser, escape_for_f_string, get_pyrel_version, get_with_retries, poll_with_specified_overhead, safe_json_loads, sanitize_module_name, scrub_exception, wrap_with_request_id, normalize_datetime
42
- from .engine_service import EngineServiceSQL, EngineType
43
- from .util import (
44
- collect_error_messages,
45
- process_jinja_template,
46
- type_to_sql,
47
- type_to_snowpark,
48
- sanitize_user_name as _sanitize_user_name,
49
- normalize_params,
50
- format_sproc_name,
51
- is_azure_url,
52
- is_container_runtime,
53
- imports_to_dicts,
54
- txn_list_to_dicts,
55
- decrypt_artifact,
56
- )
57
- from ....environments import runtime_env, HexEnvironment, SnowbookEnvironment
58
- from .... import dsl, rel, metamodel as m
59
- from ....errors import EngineProvisioningFailed, EngineNameValidationException, Errors, GuardRailsException, InvalidAliasError, InvalidEngineSizeError, InvalidSourceTypeWarning, RAIException, HexSessionException, SnowflakeChangeTrackingNotEnabledException, SnowflakeDatabaseException, SnowflakeImportMissingException, SnowflakeInvalidSource, SnowflakeMissingConfigValuesException, SnowflakeProxyAPIDeprecationWarning, SnowflakeProxySourceError, ModelNotFoundException, UnknownSourceWarning, RowsDroppedFromTargetTableWarning, QueryTimeoutExceededException
60
- from concurrent.futures import ThreadPoolExecutor
61
- from datetime import datetime, timedelta
62
- from snowflake.snowpark.types import StringType, StructField, StructType
63
- # Import error handlers and constants
64
- from .error_handlers import (
65
- ErrorHandler,
66
- DuoSecurityErrorHandler,
67
- AppMissingErrorHandler,
68
- AppFunctionMissingErrorHandler,
69
- DatabaseErrorsHandler,
70
- EngineErrorsHandler,
71
- ServiceNotStartedErrorHandler,
72
- TransactionAbortedErrorHandler,
73
- )
74
- # Import engine state handlers
75
- from .engine_state_handlers import (
76
- EngineStateHandler,
77
- EngineContext,
78
- SyncPendingStateHandler,
79
- SyncSuspendedStateHandler,
80
- SyncReadyStateHandler,
81
- SyncGoneStateHandler,
82
- SyncMissingEngineHandler,
83
- AsyncPendingStateHandler,
84
- AsyncSuspendedStateHandler,
85
- AsyncReadyStateHandler,
86
- AsyncGoneStateHandler,
87
- AsyncMissingEngineHandler,
88
- )
89
-
90
-
91
- #--------------------------------------------------
92
- # Constants
93
- #--------------------------------------------------
94
-
95
- # transaction list and get return different fields (duration vs timings)
96
- LIST_TXN_SQL_FIELDS = ["id", "database_name", "engine_name", "state", "abort_reason", "read_only","created_by", "created_on", "finished_at", "duration"]
97
- GET_TXN_SQL_FIELDS = ["id", "database", "engine", "state", "abort_reason", "read_only","created_by", "created_on", "finished_at", "timings"]
98
- VALID_ENGINE_STATES = ["READY", "PENDING"]
99
- # Note: ENGINE_ERRORS, ENGINE_NOT_READY_MSGS, DATABASE_ERRORS moved to util.py
100
- PYREL_ROOT_DB = 'pyrel_root_db'
101
-
102
- TERMINAL_TXN_STATES = ["COMPLETED", "ABORTED"]
103
-
104
- TXN_ABORT_REASON_TIMEOUT = "transaction timeout"
105
- GUARDRAILS_ABORT_REASON = "guard rail violation"
106
-
107
- ENABLE_GUARD_RAILS_FLAG = "enable_guard_rails"
108
-
109
- ENABLE_GUARD_RAILS_HEADER = "X-RAI-Enable-Guard-Rails"
110
-
111
- #--------------------------------------------------
112
- # Helpers
113
- #--------------------------------------------------
114
-
115
- def should_enable_guard_rails(config) -> bool:
116
- return bool(config.get(ENABLE_GUARD_RAILS_FLAG, False))
117
-
118
- #--------------------------------------------------
119
- # Resources
120
- #--------------------------------------------------
121
-
122
- APP_NAME = "___RAI_APP___"
123
-
124
- @dataclass
125
- class ExecContext:
126
- """Execution context for SQL queries, containing all parameters needed for execution and retry."""
127
- code: str
128
- params: List[Any] | None = None
129
- raw: bool = False
130
- help: bool = True
131
- skip_engine_db_error_retry: bool = False
132
-
133
- def re_execute(self, resources: 'Resources') -> Any:
134
- """Re-execute this context's query using the provided resources instance."""
135
- return resources._exec(
136
- code=self.code,
137
- params=self.params,
138
- raw=self.raw,
139
- help=self.help,
140
- skip_engine_db_error_retry=self.skip_engine_db_error_retry
141
- )
142
-
143
-
144
- @dataclass
145
- class TxnCreationResult:
146
- """Result of creating a transaction via _create_v2_txn.
147
-
148
- This standardizes the response format between different implementations
149
- (SQL stored procedure vs HTTP direct access).
150
- """
151
- txn_id: str
152
- state: str
153
- artifact_info: Dict[str, Dict] # Populated if fast-path (state is COMPLETED/ABORTED)
154
-
155
-
156
- @dataclass
157
- class TxnStatusResponse:
158
- """Transaction progress response for transaction status checks."""
159
- txn_id: str
160
- finished: bool
161
- abort_reason: str | None = None
162
- progress: Dict | None = None
163
-
164
- class Resources(ResourcesBase):
165
- def __init__(
166
- self,
167
- profile: str | None = None,
168
- config: Config | None = None,
169
- connection: Session | None = None,
170
- dry_run: bool = False,
171
- reset_session: bool = False,
172
- generation: Generation | None = None,
173
- language: str = "rel", # Accepted for backward compatibility, but not stored in base class
174
- ):
175
- super().__init__(profile, config=config)
176
- self._token_handler: TokenHandler | None = None
177
- self._session = connection
178
- self.generation = generation
179
- if self._session is None and not dry_run:
180
- try:
181
- # we may still be constructing the config, so this can fail now,
182
- # if so we'll create later
183
- self._session = self.get_sf_session(reset_session)
184
- except Exception:
185
- pass
186
- self._pending_transactions: list[str] = []
187
- self._ns_cache = {}
188
- # self.sources contains fully qualified Snowflake table/view names
189
- self.sources: set[str] = set()
190
- self._sproc_models = None
191
- # Store language for backward compatibility (used by child classes for use_index polling)
192
- self.language = language
193
- # Engine subsystem (composition: keeps engine CRUD isolated from the core Resources class)
194
- self._engines = EngineServiceSQL(self)
195
- # Register error and state handlers
196
- self._register_handlers()
197
- # Register atexit callback to cancel pending transactions
198
- atexit.register(self.cancel_pending_transactions)
199
-
200
- @property
201
- def engines(self) -> EngineServiceSQL:
202
- return self._engines
203
-
204
- #--------------------------------------------------
205
- # Initialization & Properties
206
- #--------------------------------------------------
207
-
208
- def _register_handlers(self) -> None:
209
- """Register error and engine state handlers for processing."""
210
- # Register base handlers using getter methods that subclasses can override
211
- # Use defensive copying to ensure each instance has its own handler lists
212
- # and prevent cross-instance contamination from subclass mutations
213
- self._error_handlers = list(self._get_error_handlers())
214
- self._sync_engine_state_handlers = list(self._get_engine_state_handlers(is_async=False))
215
- self._async_engine_state_handlers = list(self._get_engine_state_handlers(is_async=True))
216
-
217
- def _get_error_handlers(self) -> list[ErrorHandler]:
218
- """Get list of error handlers. Subclasses can override to add custom handlers.
219
-
220
- Returns:
221
- List of error handlers for standard error processing using Strategy Pattern.
222
-
223
- Example:
224
- def _get_error_handlers(self) -> list[ErrorHandler]:
225
- # Get base handlers
226
- handlers = super()._get_error_handlers()
227
- # Add custom handler
228
- handlers.append(MyCustomErrorHandler())
229
- return handlers
230
- """
231
- return [
232
- AppMissingErrorHandler(),
233
- AppFunctionMissingErrorHandler(),
234
- ServiceNotStartedErrorHandler(),
235
- DuoSecurityErrorHandler(),
236
- DatabaseErrorsHandler(),
237
- EngineErrorsHandler(),
238
- TransactionAbortedErrorHandler(),
239
- ]
240
-
241
- def _get_engine_state_handlers(self, is_async: bool = False) -> list[EngineStateHandler]:
242
- """Get list of engine state handlers. Subclasses can override.
243
-
244
- Args:
245
- is_async: If True, returns async handlers; if False, returns sync handlers.
246
-
247
- Returns:
248
- List of engine state handlers for processing engine states.
249
-
250
- Example:
251
- def _get_engine_state_handlers(self, is_async: bool = False) -> list[EngineStateHandler]:
252
- # Get base handlers
253
- handlers = super()._get_engine_state_handlers(is_async)
254
- # Add custom handler
255
- handlers.append(MyCustomStateHandler())
256
- return handlers
257
- """
258
- if is_async:
259
- return [
260
- AsyncPendingStateHandler(),
261
- AsyncSuspendedStateHandler(),
262
- AsyncReadyStateHandler(),
263
- AsyncGoneStateHandler(),
264
- AsyncMissingEngineHandler(),
265
- ]
266
- else:
267
- return [
268
- SyncPendingStateHandler(),
269
- SyncSuspendedStateHandler(),
270
- SyncReadyStateHandler(),
271
- SyncGoneStateHandler(),
272
- SyncMissingEngineHandler(),
273
- ]
274
-
275
- @property
276
- def token_handler(self) -> TokenHandler:
277
- if not self._token_handler:
278
- self._token_handler = TokenHandler.from_config(self.config)
279
- return self._token_handler
280
-
281
- def reset(self):
282
- """Reset the session."""
283
- self._session = None
284
-
285
- #--------------------------------------------------
286
- # Session Management
287
- #--------------------------------------------------
288
-
289
- def is_erp_running(self, app_name: str) -> bool:
290
- """Check if the ERP is running. The app.service_status() returns single row/column containing an array of JSON service status objects."""
291
- query = f"CALL {app_name}.app.service_status();"
292
- try:
293
- result = self._exec(query)
294
- # The result is a list of dictionaries, each with a "STATUS" key
295
- # The column name containing the result is "SERVICE_STATUS"
296
- services_status = json.loads(result[0]["SERVICE_STATUS"])
297
- # Find the dictionary with "name" of "main" and check if its "status" is "READY"
298
- for service in services_status:
299
- if service.get("name") == "main" and service.get("status") == "READY":
300
- return True
301
- return False
302
- except Exception:
303
- return False
304
-
305
- def get_sf_session(self, reset_session: bool = False):
306
- if self._session:
307
- return self._session
308
-
309
- if isinstance(runtime_env, HexEnvironment):
310
- raise HexSessionException()
311
- if isinstance(runtime_env, SnowbookEnvironment):
312
- return get_active_session()
313
- else:
314
- # if there's already been a session created, try using that
315
- # if reset_session is true always try to get the new session
316
- if not reset_session:
317
- try:
318
- return get_active_session()
319
- except Exception:
320
- pass
321
-
322
- # otherwise, create a new session
323
- missing_keys = []
324
- connection_parameters = {}
325
-
326
- authenticator = self.config.get('authenticator', None)
327
- passcode = self.config.get("passcode", "")
328
- private_key_file = self.config.get("private_key_file", "")
329
-
330
- # If the authenticator is not set, we need to set it based on the provided parameters
331
- if authenticator is None:
332
- if private_key_file != "":
333
- authenticator = "snowflake_jwt"
334
- elif passcode != "":
335
- authenticator = "username_password_mfa"
336
- else:
337
- authenticator = "snowflake"
338
- # set the default authenticator in the config so we can skip it when we check for missing keys
339
- self.config.set("authenticator", authenticator)
340
-
341
- if authenticator in SNOWFLAKE_AUTHS:
342
- required_keys = {
343
- key for key, value in SNOWFLAKE_AUTHS[authenticator].items() if value.get("required", True)
344
- }
345
- for key in required_keys:
346
- if self.config.get(key, None) is None:
347
- default = SNOWFLAKE_AUTHS[authenticator][key].get("value", None)
348
- if default is None or default == FIELD_PLACEHOLDER:
349
- # No default value and no value in the config, add to missing keys
350
- missing_keys.append(key)
351
- else:
352
- # Set the default value in the config from the auth defaults
353
- self.config.set(key, default)
354
- if missing_keys:
355
- profile = getattr(self.config, 'profile', None)
356
- config_file_path = getattr(self.config, 'file_path', None)
357
- raise SnowflakeMissingConfigValuesException(missing_keys, profile, config_file_path)
358
- for key in SNOWFLAKE_AUTHS[authenticator]:
359
- connection_parameters[key] = self.config.get(key, None)
360
- else:
361
- raise ValueError(f'Authenticator "{authenticator}" not supported')
362
-
363
- return self._build_snowflake_session(connection_parameters)
364
-
365
- def _build_snowflake_session(self, connection_parameters: Dict[str, Any]) -> Session:
366
- try:
367
- tmp = {
368
- "client_session_keep_alive": True,
369
- "client_session_keep_alive_heartbeat_frequency": 60 * 5,
370
- }
371
- tmp.update(connection_parameters)
372
- connection_parameters = tmp
373
- # authenticator programmatic access token needs to be upper cased to work...
374
- connection_parameters["authenticator"] = connection_parameters["authenticator"].upper()
375
- if "authenticator" in connection_parameters and connection_parameters["authenticator"] == "OAUTH_AUTHORIZATION_CODE":
376
- # we are replicating OAUTH_AUTHORIZATION_CODE by first retrieving the token
377
- # and then authenticating with the token via the OAUTH authenticator
378
- connection_parameters["token"] = self.token_handler.get_session_login_token()
379
- connection_parameters["authenticator"] = "OAUTH"
380
- return Session.builder.configs(connection_parameters).create()
381
- except snowflake.connector.errors.Error as e:
382
- raise SnowflakeDatabaseException(e)
383
- except Exception as e:
384
- raise e
385
-
386
- #--------------------------------------------------
387
- # Core Execution Methods
388
- #--------------------------------------------------
389
-
390
- def _exec_sql(self, code: str, params: List[Any] | None, raw=False):
391
- """
392
- Lowest-level SQL execution method.
393
-
394
- Directly executes SQL via the Snowflake session. This is the foundation
395
- for all other execution methods. It:
396
- - Replaces APP_NAME placeholder with actual app name
397
- - Executes SQL with optional parameters
398
- - Returns either raw session results or collected results
399
-
400
- Args:
401
- code: SQL code to execute (may contain APP_NAME placeholder)
402
- params: Optional SQL parameters
403
- raw: If True, return raw session results; if False, collect results
404
-
405
- Returns:
406
- Raw session results if raw=True, otherwise collected results
407
- """
408
- assert self._session is not None
409
- sess_results = self._session.sql(
410
- code.replace(APP_NAME, self.get_app_name()),
411
- params
412
- )
413
- if raw:
414
- return sess_results
415
- return sess_results.collect()
416
-
417
- def _exec(
418
- self,
419
- code: str,
420
- params: List[Any] | Any | None = None,
421
- raw: bool = False,
422
- help: bool = True,
423
- skip_engine_db_error_retry: bool = False
424
- ) -> Any:
425
- """
426
- Mid-level SQL execution method with error handling.
427
-
428
- This is the primary method for executing SQL queries. It wraps _exec_sql
429
- with comprehensive error handling and parameter normalization. Used
430
- extensively throughout the codebase for direct SQL operations like:
431
- - SHOW commands (warehouses, databases, etc.)
432
- - CALL statements to RAI app stored procedures
433
- - Transaction management queries
434
-
435
- The error handling flow:
436
- 1. Normalizes parameters and creates execution context
437
- 2. Calls _exec_sql to execute the query
438
- 3. On error, uses standard error handling (Strategy Pattern), which subclasses
439
- can influence via `_get_error_handlers()` or by overriding `_handle_standard_exec_errors()`
440
-
441
- Args:
442
- code: SQL code to execute
443
- params: Optional SQL parameters (normalized to list if needed)
444
- raw: If True, return raw session results; if False, collect results
445
- help: If True, enable error handling; if False, raise errors immediately
446
- skip_engine_db_error_retry: If True, skip use_index retry logic in error handlers
447
-
448
- Returns:
449
- Query results (collected or raw depending on 'raw' parameter)
450
- """
451
- # print(f"\n--- sql---\n{code}\n--- end sql---\n")
452
- # Ensure session is initialized
453
- if not self._session:
454
- self._session = self.get_sf_session()
455
-
456
- # Normalize parameters
457
- normalized_params = normalize_params(params)
458
-
459
- # Create execution context
460
- ctx = ExecContext(
461
- code=code,
462
- params=normalized_params,
463
- raw=raw,
464
- help=help,
465
- skip_engine_db_error_retry=skip_engine_db_error_retry
466
- )
467
-
468
- # Execute SQL
469
- try:
470
- return self._exec_sql(ctx.code, ctx.params, raw=ctx.raw)
471
- except Exception as e:
472
- if not ctx.help:
473
- raise e
474
-
475
- # Handle standard errors
476
- result = self._handle_standard_exec_errors(e, ctx)
477
- if result is not None:
478
- return result
479
-
480
- #--------------------------------------------------
481
- # Error Handling
482
- #--------------------------------------------------
483
-
484
- def _handle_standard_exec_errors(self, e: Exception, ctx: ExecContext) -> Any | None:
485
- """
486
- Handle standard Snowflake/RAI errors using Strategy Pattern.
487
-
488
- Each error type has a dedicated handler class that encapsulates
489
- the detection logic and exception creation. Handlers are processed
490
- in order until one matches and handles the error.
491
- """
492
- message = str(e).lower()
493
-
494
- # Try each handler in order until one matches
495
- for handler in self._error_handlers:
496
- if handler.matches(e, message, ctx, self):
497
- result = handler.handle(e, ctx, self)
498
- if result is not None:
499
- return result
500
- return # Handler raised exception, we're done
501
-
502
- # Fallback: transform to RAIException
503
- raise RAIException(str(e))
504
-
505
- #--------------------------------------------------
506
- # Feature Detection & Configuration
507
- #--------------------------------------------------
508
-
509
- def is_direct_access_enabled(self) -> bool:
510
- try:
511
- feature_enabled = self._exec(
512
- f"call {APP_NAME}.APP.DIRECT_INGRESS_ENABLED();"
513
- )
514
- if not feature_enabled:
515
- return False
516
-
517
- # Even if the feature is enabled, customers still need to reactivate ERP to ensure the endpoint is available.
518
- endpoint = self._exec(
519
- f"call {APP_NAME}.APP.SERVICE_ENDPOINT(true);"
520
- )
521
- if not endpoint or endpoint[0][0] is None:
522
- return False
523
-
524
- return feature_enabled[0][0]
525
- except Exception as e:
526
- raise Exception(f"Unable to determine if direct access is enabled. Details error: {e}") from e
527
-
528
-
529
- def is_account_flag_set(self, flag: str) -> bool:
530
- results = self._exec(
531
- f"SHOW PARAMETERS LIKE '%{flag}%' IN ACCOUNT;"
532
- )
533
- if not results:
534
- return False
535
- return results[0]["value"] == "true"
536
-
537
- #--------------------------------------------------
538
- # Databases
539
- #--------------------------------------------------
540
-
541
- def get_database(self, database: str):
542
- try:
543
- results = self._exec(
544
- f"call {APP_NAME}.api.get_database('{database}');"
545
- )
546
- except Exception as e:
547
- messages = collect_error_messages(e)
548
- if any("database does not exist" in msg for msg in messages):
549
- return None
550
- raise e
551
-
552
- if not results:
553
- return None
554
- db = results[0]
555
- if not db:
556
- return None
557
- return {
558
- "id": db["ID"],
559
- "name": db["NAME"],
560
- "created_by": db["CREATED_BY"],
561
- "created_on": db["CREATED_ON"],
562
- "deleted_by": db["DELETED_BY"],
563
- "deleted_on": db["DELETED_ON"],
564
- "state": db["STATE"],
565
- }
566
-
567
- def get_installed_packages(self, database: str) -> Dict | None:
568
- query = f"call {APP_NAME}.api.get_installed_package_versions('{database}');"
569
- try:
570
- results = self._exec(query)
571
- except Exception as e:
572
- messages = collect_error_messages(e)
573
- if any("database does not exist" in msg for msg in messages):
574
- return None
575
- # fallback to None for old sql-lib versions
576
- if any("unknown user-defined function" in msg for msg in messages):
577
- return None
578
- raise e
579
-
580
- if not results:
581
- return None
582
-
583
- row = results[0]
584
- if not row:
585
- return None
586
-
587
- return safe_json_loads(row["PACKAGE_VERSIONS"])
588
-
589
- #--------------------------------------------------
590
- # Engines
591
- #--------------------------------------------------
592
-
593
- def _prepare_engine_params(
594
- self,
595
- name: str | None,
596
- size: str | None,
597
- use_default_size: bool = False
598
- ) -> tuple[str, str | None]:
599
- """
600
- Prepare engine parameters by resolving and validating name and size.
601
-
602
- Args:
603
- name: Engine name (None to use default)
604
- size: Engine size (None to use config or default)
605
- use_default_size: If True and size is None, use get_default_engine_size()
606
-
607
- Returns:
608
- Tuple of (engine_name, engine_size)
609
-
610
- Raises:
611
- EngineNameValidationException: If engine name is invalid
612
- Exception: If engine size is invalid
613
- """
614
- from relationalai.tools.cli_helpers import validate_engine_name
615
-
616
- engine_name = name or self.get_default_engine_name()
617
-
618
- # Resolve engine size
619
- if size:
620
- engine_size = size
621
- else:
622
- if use_default_size:
623
- engine_size = self.config.get_default_engine_size()
624
- else:
625
- engine_size = self.config.get("engine_size", None)
626
-
627
- # Validate engine size
628
- if engine_size:
629
- is_size_valid, sizes = self._engines.validate_engine_size(engine_size)
630
- if not is_size_valid:
631
- error_msg = f"Invalid engine size '{engine_size}'. Valid sizes are: {', '.join(sizes)}"
632
- if use_default_size:
633
- error_msg = f"Invalid engine size in config: '{engine_size}'. Valid sizes are: {', '.join(sizes)}"
634
- raise Exception(error_msg)
635
-
636
- # Validate engine name
637
- is_name_valid, _ = validate_engine_name(engine_name)
638
- if not is_name_valid:
639
- raise EngineNameValidationException(engine_name)
640
-
641
- return engine_name, engine_size
642
-
643
- def _get_state_handler(self, state: str | None, handlers: list[EngineStateHandler]) -> EngineStateHandler:
644
- """Find the appropriate state handler for the given state."""
645
- for handler in handlers:
646
- if handler.handles_state(state):
647
- return handler
648
- # Fallback to missing engine handler if no match
649
- return handlers[-1] # Last handler should be MissingEngineHandler
650
-
651
- def _process_engine_state(
652
- self,
653
- engine: EngineState | Dict[str, Any] | None,
654
- context: EngineContext,
655
- handlers: list[EngineStateHandler],
656
- set_active_on_success: bool = False
657
- ) -> EngineState | Dict[str, Any] | None:
658
- """
659
- Process engine state using appropriate state handler.
660
-
661
- Args:
662
- engine: Current engine state (or None if missing)
663
- context: Engine context for state handling
664
- handlers: List of state handlers to use (sync or async)
665
- set_active_on_success: If True, set engine as active when handler returns engine
666
-
667
- Returns:
668
- Engine state after processing, or None if engine needs to be created
669
- """
670
- # Find and execute appropriate state handler
671
- state = engine["state"] if engine else None
672
- handler = self._get_state_handler(state, handlers)
673
- engine = handler.handle(engine, context, self)
674
-
675
- # If handler returned None and we didn't start with None state, engine needs to be created
676
- # (e.g., GONE state deleted the engine, so we need to create a new one)
677
- if not engine and state is not None:
678
- handler = self._get_state_handler(None, handlers)
679
- handler.handle(None, context, self)
680
- elif set_active_on_success:
681
- # Cast to EngineState for type safety (handlers return EngineDict which is compatible)
682
- self._set_active_engine(cast(EngineState, engine))
683
-
684
- return engine
685
-
686
- def _handle_engine_creation_errors(self, error: Exception, engine_name: str, preserve_rai_exception: bool = False) -> None:
687
- """
688
- Handle errors during engine creation using error handlers.
689
-
690
- Args:
691
- error: The exception that occurred
692
- engine_name: Name of the engine being created
693
- preserve_rai_exception: If True, re-raise RAIException without wrapping
694
-
695
- Raises:
696
- RAIException: If preserve_rai_exception is True and error is RAIException
697
- EngineProvisioningFailed: If error is not handled by error handlers
698
- """
699
- # Preserve RAIException passthrough if requested (for async mode)
700
- if preserve_rai_exception and isinstance(error, RAIException):
701
- raise error
702
-
703
- # Check if this is a known error type that should be handled by error handlers
704
- message = str(error).lower()
705
- handled = False
706
- # Engine creation isn't tied to a specific SQL ExecContext; pass a context that
707
- # disables use_index retry behavior (and any future ctx-dependent handlers).
708
- ctx = ExecContext(code="", help=True, skip_engine_db_error_retry=True)
709
- for handler in self._error_handlers:
710
- if handler.matches(error, message, ctx, self):
711
- handler.handle(error, ctx, self)
712
- handled = True
713
- break # Handler raised exception, we're done
714
-
715
- # If not handled by error handlers, wrap in EngineProvisioningFailed
716
- if not handled:
717
- raise EngineProvisioningFailed(engine_name, error) from error
718
-
719
- def get_engine_sizes(self, cloud_provider: str|None=None):
720
- return self._engines.get_engine_sizes(cloud_provider=cloud_provider)
721
-
722
- def list_engines(
723
- self,
724
- state: str | None = None,
725
- name: str | None = None,
726
- type: str | None = None,
727
- size: str | None = None,
728
- created_by: str | None = None,
729
- ):
730
- return self._engines.list_engines(
731
- state=state,
732
- name=name,
733
- type=type,
734
- size=size,
735
- created_by=created_by,
736
- )
737
-
738
- def get_engine(self, name: str, type: str):
739
- return self._engines.get_engine(name, type)
740
-
741
- def get_default_engine_name(self) -> str:
742
- if self.config.get("engine_name", None) is not None:
743
- profile = self.config.profile
744
- raise InvalidAliasError(f"""
745
- 'engine_name' is not a valid config option.
746
- If you meant to use a specific engine, use 'engine' instead.
747
- Otherwise, remove it from your '{profile}' configuration profile.
748
- """)
749
- engine = self.config.get("engine", None)
750
- if not engine and self.config.get("user", None):
751
- engine = _sanitize_user_name(str(self.config.get("user")))
752
- if not engine:
753
- engine = self.get_user_based_engine_name()
754
- self.config.set("engine", engine)
755
- return engine
756
-
757
- def is_valid_engine_state(self, name:str):
758
- return name in VALID_ENGINE_STATES
759
-
760
- # Can be overridden by subclasses (e.g. DirectAccessResources)
761
- def _create_engine(
762
- self,
763
- name: str,
764
- type: str = EngineType.LOGIC,
765
- size: str | None = None,
766
- auto_suspend_mins: int | None= None,
767
- is_async: bool = False,
768
- headers: Dict | None = None,
769
- settings: Dict[str, Any] | None = None,
770
- ):
771
- return self._engines._create_engine(
772
- name=name,
773
- type=type,
774
- size=size,
775
- auto_suspend_mins=auto_suspend_mins,
776
- is_async=is_async,
777
- headers=headers,
778
- settings=settings,
779
- )
780
-
781
- def create_engine(
782
- self,
783
- name: str,
784
- type: str | None = None,
785
- size: str | None = None,
786
- auto_suspend_mins: int | None = None,
787
- headers: Dict | None = None,
788
- settings: Dict[str, Any] | None = None,
789
- ):
790
- if type is None:
791
- type = EngineType.LOGIC
792
- # Route through _create_engine so subclasses (e.g. DirectAccessResources)
793
- # can override engine creation behavior.
794
- return self._create_engine(
795
- name=name,
796
- type=type,
797
- size=size,
798
- auto_suspend_mins=auto_suspend_mins,
799
- is_async=False,
800
- headers=headers,
801
- settings=settings,
802
- )
803
-
804
- def create_engine_async(
805
- self,
806
- name: str,
807
- type: str = EngineType.LOGIC,
808
- size: str | None = None,
809
- auto_suspend_mins: int | None = None,
810
- ):
811
- # Route through _create_engine so subclasses (e.g. DirectAccessResources)
812
- # can override async engine creation behavior.
813
- return self._create_engine(
814
- name=name,
815
- type=type,
816
- size=size,
817
- auto_suspend_mins=auto_suspend_mins,
818
- is_async=True,
819
- )
820
-
821
- def delete_engine(self, name: str, type: str):
822
- return self._engines.delete_engine(name, type)
823
-
824
- def suspend_engine(self, name: str, type: str | None = None):
825
- return self._engines.suspend_engine(name, type)
826
-
827
- def resume_engine(self, name: str, type: str | None = None, headers: Dict | None = None) -> Dict:
828
- return self._engines.resume_engine(name, type=type, headers=headers)
829
-
830
- def resume_engine_async(self, name: str, type: str | None = None, headers: Dict | None = None) -> Dict:
831
- return self._engines.resume_engine_async(name, type=type, headers=headers)
832
-
833
- def alter_engine_pool(self, size:str|None=None, mins:int|None=None, maxs:int|None=None):
834
- """Alter engine pool node limits for Snowflake."""
835
- return self._engines.alter_engine_pool(size=size, mins=mins, maxs=maxs)
836
-
837
- #--------------------------------------------------
838
- # Graphs
839
- #--------------------------------------------------
840
-
841
- def list_graphs(self) -> List[AvailableModel]:
842
- with debugging.span("list_models"):
843
- query = textwrap.dedent(f"""
844
- SELECT NAME, ID, CREATED_BY, CREATED_ON, STATE, DELETED_BY, DELETED_ON
845
- FROM {APP_NAME}.api.databases
846
- WHERE state <> 'DELETED'
847
- ORDER BY NAME ASC;
848
- """)
849
- results = self._exec(query)
850
- if not results:
851
- return []
852
- return [
853
- {
854
- "name": row["NAME"],
855
- "id": row["ID"],
856
- "created_by": row["CREATED_BY"],
857
- "created_on": row["CREATED_ON"],
858
- "state": row["STATE"],
859
- "deleted_by": row["DELETED_BY"],
860
- "deleted_on": row["DELETED_ON"],
861
- }
862
- for row in results
863
- ]
864
-
865
- def get_graph(self, name: str):
866
- res = self.get_database(name)
867
- if res and res.get("state") != "DELETED":
868
- return res
869
-
870
- def create_graph(self, name: str):
871
- with debugging.span("create_model", name=name):
872
- self._exec(f"call {APP_NAME}.api.create_database('{name}', false, {debugging.gen_current_propagation_headers()});")
873
-
874
- def delete_graph(self, name:str, force=False, language:str="rel"):
875
- prop_hdrs = debugging.gen_current_propagation_headers()
876
- if self.config.get("use_graph_index", USE_GRAPH_INDEX):
877
- keep_database = not force and self.config.get("reuse_model", True)
878
- with debugging.span("release_index", name=name, keep_database=keep_database, language=language):
879
- #TODO add headers to release_index
880
- response = self._exec(f"call {APP_NAME}.api.release_index('{name}', OBJECT_CONSTRUCT('keep_database', {keep_database}, 'language', '{language}', 'user_agent', '{get_pyrel_version(self.generation)}'));")
881
- if response:
882
- result = next(iter(response))
883
- obj = json.loads(result["RELEASE_INDEX"])
884
- error = obj.get('error', None)
885
- if error and "Model database not found" not in error:
886
- raise Exception(f"Error releasing index: {error}")
887
- else:
888
- raise Exception("There was no response from the release index call.")
889
- else:
890
- with debugging.span("delete_model", name=name):
891
- self._exec(f"call {APP_NAME}.api.delete_database('{name}', false, {prop_hdrs});")
892
-
893
- def clone_graph(self, target_name:str, source_name:str, nowait_durable=True, force=False):
894
- if force and self.get_graph(target_name):
895
- self.delete_graph(target_name)
896
- with debugging.span("clone_model", target_name=target_name, source_name=source_name):
897
- # not a mistake: the clone_database argument order is indeed target then source:
898
- headers = debugging.gen_current_propagation_headers()
899
- self._exec(f"call {APP_NAME}.api.clone_database('{target_name}', '{source_name}', {nowait_durable}, {headers});")
900
-
901
- def _poll_use_index(
902
- self,
903
- app_name: str,
904
- sources: Iterable[str],
905
- model: str,
906
- engine_name: str,
907
- engine_size: str | None = None,
908
- program_span_id: str | None = None,
909
- headers: Dict | None = None,
910
- ) -> None:
911
- """
912
- Poll use_index to prepare indices for the given sources.
913
-
914
- This is an optional interface method. Base Resources provides a no-op implementation.
915
- UseIndexResources and DirectAccessResources override this to provide actual polling.
916
-
917
- Returns:
918
- None for base implementation. Child classes may return poller results.
919
- """
920
- return None
921
-
922
- def maybe_poll_use_index(
923
- self,
924
- app_name: str,
925
- sources: Iterable[str],
926
- model: str,
927
- engine_name: str,
928
- engine_size: str | None = None,
929
- program_span_id: str | None = None,
930
- headers: Dict | None = None,
931
- ) -> None:
932
- """
933
- Only call _poll_use_index if there are sources to process.
934
-
935
- This is an optional interface method. Base Resources provides a no-op implementation.
936
- UseIndexResources and DirectAccessResources override this to provide actual polling with caching.
937
-
938
- Returns:
939
- None for base implementation. Child classes may return poller results.
940
- """
941
- return None
942
-
943
- #--------------------------------------------------
944
- # Models
945
- #--------------------------------------------------
946
-
947
- def list_models(self, database: str, engine: str):
948
- pass
949
-
950
- def create_models(self, database: str, engine: str | None, models:List[Tuple[str, str]]) -> List[Any]:
951
- rel_code = self.create_models_code(models)
952
- self.exec_raw(database, engine, rel_code, readonly=False)
953
- # TODO: handle SPCS errors once they're figured out
954
- return []
955
-
956
- def delete_model(self, database:str, engine:str | None, name:str):
957
- self.exec_raw(database, engine, f"def delete[:rel, :catalog, :model, \"{name}\"]: rel[:catalog, :model, \"{name}\"]", readonly=False)
958
-
959
- def create_models_code(self, models:List[Tuple[str, str]]) -> str:
960
- lines = []
961
- for (name, code) in models:
962
- name = name.replace("\"", "\\\"")
963
- assert "\"\"\"\"\"\"\"" not in code, "Code literals must use fewer than 7 quotes."
964
-
965
- lines.append(textwrap.dedent(f"""
966
- def delete[:rel, :catalog, :model, "{name}"]: rel[:catalog, :model, "{name}"]
967
- def insert[:rel, :catalog, :model, "{name}"]: raw\"\"\"\"\"\"\"
968
- """) + code + "\n\"\"\"\"\"\"\"")
969
- rel_code = "\n\n".join(lines)
970
- return rel_code
971
-
972
- #--------------------------------------------------
973
- # Exports
974
- #--------------------------------------------------
975
-
976
- def list_exports(self, database: str, engine: str):
977
- return []
978
-
979
-
980
- def get_export_code(self, params: ExportParams, all_installs):
981
- sql_inputs = ", ".join([f"{name} {type_to_sql(type)}" for (name, _, type) in params.inputs])
982
- input_names = [name for (name, *_) in params.inputs]
983
- has_return_hint = params.out_fields and isinstance(params.out_fields[0], tuple)
984
- if has_return_hint:
985
- sql_out = ", ".join([f"\"{name}\" {type_to_sql(type)}" for (name, type) in params.out_fields])
986
- sql_out_names = ", ".join([f"('{name}', '{type_to_sql(type)}')" for (ix, (name, type)) in enumerate(params.out_fields)])
987
- py_outs = ", ".join([f"StructField(\"{name}\", {type_to_snowpark(type)})" for (name, type) in params.out_fields])
988
- else:
989
- sql_out = ""
990
- sql_out_names = ", ".join([f"'{name}'" for name in params.out_fields])
991
- py_outs = ", ".join([f"StructField(\"{name}\", {type_to_snowpark(str)})" for name in params.out_fields])
992
- py_inputs = ", ".join([name for (name, *_) in params.inputs])
993
- safe_rel = escape_for_f_string(params.code).strip()
994
- clean_inputs = []
995
- for (name, var, type) in params.inputs:
996
- if type is str:
997
- clean_inputs.append(f"{name} = '\"' + escape({name}) + '\"'")
998
- # Replace `var` with `name` and keep the following non-word character unchanged
999
- pattern = re.compile(re.escape(var) + r'(\W)')
1000
- value = format_sproc_name(name, type)
1001
- safe_rel = re.sub(pattern, rf"{{{value}}}\1", safe_rel)
1002
- if py_inputs:
1003
- py_inputs = f", {py_inputs}"
1004
- clean_inputs = ("\n").join(clean_inputs)
1005
- file = "export_procedure.py.jinja"
1006
- with importlib.resources.open_text(
1007
- "relationalai.clients.resources.snowflake", file
1008
- ) as f:
1009
- template = f.read()
1010
- def quote(s: str, f = False) -> str:
1011
- return '"' + s + '"' if not f else 'f"' + s + '"'
1012
-
1013
- wait_for_stream_sync = self.config.get("wait_for_stream_sync", WAIT_FOR_STREAM_SYNC)
1014
- # 1. Check the sources for staled sources
1015
- # 2. Get the object references for the sources
1016
- # TODO: this could be optimized to do it in the run time of the stored procedure
1017
- # instead of doing it here. It will make it more reliable when sources are
1018
- # modified after the stored procedure is created.
1019
- checked_sources = self._check_source_updates(self.sources)
1020
- source_obj_references = self._get_source_references(checked_sources)
1021
-
1022
- # Escape double quotes in the source object references
1023
- escaped_source_obj_references = [source.replace('"', '\\"') for source in source_obj_references]
1024
- escaped_proc_database = params.proc_database.replace('"', '\\"')
1025
-
1026
- normalized_func_name = IdentityParser(params.func_name).identity
1027
- assert normalized_func_name is not None, "Function name must be set"
1028
- skip_invalid_data = params.skip_invalid_data
1029
- python_code = process_jinja_template(
1030
- template,
1031
- func_name=quote(normalized_func_name),
1032
- database=quote(params.root_database),
1033
- proc_database=quote(escaped_proc_database),
1034
- engine=quote(params.engine),
1035
- rel_code=quote(safe_rel, f=True),
1036
- APP_NAME=quote(APP_NAME),
1037
- input_names=input_names,
1038
- outputs=sql_out,
1039
- sql_out_names=sql_out_names,
1040
- clean_inputs=clean_inputs,
1041
- py_inputs=py_inputs,
1042
- py_outs=py_outs,
1043
- skip_invalid_data=skip_invalid_data,
1044
- source_references=", ".join(escaped_source_obj_references),
1045
- install_code=all_installs.replace("\\", "\\\\").replace("\n", "\\n"),
1046
- has_return_hint=has_return_hint,
1047
- wait_for_stream_sync=wait_for_stream_sync,
1048
- ).strip()
1049
- return_clause = f"TABLE({sql_out})" if sql_out else "STRING"
1050
- destination_input = "" if sql_out else "save_as_table STRING DEFAULT NULL,"
1051
- module_name = sanitize_module_name(normalized_func_name)
1052
- stage = f"@{self.get_app_name()}.app_state.stored_proc_code_stage"
1053
- file_loc = f"{stage}/{module_name}.py"
1054
- python_code = python_code.replace(APP_NAME, self.get_app_name())
1055
-
1056
- hash = hashlib.sha256()
1057
- hash.update(python_code.encode('utf-8'))
1058
- code_hash = hash.hexdigest()
1059
- print(code_hash)
1060
-
1061
- sql_code = textwrap.dedent(f"""
1062
- CREATE OR REPLACE PROCEDURE {normalized_func_name}({sql_inputs}{sql_inputs and ',' or ''} {destination_input} engine STRING DEFAULT NULL)
1063
- RETURNS {return_clause}
1064
- LANGUAGE PYTHON
1065
- RUNTIME_VERSION = '3.10'
1066
- IMPORTS = ('{file_loc}')
1067
- PACKAGES = ('snowflake-snowpark-python')
1068
- HANDLER = 'checked_handle'
1069
- EXECUTE AS CALLER
1070
- AS
1071
- $$
1072
- import {module_name}
1073
- import inspect, hashlib, os, sys
1074
- def checked_handle(*args, **kwargs):
1075
- import_dir = sys._xoptions["snowflake_import_directory"]
1076
- wheel_path = os.path.join(import_dir, '{module_name}.py')
1077
- h = hashlib.sha256()
1078
- with open(wheel_path, 'rb') as f:
1079
- for chunk in iter(lambda: f.read(1<<20), b''):
1080
- h.update(chunk)
1081
- code_hash = h.hexdigest()
1082
- if code_hash != '{code_hash}':
1083
- raise RuntimeError("Code hash mismatch. The code has been modified since it was uploaded.")
1084
- # Call the handle function with the provided arguments
1085
- return {module_name}.handle(*args, **kwargs)
1086
-
1087
- $$;
1088
- """)
1089
- # print(f"\n--- python---\n{python_code}\n--- end python---\n")
1090
- # This check helps catch invalid code early and for dry runs:
1091
- try:
1092
- ast.parse(python_code)
1093
- except SyntaxError:
1094
- raise ValueError(f"Internal error: invalid Python code generated:\n{python_code}")
1095
- return (sql_code, python_code, file_loc)
1096
-
1097
- def get_sproc_models(self, params: ExportParams):
1098
- if self._sproc_models is not None:
1099
- return self._sproc_models
1100
-
1101
- with debugging.span("get_sproc_models"):
1102
- code = """
1103
- def output(name, model):
1104
- rel(:catalog, :model, name, model)
1105
- and not starts_with(name, "rel/")
1106
- and not starts_with(name, "pkg/rel")
1107
- and not starts_with(name, "pkg/std")
1108
- and starts_with(name, "pkg/")
1109
- """
1110
- res = self.exec_raw(params.model_database, params.engine, code, readonly=True, nowait_durable=True)
1111
- df, errors = result_helpers.format_results(res, None, ["name", "model"])
1112
- models = []
1113
- for row in df.itertuples():
1114
- models.append((row.name, row.model))
1115
- self._sproc_models = models
1116
- return models
1117
-
1118
- def create_export(self, params: ExportParams):
1119
- with debugging.span("create_export") as span:
1120
- if params.dry_run:
1121
- (sql_code, python_code, file_loc) = self.get_export_code(params, params.install_code)
1122
- span["sql"] = sql_code
1123
- return
1124
-
1125
- start = time.perf_counter()
1126
- use_graph_index = self.config.get("use_graph_index", USE_GRAPH_INDEX)
1127
- # for the non graph index case we need to create the cloned proc database
1128
- if not use_graph_index:
1129
- raise RAIException(
1130
- "To ensure permissions are properly accounted for, stored procedures require using the graph index. "
1131
- "Set use_graph_index=True in your config to proceed."
1132
- )
1133
-
1134
- models = self.get_sproc_models(params)
1135
- lib_installs = self.create_models_code(models)
1136
- all_installs = lib_installs + "\n\n" + params.install_code
1137
-
1138
- (sql_code, python_code, file_loc) = self.get_export_code(params, all_installs)
1139
-
1140
- span["sql"] = sql_code
1141
- assert self._session
1142
-
1143
- with debugging.span("upload_sproc_code"):
1144
- code_bytes = python_code.encode('utf-8')
1145
- code_stream = io.BytesIO(code_bytes)
1146
- self._session.file.put_stream(code_stream, file_loc, auto_compress=False, overwrite=True)
1147
-
1148
- with debugging.span("sql_install"):
1149
- self._exec(sql_code)
1150
-
1151
- debugging.time("export", time.perf_counter() - start, DataFrame(), code=sql_code.replace(APP_NAME, self.get_app_name()))
1152
-
1153
-
1154
- def create_export_table(self, database: str, engine: str, table: str, relation: str, columns: Dict[str, str], code: str, refresh: str|None=None):
1155
- print("Snowflake doesn't support creating export tables yet. Try creating the table manually first.")
1156
- pass
1157
-
1158
- def delete_export(self, database: str, engine: str, name: str):
1159
- pass
1160
-
1161
- #--------------------------------------------------
1162
- # Imports
1163
- #--------------------------------------------------
1164
-
1165
-
1166
- def change_stream_status(self, stream_id: str, model:str, suspend: bool):
1167
- if stream_id and model:
1168
- if suspend:
1169
- self._exec(f"CALL {APP_NAME}.api.suspend_data_stream('{stream_id}', '{model}');")
1170
- else:
1171
- self._exec(f"CALL {APP_NAME}.api.resume_data_stream('{stream_id}', '{model}');")
1172
-
1173
- def change_imports_status(self, suspend: bool):
1174
- if suspend:
1175
- self._exec(f"CALL {APP_NAME}.app.suspend_cdc();")
1176
- else:
1177
- self._exec(f"CALL {APP_NAME}.app.resume_cdc();")
1178
-
1179
- def get_imports_status(self) -> ImportsStatus|None:
1180
- # NOTE: We expect there to only ever be one result?
1181
- results = self._exec(f"CALL {APP_NAME}.app.cdc_status();")
1182
- if results:
1183
- result = next(iter(results))
1184
- engine = result['CDC_ENGINE_NAME']
1185
- engine_status = result['CDC_ENGINE_STATUS']
1186
- engine_size = result['CDC_ENGINE_SIZE']
1187
- task_status = result['CDC_TASK_STATUS']
1188
- info = result['CDC_TASK_INFO']
1189
- enabled = result['CDC_ENABLED']
1190
- return {"engine": engine, "engine_size": engine_size, "engine_status": engine_status, "status": task_status, "enabled": enabled, "info": info }
1191
- return None
1192
-
1193
- def set_imports_engine_size(self, size:str):
1194
- try:
1195
- self._exec(f"CALL {APP_NAME}.app.alter_cdc_engine_size('{size}');")
1196
- except Exception as e:
1197
- raise e
1198
-
1199
- def list_imports(
1200
- self,
1201
- id:str|None = None,
1202
- name:str|None = None,
1203
- model:str|None = None,
1204
- status:str|None = None,
1205
- creator:str|None = None,
1206
- ) -> list[Import]:
1207
- where = []
1208
- if id and isinstance(id, str):
1209
- where.append(f"LOWER(ID) = '{id.lower()}'")
1210
- if name and isinstance(name, str):
1211
- where.append(f"LOWER(FQ_OBJECT_NAME) = '{name.lower()}'")
1212
- if model and isinstance(model, str):
1213
- where.append(f"LOWER(RAI_DATABASE) = '{model.lower()}'")
1214
- if creator and isinstance(creator, str):
1215
- where.append(f"LOWER(CREATED_BY) = '{creator.lower()}'")
1216
- if status and isinstance(status, str):
1217
- where.append(f"LOWER(batch_status) = '{status.lower()}'")
1218
- where_clause = " AND ".join(where)
1219
-
1220
- # This is roughly inspired by the native app code because we don't have a way to
1221
- # get the status of multiple streams at once and doing them individually is way
1222
- # too slow. We use window functions to get the status of the stream and the batch
1223
- # details.
1224
- statement = f"""
1225
- SELECT
1226
- ID,
1227
- RAI_DATABASE,
1228
- FQ_OBJECT_NAME,
1229
- CREATED_AT,
1230
- CREATED_BY,
1231
- CASE
1232
- WHEN nextBatch.quarantined > 0 THEN 'quarantined'
1233
- ELSE nextBatch.status
1234
- END as batch_status,
1235
- nextBatch.processing_errors,
1236
- nextBatch.batches
1237
- FROM {APP_NAME}.api.data_streams as ds
1238
- LEFT JOIN (
1239
- SELECT DISTINCT
1240
- data_stream_id,
1241
- -- Get status from the progress record using window functions
1242
- FIRST_VALUE(status) OVER (
1243
- PARTITION BY data_stream_id
1244
- ORDER BY
1245
- CASE WHEN unloaded IS NOT NULL THEN 1 ELSE 0 END DESC,
1246
- unloaded ASC
1247
- ) as status,
1248
- -- Get batch_details from the same record
1249
- FIRST_VALUE(batch_details) OVER (
1250
- PARTITION BY data_stream_id
1251
- ORDER BY
1252
- CASE WHEN unloaded IS NOT NULL THEN 1 ELSE 0 END DESC,
1253
- unloaded ASC
1254
- ) as batch_details,
1255
- -- Aggregate the other fields
1256
- FIRST_VALUE(processing_details:processingErrors) OVER (
1257
- PARTITION BY data_stream_id
1258
- ORDER BY
1259
- CASE WHEN unloaded IS NOT NULL THEN 1 ELSE 0 END DESC,
1260
- unloaded ASC
1261
- ) as processing_errors,
1262
- MIN(unloaded) OVER (PARTITION BY data_stream_id) as unloaded,
1263
- COUNT(*) OVER (PARTITION BY data_stream_id) as batches,
1264
- COUNT_IF(status = 'quarantined') OVER (PARTITION BY data_stream_id) as quarantined
1265
- FROM {APP_NAME}.api.data_stream_batches
1266
- ) nextBatch
1267
- ON ds.id = nextBatch.data_stream_id
1268
- {f"where {where_clause}" if where_clause else ""}
1269
- ORDER BY FQ_OBJECT_NAME ASC;
1270
- """
1271
- results = self._exec(statement)
1272
- items = []
1273
- if results:
1274
- for stream in results:
1275
- (id, db, name, created_at, created_by, status, processing_errors, batches) = stream
1276
- if status and isinstance(status, str):
1277
- status = status.upper()
1278
- if processing_errors:
1279
- if status in ["QUARANTINED", "PENDING"]:
1280
- start = processing_errors.rfind("Error")
1281
- if start != -1:
1282
- processing_errors = processing_errors[start:-1]
1283
- else:
1284
- processing_errors = None
1285
- items.append(cast(Import, {
1286
- "id": id,
1287
- "model": db,
1288
- "name": name,
1289
- "created": created_at,
1290
- "creator": created_by,
1291
- "status": status.upper() if status else None,
1292
- "errors": processing_errors if processing_errors != "[]" else None,
1293
- "batches": f"{batches}" if batches else "",
1294
- }))
1295
- return items
1296
-
1297
- def poll_imports(self, sources:List[str], model:str):
1298
- source_set = self._create_source_set(sources)
1299
- def check_imports():
1300
- imports = [
1301
- import_
1302
- for import_ in self.list_imports(model=model)
1303
- if import_["name"] in source_set
1304
- ]
1305
- # loop through printing status for each in the format (index): (name) - (status)
1306
- statuses = [import_["status"] for import_ in imports]
1307
- if all(status == "LOADED" for status in statuses):
1308
- return True
1309
- if any(status == "QUARANTINED" for status in statuses):
1310
- failed_imports = [import_["name"] for import_ in imports if import_["status"] == "QUARANTINED"]
1311
- raise RAIException("Imports failed:" + ", ".join(failed_imports)) from None
1312
- # this check is necessary in case some of the tables are empty;
1313
- # such tables may be synced even though their status is None:
1314
- def synced(import_):
1315
- if import_["status"] == "LOADED":
1316
- return True
1317
- if import_["status"] is None:
1318
- import_full_status = self.get_import_stream(import_["name"], model)
1319
- if import_full_status and import_full_status[0]["data_sync_status"] == "SYNCED":
1320
- return True
1321
- return False
1322
- if all(synced(import_) for import_ in imports):
1323
- return True
1324
- poll_with_specified_overhead(check_imports, overhead_rate=0.1, max_delay=10)
1325
-
1326
- def _create_source_set(self, sources: List[str]) -> set:
1327
- return {
1328
- source.upper() if not IdentityParser(source).has_double_quoted_identifier else IdentityParser(source).identity
1329
- for source in sources
1330
- }
1331
-
1332
- def get_import_stream(self, name:str|None, model:str|None):
1333
- results = self._exec(f"CALL {APP_NAME}.api.get_data_stream('{name}', '{model}');")
1334
- if not results:
1335
- return None
1336
- return imports_to_dicts(results)
1337
-
1338
- def create_import_stream(self, source:ImportSource, model:str, rate = 1, options: dict|None = None):
1339
- assert isinstance(source, ImportSourceTable), "Snowflake integration only supports loading from SF Tables. Try loading your data as a table via the Snowflake interface first."
1340
- object = source.fqn
1341
-
1342
- # Parse only to the schema level
1343
- schemaParser = IdentityParser(f"{source.database}.{source.schema}")
1344
-
1345
- if object.lower() in [x["name"].lower() for x in self.list_imports(model=model)]:
1346
- return
1347
-
1348
- query = f"SHOW OBJECTS LIKE '{source.table}' IN {schemaParser.identity}"
1349
-
1350
- info = self._exec(query)
1351
- if not info:
1352
- raise ValueError(f"Object {source.table} not found in schema {schemaParser.identity}")
1353
- else:
1354
- data = info[0]
1355
- if not data:
1356
- raise ValueError(f"Object {source.table} not found in {schemaParser.identity}")
1357
- # (time, name, db_name, schema_name, kind, *rest)
1358
- kind = data["kind"]
1359
-
1360
- relation_name = to_fqn_relation_name(object)
1361
-
1362
- command = f"""call {APP_NAME}.api.create_data_stream(
1363
- {APP_NAME}.api.object_reference('{kind}', '{object}'),
1364
- '{model}',
1365
- '{relation_name}');"""
1366
-
1367
- def create_stream(tracking_just_changed=False):
1368
- try:
1369
- self._exec(command)
1370
- except Exception as e:
1371
- messages = collect_error_messages(e)
1372
- if any("ensure that change_tracking is enabled on the source object" in msg for msg in messages):
1373
- if self.config.get("ensure_change_tracking", False) and not tracking_just_changed:
1374
- try:
1375
- self._exec(f"ALTER {kind} {object} SET CHANGE_TRACKING = TRUE;")
1376
- create_stream(tracking_just_changed=True)
1377
- except Exception:
1378
- pass
1379
- else:
1380
- print("\n")
1381
- exception = SnowflakeChangeTrackingNotEnabledException((object, kind))
1382
- raise exception from None
1383
- elif any("database does not exist" in msg for msg in messages):
1384
- print("\n")
1385
- raise ModelNotFoundException(model) from None
1386
- raise e
1387
-
1388
- create_stream()
1389
-
1390
- def create_import_snapshot(self, source:ImportSource, model:str, options: dict|None = None):
1391
- raise Exception("Snowflake integration doesn't support snapshot imports yet")
1392
-
1393
- def delete_import(self, import_name:str, model:str, force = False):
1394
- engine = self.get_default_engine_name()
1395
- rel_name = to_fqn_relation_name(import_name)
1396
- try:
1397
- self._exec(f"""call {APP_NAME}.api.delete_data_stream(
1398
- '{import_name}',
1399
- '{model}'
1400
- );""")
1401
- except RAIException as err:
1402
- if "streams do not exist" not in str(err) or not force:
1403
- raise
1404
-
1405
- # if force is true, we delete the leftover relation to free up the name (in case the user re-creates the stream)
1406
- if force:
1407
- self.exec_raw(model, engine, f"""
1408
- declare ::{rel_name}
1409
- def delete[:\"{rel_name}\"]: {{ {rel_name} }}
1410
- """, readonly=False, bypass_index=True)
1411
-
1412
- #--------------------------------------------------
1413
- # Exec Async
1414
- #--------------------------------------------------
1415
-
1416
- def _check_exec_async_status(self, txn_id: str, headers: Dict | None = None) -> TxnStatusResponse:
1417
- """Check whether the given transaction has completed."""
1418
- if headers is None:
1419
- headers = {}
1420
-
1421
- with debugging.span("check_status"):
1422
- response = self._exec(f"CALL {APP_NAME}.api.get_transaction('{txn_id}',{headers});")
1423
- assert response, f"No results from get_transaction('{txn_id}')"
1424
-
1425
- response_row = next(iter(response)).asDict()
1426
- status: str = response_row['STATE']
1427
-
1428
- # remove the transaction from the pending list if it's completed or aborted
1429
- if status in ["COMPLETED", "ABORTED"]:
1430
- if txn_id in self._pending_transactions:
1431
- self._pending_transactions.remove(txn_id)
1432
-
1433
- if status == "ABORTED":
1434
- if response_row.get("ABORT_REASON", "") == TXN_ABORT_REASON_TIMEOUT:
1435
- config_file_path = getattr(self.config, 'file_path', None)
1436
- # todo: use the timeout returned alongside the transaction as soon as it's exposed
1437
- timeout_mins = int(self.config.get("query_timeout_mins", DEFAULT_QUERY_TIMEOUT_MINS) or DEFAULT_QUERY_TIMEOUT_MINS)
1438
- raise QueryTimeoutExceededException(
1439
- timeout_mins=timeout_mins,
1440
- query_id=txn_id,
1441
- config_file_path=config_file_path,
1442
- )
1443
- elif response_row.get("ABORT_REASON", "") == GUARDRAILS_ABORT_REASON:
1444
- raise GuardRailsException()
1445
-
1446
- return TxnStatusResponse(
1447
- txn_id=txn_id,
1448
- finished=status in ["COMPLETED", "ABORTED"],
1449
- abort_reason=response_row.get("ABORT_REASON", None),
1450
- )
1451
-
1452
-
1453
- def _list_exec_async_artifacts(self, txn_id: str, headers: Dict | None = None) -> Dict[str, Dict]:
1454
- """Grab the list of artifacts produced in the transaction and the URLs to retrieve their contents."""
1455
- if headers is None:
1456
- headers = {}
1457
- with debugging.span("list_results"):
1458
- response = self._exec(
1459
- f"CALL {APP_NAME}.api.get_own_transaction_artifacts('{txn_id}',{headers});"
1460
- )
1461
- assert response, f"No results from get_own_transaction_artifacts('{txn_id}')"
1462
- return {row["FILENAME"]: row for row in response}
1463
-
1464
- def _fetch_exec_async_artifacts(
1465
- self, artifact_info: Dict[str, Dict[str, Any]]
1466
- ) -> Dict[str, Any]:
1467
- """Grab the contents of the given artifacts from SF in parallel using threads."""
1468
-
1469
- with requests.Session() as session:
1470
- def _fetch_data(name_info):
1471
- filename, metadata = name_info
1472
-
1473
- try:
1474
- # Extract the presigned URL and encryption material from metadata
1475
- url_key = self.get_url_key(metadata)
1476
- presigned_url = metadata[url_key]
1477
- encryption_material = metadata["ENCRYPTION_MATERIAL"]
1478
-
1479
- response = get_with_retries(session, presigned_url, config=self.config)
1480
- response.raise_for_status() # Throw if something goes wrong
1481
-
1482
- decrypted = self._maybe_decrypt(response.content, encryption_material)
1483
- return (filename, decrypted)
1484
-
1485
- except requests.RequestException as e:
1486
- raise scrub_exception(wrap_with_request_id(e))
1487
-
1488
- # Create a list of tuples for the map function
1489
- name_info_pairs = list(artifact_info.items())
1490
-
1491
- with ThreadPoolExecutor(max_workers=5) as executor:
1492
- results = executor.map(_fetch_data, name_info_pairs)
1493
-
1494
- return {name: data for (name, data) in results}
1495
-
1496
- def _maybe_decrypt(self, content: bytes, encryption_material: str) -> bytes:
1497
- # Decrypt if encryption material is present
1498
- if encryption_material:
1499
- # if there's no padding, the initial file was empty
1500
- if len(content) == 0:
1501
- return b""
1502
-
1503
- return decrypt_artifact(content, encryption_material)
1504
-
1505
- # otherwise, return content directly
1506
- return content
1507
-
1508
- def _parse_exec_async_results(self, arrow_files: List[Tuple[str, bytes]]):
1509
- """Mimics the logic in _parse_arrow_results of railib/api.py#L303 without requiring a wrapping multipart form."""
1510
- results = []
1511
-
1512
- for file_name, file_content in arrow_files:
1513
- with pa.ipc.open_stream(file_content) as reader:
1514
- schema = reader.schema
1515
- batches = [batch for batch in reader]
1516
- table = pa.Table.from_batches(batches=batches, schema=schema)
1517
- results.append({"relationId": file_name, "table": table})
1518
-
1519
- return results
1520
-
1521
- def _download_results(
1522
- self, artifact_info: Dict[str, Dict], txn_id: str, state: str
1523
- ) -> TransactionAsyncResponse:
1524
- with debugging.span("download_results"):
1525
- # Fetch artifacts
1526
- artifacts = self._fetch_exec_async_artifacts(artifact_info)
1527
-
1528
- # Directly use meta_json as it is fetched
1529
- meta_json_bytes = artifacts["metadata.json"]
1530
-
1531
- # Decode the bytes and parse the JSON
1532
- meta_json_str = meta_json_bytes.decode('utf-8')
1533
- meta_json = json.loads(meta_json_str) # Parse the JSON string
1534
-
1535
- # Use the metadata to map arrow files to the relations they contain
1536
- try:
1537
- arrow_files_to_relations = {
1538
- artifact["filename"]: artifact["relationId"]
1539
- for artifact in meta_json
1540
- }
1541
- except KeyError:
1542
- # TODO: Remove this fallback mechanism later once several engine versions are updated
1543
- arrow_files_to_relations = {
1544
- f"{ix}.arrow": artifact["relationId"]
1545
- for ix, artifact in enumerate(meta_json)
1546
- }
1547
-
1548
- # Hydrate the arrow files into tables
1549
- results = self._parse_exec_async_results(
1550
- [
1551
- (arrow_files_to_relations[name], content)
1552
- for name, content in artifacts.items()
1553
- if name.endswith(".arrow")
1554
- ]
1555
- )
1556
-
1557
- # Create and return the response
1558
- rsp = TransactionAsyncResponse()
1559
- rsp.transaction = {
1560
- "id": txn_id,
1561
- "state": state,
1562
- "response_format_version": None,
1563
- }
1564
- rsp.metadata = meta_json
1565
- rsp.problems = artifacts.get(
1566
- "problems.json"
1567
- ) # Safely access possible missing keys
1568
- rsp.results = results
1569
- return rsp
1570
-
1571
- def get_transaction_problems(self, txn_id: str) -> List[Dict[str, Any]]:
1572
- with debugging.span("get_own_transaction_problems"):
1573
- response = self._exec(
1574
- f"select * from table({APP_NAME}.api.get_own_transaction_problems('{txn_id}'));"
1575
- )
1576
- if not response:
1577
- return []
1578
- return response
1579
-
1580
- def get_url_key(self, metadata) -> str:
1581
- # In Azure, there is only one type of URL, which is used for both internal and
1582
- # external access; always use that one
1583
- if is_azure_url(metadata['PRESIGNED_URL']):
1584
- return 'PRESIGNED_URL'
1585
-
1586
- configured = self.config.get("download_url_type", None)
1587
- if configured == "internal":
1588
- return 'PRESIGNED_URL_AP'
1589
- elif configured == "external":
1590
- return "PRESIGNED_URL"
1591
-
1592
- if is_container_runtime():
1593
- return 'PRESIGNED_URL_AP'
1594
-
1595
- return 'PRESIGNED_URL'
1596
-
1597
- def _exec_rai_app(
1598
- self,
1599
- database: str,
1600
- engine: str | None,
1601
- raw_code: str,
1602
- inputs: Dict,
1603
- readonly=True,
1604
- nowait_durable=False,
1605
- request_headers: Dict | None = None,
1606
- bypass_index=False,
1607
- language: str = "rel",
1608
- query_timeout_mins: int | None = None,
1609
- ):
1610
- """
1611
- High-level method to execute RAI app stored procedures.
1612
-
1613
- Builds and executes SQL to call the RAI app's exec_async_v2 stored procedure.
1614
- This method handles the SQL string construction for two different formats:
1615
- 1. New format (with graph index): Uses object payload with parameterized query
1616
- 2. Legacy format: Uses positional parameters
1617
-
1618
- The choice between formats depends on the use_graph_index configuration.
1619
- The new format allows the stored procedure to hash the model and username
1620
- to determine the database, while the legacy format uses the passed database directly.
1621
-
1622
- This method is called by _exec_async_v2 to create transactions. It skips
1623
- use_index retry logic (skip_engine_db_error_retry=True) because that
1624
- is handled at a higher level by exec_raw/exec_lqp.
1625
-
1626
- Args:
1627
- database: Database/model name
1628
- engine: Engine name (optional)
1629
- raw_code: Code to execute (REL, LQP, or SQL)
1630
- inputs: Input parameters for the query
1631
- readonly: Whether the transaction is read-only
1632
- nowait_durable: Whether to wait for durable writes
1633
- request_headers: Optional HTTP headers
1634
- bypass_index: Whether to bypass graph index setup
1635
- language: Query language ("rel" or "lqp")
1636
- query_timeout_mins: Optional query timeout in minutes
1637
-
1638
- Returns:
1639
- Response from the stored procedure call (transaction creation result)
1640
-
1641
- Raises:
1642
- Exception: If transaction creation fails
1643
- """
1644
- assert language == "rel" or language == "lqp", "Only 'rel' and 'lqp' languages are supported"
1645
- if query_timeout_mins is None and (timeout_value := self.config.get("query_timeout_mins", DEFAULT_QUERY_TIMEOUT_MINS)) is not None:
1646
- query_timeout_mins = int(timeout_value)
1647
- # Depending on the shape of the input, the behavior of exec_async_v2 changes.
1648
- # When using the new format (with an object), the function retrieves the
1649
- # 'rai' database by hashing the model and username. In contrast, the
1650
- # current version directly uses the passed database value.
1651
- # Therefore, we must use the original exec_async_v2 when not using the
1652
- # graph index to ensure the correct database is utilized.
1653
- use_graph_index = self.config.get("use_graph_index", USE_GRAPH_INDEX)
1654
- if use_graph_index and not bypass_index:
1655
- payload = {
1656
- 'database': database,
1657
- 'engine': engine,
1658
- 'inputs': inputs,
1659
- 'readonly': readonly,
1660
- 'nowait_durable': nowait_durable,
1661
- 'language': language,
1662
- 'headers': request_headers
1663
- }
1664
- if query_timeout_mins is not None:
1665
- payload["timeout_mins"] = query_timeout_mins
1666
- sql_string = f"CALL {APP_NAME}.api.exec_async_v2(?, {payload});"
1667
- else:
1668
- if query_timeout_mins is not None:
1669
- sql_string = f"CALL {APP_NAME}.api.exec_async_v2('{database}','{engine}', ?, {inputs}, {readonly}, {nowait_durable}, '{language}', {query_timeout_mins}, {request_headers});"
1670
- else:
1671
- sql_string = f"CALL {APP_NAME}.api.exec_async_v2('{database}','{engine}', ?, {inputs}, {readonly}, {nowait_durable}, '{language}', {request_headers});"
1672
- # Don't let exec setup GI on failure, exec_raw and exec_lqp will do that and add the correct headers.
1673
- response = self._exec(
1674
- sql_string,
1675
- raw_code,
1676
- skip_engine_db_error_retry=True,
1677
- )
1678
- if not response:
1679
- raise Exception("Failed to create transaction")
1680
- return response
1681
-
1682
- def _create_v2_txn(
1683
- self,
1684
- database: str,
1685
- engine: str | None,
1686
- raw_code: str,
1687
- inputs: Dict,
1688
- headers: Dict[str, str],
1689
- readonly: bool,
1690
- nowait_durable: bool,
1691
- bypass_index: bool,
1692
- language: str,
1693
- query_timeout_mins: int | None,
1694
- ) -> TxnCreationResult:
1695
- """
1696
- Create a transaction and return the result.
1697
-
1698
- This method handles calling the RAI app stored procedure to create a transaction
1699
- and parses the response into a standardized TxnCreationResult format.
1700
-
1701
- This method can be overridden by subclasses (e.g., DirectAccessResources)
1702
- to use different transport mechanisms (HTTP instead of SQL).
1703
-
1704
- Args:
1705
- database: Database/model name
1706
- engine: Engine name (optional)
1707
- raw_code: Code to execute (REL, LQP, or SQL)
1708
- inputs: Input parameters for the query
1709
- headers: HTTP headers (must be prepared by caller)
1710
- readonly: Whether the transaction is read-only
1711
- nowait_durable: Whether to wait for durable writes
1712
- bypass_index: Whether to bypass graph index setup
1713
- language: Query language ("rel" or "lqp")
1714
- query_timeout_mins: Optional query timeout in minutes
1715
-
1716
- Returns:
1717
- TxnCreationResult containing txn_id, state, and artifact_info
1718
- """
1719
- response = self._exec_rai_app(
1720
- database=database,
1721
- engine=engine,
1722
- raw_code=raw_code,
1723
- inputs=inputs,
1724
- readonly=readonly,
1725
- nowait_durable=nowait_durable,
1726
- request_headers=headers,
1727
- bypass_index=bypass_index,
1728
- language=language,
1729
- query_timeout_mins=query_timeout_mins,
1730
- )
1731
-
1732
- rows = list(iter(response))
1733
-
1734
- # process the first row since txn_id and state are the same for all rows
1735
- first_row = rows[0]
1736
- txn_id = first_row['ID']
1737
- state = first_row['STATE']
1738
-
1739
- # Build artifact_info if transaction completed immediately (fast path)
1740
- artifact_info: Dict[str, Dict] = {}
1741
- if state in ["COMPLETED", "ABORTED"]:
1742
- for row in rows:
1743
- filename = row['FILENAME']
1744
- artifact_info[filename] = row
1745
-
1746
- return TxnCreationResult(txn_id=txn_id, state=state, artifact_info=artifact_info)
1747
-
1748
- def _exec_async_v2(
1749
- self,
1750
- database: str,
1751
- engine: str | None,
1752
- raw_code: str,
1753
- inputs: Dict | None = None,
1754
- readonly=True,
1755
- nowait_durable=False,
1756
- headers: Dict | None = None,
1757
- bypass_index=False,
1758
- language: str = "rel",
1759
- query_timeout_mins: int | None = None,
1760
- gi_setup_skipped: bool = False,
1761
- ):
1762
- """
1763
- High-level async execution method with transaction polling and artifact management.
1764
-
1765
- This is the core method for executing queries asynchronously. It:
1766
- 1. Creates a transaction by calling _create_v2_txn
1767
- 2. Handles two execution paths:
1768
- - Fast path: Transaction completes immediately (COMPLETED/ABORTED)
1769
- - Slow path: Transaction is pending, requires polling until completion
1770
- 3. Manages pending transactions list
1771
- 4. Downloads and returns query results/artifacts
1772
-
1773
- This method is called by _execute_code (base implementation), and calls the
1774
- following methods that can be overridden by child classes (e.g.,
1775
- DirectAccessResources uses HTTP instead):
1776
- - _create_v2_txn
1777
- - _check_exec_async_status
1778
- - _list_exec_async_artifacts
1779
- - _download_results
1780
-
1781
- Args:
1782
- database: Database/model name
1783
- engine: Engine name (optional)
1784
- raw_code: Code to execute (REL, LQP, or SQL)
1785
- inputs: Input parameters for the query
1786
- readonly: Whether the transaction is read-only
1787
- nowait_durable: Whether to wait for durable writes
1788
- headers: Optional HTTP headers
1789
- bypass_index: Whether to bypass graph index setup
1790
- language: Query language ("rel" or "lqp")
1791
- query_timeout_mins: Optional query timeout in minutes
1792
- gi_setup_skipped: Whether graph index setup was skipped (for retry logic)
1793
-
1794
- Returns:
1795
- Query results (downloaded artifacts)
1796
- """
1797
- if inputs is None:
1798
- inputs = {}
1799
- request_headers = debugging.add_current_propagation_headers(headers)
1800
- query_attrs_dict = json.loads(request_headers.get("X-Query-Attributes", "{}"))
1801
-
1802
- with debugging.span("transaction", **query_attrs_dict) as txn_span:
1803
- txn_start_time = time.time()
1804
- with ExecTxnPoller(
1805
- config=self.config,
1806
- resource=self, txn_id=None, headers=request_headers,
1807
- txn_start_time=txn_start_time
1808
- ) as poller:
1809
- with debugging.span("create_v2", **query_attrs_dict) as create_span:
1810
- # Prepare headers for transaction creation
1811
- request_headers['user-agent'] = get_pyrel_version(self.generation)
1812
- request_headers['gi_setup_skipped'] = str(gi_setup_skipped)
1813
- request_headers['pyrel_program_id'] = debugging.get_program_span_id() or ""
1814
- request_headers[ENABLE_GUARD_RAILS_HEADER] = str(should_enable_guard_rails(self.config))
1815
-
1816
- # Create the transaction
1817
- result = self._create_v2_txn(
1818
- database=database,
1819
- engine=engine,
1820
- raw_code=raw_code,
1821
- inputs=inputs,
1822
- headers=request_headers,
1823
- readonly=readonly,
1824
- nowait_durable=nowait_durable,
1825
- bypass_index=bypass_index,
1826
- language=language,
1827
- query_timeout_mins=query_timeout_mins,
1828
- )
1829
-
1830
- txn_id = result.txn_id
1831
- state = result.state
1832
-
1833
- txn_span["txn_id"] = txn_id
1834
- create_span["txn_id"] = txn_id
1835
- debugging.event("transaction_created", txn_span, txn_id=txn_id)
1836
-
1837
- # Set the transaction ID now that we have it, to update the progress text
1838
- poller.txn_id = txn_id
1839
-
1840
- # fast path: transaction already finished
1841
- if state in ["COMPLETED", "ABORTED"]:
1842
- if txn_id in self._pending_transactions:
1843
- self._pending_transactions.remove(txn_id)
1844
-
1845
- artifact_info = result.artifact_info
1846
-
1847
- # Slow path: transaction not done yet; start polling
1848
- else:
1849
- self._pending_transactions.append(txn_id)
1850
- # Use the interactive poller for transaction status
1851
- with debugging.span("wait", txn_id=txn_id):
1852
- poller.poll()
1853
-
1854
- artifact_info = self._list_exec_async_artifacts(txn_id, headers=request_headers)
1855
-
1856
- with debugging.span("fetch"):
1857
- return self._download_results(artifact_info, txn_id, state)
1858
-
1859
- def get_user_based_engine_name(self):
1860
- if not self._session:
1861
- self._session = self.get_sf_session()
1862
- user_table = self._session.sql("select current_user()").collect()
1863
- user = user_table[0][0]
1864
- assert isinstance(user, str), f"current_user() must return a string, not {type(user)}"
1865
- return _sanitize_user_name(user)
1866
-
1867
- def is_engine_ready(self, engine_name: str, type: str = EngineType.LOGIC):
1868
- engine = self.get_engine(engine_name, type)
1869
- return engine and engine["state"] == "READY"
1870
-
1871
- def auto_create_engine(
1872
- self,
1873
- name: str | None = None,
1874
- type: str = EngineType.LOGIC,
1875
- size: str | None = None,
1876
- headers: Dict | None = None,
1877
- ):
1878
- """Synchronously create/ensure an engine is ready, blocking until ready."""
1879
- with debugging.span("auto_create_engine", active=self._active_engine) as span:
1880
- active = self._get_active_engine()
1881
- if active:
1882
- return active
1883
-
1884
- # Resolve and validate parameters
1885
- name, size = self._prepare_engine_params(name, size)
1886
-
1887
- try:
1888
- # Get current engine state
1889
- engine = self.get_engine(name, type)
1890
- if engine:
1891
- span.update(cast(dict, engine))
1892
-
1893
- # Create context for state handling
1894
- context = EngineContext(
1895
- name=name,
1896
- size=size,
1897
- type=type,
1898
- headers=headers,
1899
- requested_size=size,
1900
- span=span,
1901
- )
1902
-
1903
- # Process engine state using sync handlers
1904
- self._process_engine_state(engine, context, self._sync_engine_state_handlers)
1905
-
1906
- except Exception as e:
1907
- self._handle_engine_creation_errors(e, name)
1908
-
1909
- return name
1910
-
1911
- def auto_create_engine_async(self, name: str | None = None, type: str | None = None):
1912
- """Asynchronously create/ensure an engine, returns immediately."""
1913
- if type is None:
1914
- type = EngineType.LOGIC
1915
- active = self._get_active_engine()
1916
- if active and (active == name or name is None):
1917
- return active
1918
-
1919
- with Spinner(
1920
- "Checking engine status",
1921
- leading_newline=True,
1922
- ) as spinner:
1923
- with debugging.span("auto_create_engine_async", active=self._active_engine):
1924
- # Resolve and validate parameters (use_default_size=True for async)
1925
- name, size = self._prepare_engine_params(name, None, use_default_size=True)
1926
-
1927
- try:
1928
- # Get current engine state
1929
- engine = self.get_engine(name, type)
1930
-
1931
- # Create context for state handling
1932
- context = EngineContext(
1933
- name=name,
1934
- size=size,
1935
- type=type,
1936
- headers=None,
1937
- requested_size=None,
1938
- spinner=spinner,
1939
- )
1940
-
1941
- # Process engine state using async handlers
1942
- self._process_engine_state(engine, context, self._async_engine_state_handlers, set_active_on_success=True)
1943
-
1944
- except Exception as e:
1945
- spinner.update_messages({
1946
- "finished_message": f"Failed to create engine {name}",
1947
- })
1948
- self._handle_engine_creation_errors(e, name, preserve_rai_exception=True)
1949
-
1950
- return name
1951
-
1952
- #--------------------------------------------------
1953
- # Exec
1954
- #--------------------------------------------------
1955
-
1956
- def _execute_code(
1957
- self,
1958
- database: str,
1959
- engine: str | None,
1960
- raw_code: str,
1961
- inputs: Dict | None,
1962
- readonly: bool,
1963
- nowait_durable: bool,
1964
- headers: Dict | None,
1965
- bypass_index: bool,
1966
- language: str,
1967
- query_timeout_mins: int | None,
1968
- ) -> Any:
1969
- """
1970
- Template method for code execution - can be overridden by child classes.
1971
-
1972
- This is a template method that provides a hook for child classes to add
1973
- execution logic (like retry mechanisms). The base implementation simply
1974
- calls _exec_async_v2 directly.
1975
-
1976
- UseIndexResources overrides this method to use _exec_with_gi_retry, which
1977
- adds automatic use_index polling on engine/database errors.
1978
-
1979
- This method is called by exec_lqp() and exec_raw() to provide a single
1980
- execution point that can be customized per resource class.
1981
-
1982
- Args:
1983
- database: Database/model name
1984
- engine: Engine name (optional)
1985
- raw_code: Code to execute (already processed/encoded)
1986
- inputs: Input parameters for the query
1987
- readonly: Whether the transaction is read-only
1988
- nowait_durable: Whether to wait for durable writes
1989
- headers: Optional HTTP headers
1990
- bypass_index: Whether to bypass graph index setup
1991
- language: Query language ("rel" or "lqp")
1992
- query_timeout_mins: Optional query timeout in minutes
1993
-
1994
- Returns:
1995
- Query results
1996
- """
1997
- return self._exec_async_v2(
1998
- database, engine, raw_code, inputs, readonly, nowait_durable,
1999
- headers=headers, bypass_index=bypass_index, language=language,
2000
- query_timeout_mins=query_timeout_mins, gi_setup_skipped=True,
2001
- )
2002
-
2003
- def exec_lqp(
2004
- self,
2005
- database: str,
2006
- engine: str | None,
2007
- raw_code: bytes,
2008
- readonly=True,
2009
- *,
2010
- inputs: Dict | None = None,
2011
- nowait_durable=False,
2012
- headers: Dict | None = None,
2013
- bypass_index=False,
2014
- query_timeout_mins: int | None = None,
2015
- ):
2016
- """Execute LQP code."""
2017
- raw_code_b64 = base64.b64encode(raw_code).decode("utf-8")
2018
- return self._execute_code(
2019
- database, engine, raw_code_b64, inputs, readonly, nowait_durable,
2020
- headers, bypass_index, 'lqp', query_timeout_mins
2021
- )
2022
-
2023
- def exec_raw(
2024
- self,
2025
- database: str,
2026
- engine: str | None,
2027
- raw_code: str,
2028
- readonly=True,
2029
- *,
2030
- inputs: Dict | None = None,
2031
- nowait_durable=False,
2032
- headers: Dict | None = None,
2033
- bypass_index=False,
2034
- query_timeout_mins: int | None = None,
2035
- ):
2036
- """Execute raw code."""
2037
- raw_code = raw_code.replace("'", "\\'")
2038
- return self._execute_code(
2039
- database, engine, raw_code, inputs, readonly, nowait_durable,
2040
- headers, bypass_index, 'rel', query_timeout_mins
2041
- )
2042
-
2043
-
2044
- def format_results(self, results, task:m.Task|None=None) -> Tuple[DataFrame, List[Any]]:
2045
- return result_helpers.format_results(results, task)
2046
-
2047
- #--------------------------------------------------
2048
- # Exec format
2049
- #--------------------------------------------------
2050
-
2051
- def exec_format(
2052
- self,
2053
- database: str,
2054
- engine: str,
2055
- raw_code: str,
2056
- cols: List[str],
2057
- format: str,
2058
- inputs: Dict | None = None,
2059
- readonly=True,
2060
- nowait_durable=False,
2061
- skip_invalid_data=False,
2062
- headers: Dict | None = None,
2063
- query_timeout_mins: int | None = None,
2064
- ):
2065
- if inputs is None:
2066
- inputs = {}
2067
- if headers is None:
2068
- headers = {}
2069
- if 'user-agent' not in headers:
2070
- headers['user-agent'] = get_pyrel_version(self.generation)
2071
- if query_timeout_mins is None and (timeout_value := self.config.get("query_timeout_mins", DEFAULT_QUERY_TIMEOUT_MINS)) is not None:
2072
- query_timeout_mins = int(timeout_value)
2073
- # TODO: add headers
2074
- start = time.perf_counter()
2075
- output_table = "out" + str(uuid.uuid4()).replace("-", "_")
2076
- temp_table = f"temp_{output_table}"
2077
- use_graph_index = self.config.get("use_graph_index", USE_GRAPH_INDEX)
2078
- txn_id = None
2079
- rejected_rows = None
2080
- col_names_map = None
2081
- artifacts = None
2082
- assert self._session
2083
- temp = self._session.createDataFrame([], StructType([StructField(name, StringType()) for name in cols]))
2084
- with debugging.span("transaction") as txn_span:
2085
- try:
2086
- # In the graph index case we need to use the new exec_into_table proc as it obfuscates the db name
2087
- with debugging.span("exec_format"):
2088
- if use_graph_index:
2089
- # we do not provide a default value for query_timeout_mins so that we can control the default on app level
2090
- if query_timeout_mins is not None:
2091
- res = self._exec(f"call {APP_NAME}.api.exec_into_table(?, ?, ?, ?, ?, NULL, ?, {headers}, ?, ?);", [database, engine, raw_code, output_table, readonly, nowait_durable, skip_invalid_data, query_timeout_mins])
2092
- else:
2093
- res = self._exec(f"call {APP_NAME}.api.exec_into_table(?, ?, ?, ?, ?, NULL, ?, {headers}, ?);", [database, engine, raw_code, output_table, readonly, nowait_durable, skip_invalid_data])
2094
- txn_id = json.loads(res[0]["EXEC_INTO_TABLE"])["rai_transaction_id"]
2095
- rejected_rows = json.loads(res[0]["EXEC_INTO_TABLE"]).get("rejected_rows", [])
2096
- rejected_rows_count = json.loads(res[0]["EXEC_INTO_TABLE"]).get("rejected_rows_count", 0)
2097
- else:
2098
- if query_timeout_mins is not None:
2099
- res = self._exec(f"call {APP_NAME}.api.exec_into(?, ?, ?, ?, ?, {inputs}, ?, {headers}, ?, ?);", [database, engine, raw_code, output_table, readonly, nowait_durable, skip_invalid_data, query_timeout_mins])
2100
- else:
2101
- res = self._exec(f"call {APP_NAME}.api.exec_into(?, ?, ?, ?, ?, {inputs}, ?, {headers}, ?);", [database, engine, raw_code, output_table, readonly, nowait_durable, skip_invalid_data])
2102
- txn_id = json.loads(res[0]["EXEC_INTO"])["rai_transaction_id"]
2103
- rejected_rows = json.loads(res[0]["EXEC_INTO"]).get("rejected_rows", [])
2104
- rejected_rows_count = json.loads(res[0]["EXEC_INTO"]).get("rejected_rows_count", 0)
2105
- debugging.event("transaction_created", txn_span, txn_id=txn_id)
2106
- debugging.time("exec_format", time.perf_counter() - start, DataFrame())
2107
-
2108
- with debugging.span("temp_table_swap", txn_id=txn_id):
2109
- out_sample = self._exec(f"select * from {APP_NAME}.results.{output_table} limit 1;")
2110
- if out_sample:
2111
- keys = set([k.lower() for k in out_sample[0].as_dict().keys()])
2112
- col_names_map = {}
2113
- for ix, name in enumerate(cols):
2114
- col_key = f"col{ix:03}"
2115
- if col_key in keys:
2116
- col_names_map[col_key] = IdentityParser(name).identity
2117
- else:
2118
- col_names_map[col_key] = name
2119
-
2120
- names = ", ".join([
2121
- f"{col_key} as {alias}" if col_key in keys else f"NULL as {alias}"
2122
- for col_key, alias in col_names_map.items()
2123
- ])
2124
- self._exec(f"CREATE TEMPORARY TABLE {APP_NAME}.results.{temp_table} AS SELECT {names} FROM {APP_NAME}.results.{output_table};")
2125
- self._exec(f"call {APP_NAME}.api.drop_result_table(?)", [output_table])
2126
- temp = cast(snowflake.snowpark.DataFrame, self._exec(f"select * from {APP_NAME}.results.{temp_table}", raw=True))
2127
- if rejected_rows:
2128
- debugging.warn(RowsDroppedFromTargetTableWarning(rejected_rows, rejected_rows_count, col_names_map))
2129
- except Exception as e:
2130
- messages = collect_error_messages(e)
2131
- if any("no columns returned" in msg or "columns of results could not be determined" in msg for msg in messages):
2132
- pass
2133
- else:
2134
- raise e
2135
- if txn_id:
2136
- artifact_info = self._list_exec_async_artifacts(txn_id)
2137
- with debugging.span("fetch"):
2138
- artifacts = self._download_results(artifact_info, txn_id, "ABORTED")
2139
- return (temp, artifacts)
2140
-
2141
- #--------------------------------------------------
2142
- # Custom model types
2143
- #--------------------------------------------------
2144
-
2145
- def _get_ns(self, model:dsl.Graph):
2146
- if model not in self._ns_cache:
2147
- self._ns_cache[model] = _Snowflake(model)
2148
- return self._ns_cache[model]
2149
-
2150
- def to_model_type(self, model:dsl.Graph, name: str, source:str):
2151
- parser = IdentityParser(source)
2152
- if not parser.is_complete:
2153
- raise SnowflakeInvalidSource(Errors.call_source(), source)
2154
- ns = self._get_ns(model)
2155
- # skip the last item in the list (the full identifier)
2156
- for part in parser.to_list()[:-1]:
2157
- ns = ns._safe_get(part)
2158
- assert parser.identity, f"Error parsing source in to_model_type: {source}"
2159
- self.sources.add(parser.identity)
2160
- return ns
2161
-
2162
- #--------------------------------------------------
2163
- # Source Management
2164
- #--------------------------------------------------
2165
-
2166
- def _check_source_updates(self, sources: Iterable[str]):
2167
- if not sources:
2168
- return {}
2169
- app_name = self.get_app_name()
2170
-
2171
- source_types = dict[str, SourceInfo]()
2172
- partitioned_sources: dict[str, dict[str, list[dict[str, str]]]] = defaultdict(
2173
- lambda: defaultdict(list)
2174
- )
2175
- fqn_to_parts: dict[str, tuple[str, str, str]] = {}
2176
-
2177
- for source in sources:
2178
- parser = IdentityParser(source, True)
2179
- parsed = parser.to_list()
2180
- assert len(parsed) == 4, f"Invalid source: {source}"
2181
- db, schema, entity, identity = parsed
2182
- assert db and schema and entity and identity, f"Invalid source: {source}"
2183
- source_types[identity] = cast(
2184
- SourceInfo,
2185
- {
2186
- "type": None,
2187
- "state": "",
2188
- "columns_hash": None,
2189
- "table_created_at": None,
2190
- "stream_created_at": None,
2191
- "last_ddl": None,
2192
- },
2193
- )
2194
- partitioned_sources[db][schema].append({"entity": entity, "identity": identity})
2195
- fqn_to_parts[identity] = (db, schema, entity)
2196
-
2197
- if not partitioned_sources:
2198
- return source_types
2199
-
2200
- state_queries: list[str] = []
2201
- for db, schemas in partitioned_sources.items():
2202
- select_rows: list[str] = []
2203
- for schema, tables in schemas.items():
2204
- for table_info in tables:
2205
- select_rows.append(
2206
- "SELECT "
2207
- f"{IdentityParser.to_sql_value(db)} AS catalog_name, "
2208
- f"{IdentityParser.to_sql_value(schema)} AS schema_name, "
2209
- f"{IdentityParser.to_sql_value(table_info['entity'])} AS table_name"
2210
- )
2211
-
2212
- if not select_rows:
2213
- continue
2214
-
2215
- target_entities_clause = "\n UNION ALL\n ".join(select_rows)
2216
- # Main query:
2217
- # 1. Enumerate the target tables via target_entities.
2218
- # 2. Pull their metadata (last_altered, type) from INFORMATION_SCHEMA.TABLES.
2219
- # 3. Look up the most recent stream activity for those FQNs only.
2220
- # 4. Capture creation timestamps and use last_ddl vs created_at to classify each target,
2221
- # so we mark tables as stale when they were recreated even if column hashes still match.
2222
- state_queries.append(
2223
- f"""WITH target_entities AS (
2224
- {target_entities_clause}
2225
- ),
2226
- table_info AS (
2227
- SELECT
2228
- {app_name}.api.normalize_fq_ids(
2229
- ARRAY_CONSTRUCT(
2230
- CASE
2231
- WHEN t.table_catalog = UPPER(t.table_catalog) THEN t.table_catalog
2232
- ELSE '"' || t.table_catalog || '"'
2233
- END || '.' ||
2234
- CASE
2235
- WHEN t.table_schema = UPPER(t.table_schema) THEN t.table_schema
2236
- ELSE '"' || t.table_schema || '"'
2237
- END || '.' ||
2238
- CASE
2239
- WHEN t.table_name = UPPER(t.table_name) THEN t.table_name
2240
- ELSE '"' || t.table_name || '"'
2241
- END
2242
- )
2243
- )[0]:identifier::string AS fqn,
2244
- CONVERT_TIMEZONE('UTC', t.last_altered) AS last_ddl,
2245
- CONVERT_TIMEZONE('UTC', t.created) AS table_created_at,
2246
- t.table_type AS kind
2247
- FROM {db}.INFORMATION_SCHEMA.tables t
2248
- JOIN target_entities te
2249
- ON t.table_catalog = te.catalog_name
2250
- AND t.table_schema = te.schema_name
2251
- AND t.table_name = te.table_name
2252
- ),
2253
- stream_activity AS (
2254
- SELECT
2255
- sa.fqn,
2256
- MAX(sa.created_at) AS created_at
2257
- FROM (
2258
- SELECT
2259
- {app_name}.api.normalize_fq_ids(ARRAY_CONSTRUCT(fq_object_name))[0]:identifier::string AS fqn,
2260
- created_at
2261
- FROM {app_name}.api.data_streams
2262
- WHERE rai_database = '{PYREL_ROOT_DB}'
2263
- ) sa
2264
- JOIN table_info ti
2265
- ON sa.fqn = ti.fqn
2266
- GROUP BY sa.fqn
2267
- )
2268
- SELECT
2269
- ti.fqn,
2270
- ti.kind,
2271
- ti.last_ddl,
2272
- ti.table_created_at,
2273
- sa.created_at AS stream_created_at,
2274
- IFF(
2275
- DATEDIFF(second, sa.created_at::timestamp, ti.last_ddl::timestamp) > 0,
2276
- 'STALE',
2277
- 'CURRENT'
2278
- ) AS state
2279
- FROM table_info ti
2280
- LEFT JOIN stream_activity sa
2281
- ON sa.fqn = ti.fqn
2282
- """
2283
- )
2284
-
2285
- stale_fqns: list[str] = []
2286
- for state_query in state_queries:
2287
- for row in self._exec(state_query):
2288
- row_dict = row.as_dict() if hasattr(row, "as_dict") else dict(row)
2289
- row_fqn = row_dict["FQN"]
2290
- parser = IdentityParser(row_fqn, True)
2291
- fqn = parser.identity
2292
- assert fqn, f"Error parsing returned FQN: {row_fqn}"
2293
-
2294
- source_types[fqn]["type"] = (
2295
- "TABLE" if row_dict["KIND"] == "BASE TABLE" else row_dict["KIND"]
2296
- )
2297
- source_types[fqn]["state"] = row_dict["STATE"]
2298
- source_types[fqn]["last_ddl"] = normalize_datetime(row_dict.get("LAST_DDL"))
2299
- source_types[fqn]["table_created_at"] = normalize_datetime(row_dict.get("TABLE_CREATED_AT"))
2300
- source_types[fqn]["stream_created_at"] = normalize_datetime(row_dict.get("STREAM_CREATED_AT"))
2301
- if row_dict["STATE"] == "STALE":
2302
- stale_fqns.append(fqn)
2303
-
2304
- if not stale_fqns:
2305
- return source_types
2306
-
2307
- # We batch stale tables by database/schema so each Snowflake query can hash
2308
- # multiple objects at once instead of issuing one statement per table.
2309
- stale_partitioned: dict[str, dict[str, list[dict[str, str]]]] = defaultdict(
2310
- lambda: defaultdict(list)
2311
- )
2312
- for fqn in stale_fqns:
2313
- db, schema, table = fqn_to_parts[fqn]
2314
- stale_partitioned[db][schema].append({"table": table, "identity": fqn})
2315
-
2316
- # Build one hash query per database, grouping schemas/tables inside so we submit
2317
- # at most a handful of set-based statements to Snowflake.
2318
- for db, schemas in stale_partitioned.items():
2319
- column_select_rows: list[str] = []
2320
- for schema, tables in schemas.items():
2321
- for table_info in tables:
2322
- # Build the literal rows for this db/schema so we can join back
2323
- # against INFORMATION_SCHEMA.COLUMNS in a single statement.
2324
- column_select_rows.append(
2325
- "SELECT "
2326
- f"{IdentityParser.to_sql_value(db)} AS catalog_name, "
2327
- f"{IdentityParser.to_sql_value(schema)} AS schema_name, "
2328
- f"{IdentityParser.to_sql_value(table_info['table'])} AS table_name"
2329
- )
2330
-
2331
- if not column_select_rows:
2332
- continue
2333
-
2334
- target_entities_clause = "\n UNION ALL\n ".join(column_select_rows)
2335
- # Main query: compute deterministic column hashes for every stale table
2336
- # in this database/schema batch so we can compare schemas without a round trip per table.
2337
- column_query = f"""WITH target_entities AS (
2338
- {target_entities_clause}
2339
- ),
2340
- column_info AS (
2341
- SELECT
2342
- {app_name}.api.normalize_fq_ids(
2343
- ARRAY_CONSTRUCT(
2344
- CASE
2345
- WHEN c.table_catalog = UPPER(c.table_catalog) THEN c.table_catalog
2346
- ELSE '"' || c.table_catalog || '"'
2347
- END || '.' ||
2348
- CASE
2349
- WHEN c.table_schema = UPPER(c.table_schema) THEN c.table_schema
2350
- ELSE '"' || c.table_schema || '"'
2351
- END || '.' ||
2352
- CASE
2353
- WHEN c.table_name = UPPER(c.table_name) THEN c.table_name
2354
- ELSE '"' || c.table_name || '"'
2355
- END
2356
- )
2357
- )[0]:identifier::string AS fqn,
2358
- c.column_name,
2359
- CASE
2360
- WHEN c.numeric_precision IS NOT NULL AND c.numeric_scale IS NOT NULL
2361
- THEN c.data_type || '(' || c.numeric_precision || ',' || c.numeric_scale || ')'
2362
- WHEN c.datetime_precision IS NOT NULL
2363
- THEN c.data_type || '(0,' || c.datetime_precision || ')'
2364
- WHEN c.character_maximum_length IS NOT NULL
2365
- THEN c.data_type || '(' || c.character_maximum_length || ')'
2366
- ELSE c.data_type
2367
- END AS type_signature,
2368
- IFF(c.is_nullable = 'YES', 'YES', 'NO') AS nullable_flag
2369
- FROM {db}.INFORMATION_SCHEMA.COLUMNS c
2370
- JOIN target_entities te
2371
- ON c.table_catalog = te.catalog_name
2372
- AND c.table_schema = te.schema_name
2373
- AND c.table_name = te.table_name
2374
- )
2375
- SELECT
2376
- fqn,
2377
- HEX_ENCODE(
2378
- HASH_AGG(
2379
- HASH(
2380
- column_name,
2381
- type_signature,
2382
- nullable_flag
2383
- )
2384
- )
2385
- ) AS columns_hash
2386
- FROM column_info
2387
- GROUP BY fqn
2388
- """
2389
-
2390
- for row in self._exec(column_query):
2391
- row_fqn = row["FQN"]
2392
- parser = IdentityParser(row_fqn, True)
2393
- fqn = parser.identity
2394
- assert fqn, f"Error parsing returned FQN: {row_fqn}"
2395
- source_types[fqn]["columns_hash"] = row["COLUMNS_HASH"]
2396
-
2397
- return source_types
2398
-
2399
- def _get_source_references(self, source_info: dict[str, SourceInfo]):
2400
- app_name = self.get_app_name()
2401
- missing_sources = []
2402
- invalid_sources = {}
2403
- source_references = []
2404
- for source, info in source_info.items():
2405
- source_type = info.get("type")
2406
- if source_type is None:
2407
- missing_sources.append(source)
2408
- elif source_type not in ("TABLE", "VIEW"):
2409
- invalid_sources[source] = source_type
2410
- else:
2411
- source_references.append(f"{app_name}.api.object_reference('{source_type}', '{source}')")
2412
-
2413
- if missing_sources:
2414
- current_role = self.get_sf_session().get_current_role()
2415
- if current_role is None:
2416
- current_role = self.config.get("role", None)
2417
- debugging.warn(UnknownSourceWarning(missing_sources, current_role))
2418
-
2419
- if invalid_sources:
2420
- debugging.warn(InvalidSourceTypeWarning(invalid_sources))
2421
-
2422
- self.source_references = source_references
2423
- return source_references
2424
-
2425
- #--------------------------------------------------
2426
- # Transactions
2427
- #--------------------------------------------------
2428
-
2429
- def get_transaction(self, transaction_id):
2430
- results = self._exec(
2431
- f"CALL {APP_NAME}.api.get_transaction(?);", [transaction_id])
2432
- if not results:
2433
- return None
2434
-
2435
- results = txn_list_to_dicts(results)
2436
-
2437
- txn = {field: results[0][field] for field in GET_TXN_SQL_FIELDS}
2438
-
2439
- state = txn.get("state")
2440
- created_on = txn.get("created_on")
2441
- finished_at = txn.get("finished_at")
2442
- if created_on:
2443
- # Transaction is still running
2444
- if state not in TERMINAL_TXN_STATES:
2445
- tz_info = created_on.tzinfo
2446
- txn['duration'] = datetime.now(tz_info) - created_on
2447
- # Transaction is terminal
2448
- elif finished_at:
2449
- txn['duration'] = finished_at - created_on
2450
- # Transaction is still running and we have no state or finished_at
2451
- else:
2452
- txn['duration'] = timedelta(0)
2453
- return txn
2454
-
2455
- def list_transactions(self, **kwargs):
2456
- id = kwargs.get("id", None)
2457
- state = kwargs.get("state", None)
2458
- engine = kwargs.get("engine", None)
2459
- limit = kwargs.get("limit", 100)
2460
- all_users = kwargs.get("all_users", False)
2461
- created_by = kwargs.get("created_by", None)
2462
- only_active = kwargs.get("only_active", False)
2463
- where_clause_arr = []
2464
-
2465
- if id:
2466
- where_clause_arr.append(f"id = '{id}'")
2467
- if state:
2468
- where_clause_arr.append(f"state = '{state.upper()}'")
2469
- if engine:
2470
- where_clause_arr.append(f"LOWER(engine_name) = '{engine.lower()}'")
2471
- else:
2472
- if only_active:
2473
- where_clause_arr.append("state in ('CREATED', 'RUNNING', 'PENDING')")
2474
- if not all_users and created_by is not None:
2475
- where_clause_arr.append(f"LOWER(created_by) = '{created_by.lower()}'")
2476
-
2477
- if len(where_clause_arr):
2478
- where_clause = f'WHERE {" AND ".join(where_clause_arr)}'
2479
- else:
2480
- where_clause = ""
2481
-
2482
- sql_fields = ", ".join(LIST_TXN_SQL_FIELDS)
2483
- query = f"SELECT {sql_fields} from {APP_NAME}.api.transactions {where_clause} ORDER BY created_on DESC LIMIT ?"
2484
- results = self._exec(query, [limit])
2485
- if not results:
2486
- return []
2487
- return txn_list_to_dicts(results)
2488
-
2489
- def cancel_transaction(self, transaction_id):
2490
- self._exec(f"CALL {APP_NAME}.api.cancel_own_transaction(?);", [transaction_id])
2491
- if transaction_id in self._pending_transactions:
2492
- self._pending_transactions.remove(transaction_id)
2493
-
2494
- def cancel_pending_transactions(self):
2495
- for txn_id in self._pending_transactions:
2496
- self.cancel_transaction(txn_id)
2497
-
2498
- def get_transaction_events(self, transaction_id: str, continuation_token:str=''):
2499
- results = self._exec(
2500
- f"SELECT {APP_NAME}.api.get_own_transaction_events(?, ?);",
2501
- [transaction_id, continuation_token],
2502
- )
2503
- if not results:
2504
- return {
2505
- "events": [],
2506
- "continuation_token": None
2507
- }
2508
- row = results[0][0]
2509
- return json.loads(row)
2510
-
2511
- #--------------------------------------------------
2512
- # Snowflake specific
2513
- #--------------------------------------------------
2514
-
2515
- def get_version(self):
2516
- results = self._exec(f"SELECT {APP_NAME}.app.get_release()")
2517
- if not results:
2518
- return None
2519
- return results[0][0]
2520
-
2521
- # CLI methods (list_warehouses, list_compute_pools, list_roles, list_apps,
2522
- # list_databases, list_sf_schemas, list_tables) are now in CLIResources class
2523
- # schema_info is kept in base Resources class since it's used by SnowflakeSchema._fetch_info()
2524
-
2525
- def schema_info(self, database: str, schema: str, tables: Iterable[str]):
2526
- """Get detailed schema information including primary keys, foreign keys, and columns."""
2527
- app_name = self.get_app_name()
2528
- # Only pass the db + schema as the identifier so that the resulting identity is correct
2529
- parser = IdentityParser(f"{database}.{schema}")
2530
-
2531
- with debugging.span("schema_info"):
2532
- with debugging.span("primary_keys") as span:
2533
- pk_query = f"SHOW PRIMARY KEYS IN SCHEMA {parser.identity};"
2534
- pks = self._exec(pk_query)
2535
- span["sql"] = pk_query
2536
-
2537
- with debugging.span("foreign_keys") as span:
2538
- fk_query = f"SHOW IMPORTED KEYS IN SCHEMA {parser.identity};"
2539
- fks = self._exec(fk_query)
2540
- span["sql"] = fk_query
2541
-
2542
- # IdentityParser will parse a single value (with no ".") and store it in this case in the db field
2543
- with debugging.span("columns") as span:
2544
- tables_str = ", ".join([f"'{IdentityParser(t).db}'" for t in tables])
2545
- query = textwrap.dedent(f"""
2546
- begin
2547
- SHOW COLUMNS IN SCHEMA {parser.identity};
2548
- let r resultset := (
2549
- SELECT
2550
- CASE
2551
- WHEN "table_name" = UPPER("table_name") THEN "table_name"
2552
- ELSE '"' || "table_name" || '"'
2553
- END as "table_name",
2554
- "column_name",
2555
- "data_type",
2556
- CASE
2557
- WHEN ARRAY_CONTAINS(PARSE_JSON("data_type"):"type", {app_name}.app.get_supported_column_types()) THEN TRUE
2558
- ELSE FALSE
2559
- END as "supported_type"
2560
- FROM table(result_scan(-1)) as t
2561
- WHERE "table_name" in ({tables_str})
2562
- );
2563
- return table(r);
2564
- end;
2565
- """)
2566
- span["sql"] = query
2567
- columns = self._exec(query)
2568
-
2569
- results = defaultdict(lambda: {"pks": [], "fks": {}, "columns": {}, "invalid_columns": {}})
2570
- if pks:
2571
- for row in pks:
2572
- results[row[3]]["pks"].append(row[4]) # type: ignore
2573
- if fks:
2574
- for row in fks:
2575
- results[row[7]]["fks"][row[8]] = row[3]
2576
- if columns:
2577
- # It seems that a SF parameter (QUOTED_IDENTIFIERS_IGNORE_CASE) can control
2578
- # whether snowflake will ignore case on `row.data_type`,
2579
- # so we have to use column indexes instead :(
2580
- for row in columns:
2581
- table_name = row[0]
2582
- column_name = row[1]
2583
- data_type = row[2]
2584
- supported_type = row[3]
2585
- # Filter out unsupported types
2586
- if supported_type:
2587
- results[table_name]["columns"][column_name] = data_type
2588
- else:
2589
- results[table_name]["invalid_columns"][column_name] = data_type
2590
- return results
2591
-
2592
- def get_cloud_provider(self) -> str:
2593
- """
2594
- Detect whether this is Snowflake on Azure, or AWS using Snowflake's CURRENT_REGION().
2595
- Returns 'azure' or 'aws'.
2596
- """
2597
- if self._session:
2598
- try:
2599
- # Query Snowflake's current region using the built-in function
2600
- result = self._session.sql("SELECT CURRENT_REGION()").collect()
2601
- if result:
2602
- region_info = result[0][0]
2603
- # Check if the region string contains the cloud provider name
2604
- if isinstance(region_info, str):
2605
- region_str = region_info.lower()
2606
- # Check for cloud providers in the region string
2607
- if 'azure' in region_str:
2608
- return 'azure'
2609
- else:
2610
- return 'aws'
2611
- except Exception:
2612
- pass
2613
-
2614
- # Fallback to AWS as default if detection fails
2615
- return 'aws'
2616
-
2617
- #--------------------------------------------------
2618
- # Snowflake Wrapper
2619
- #--------------------------------------------------
2620
-
2621
- class PrimaryKey:
2622
- pass
2623
-
2624
class _Snowflake:
    """Lazy attribute tree over Snowflake databases for a model: model.db.schema.table."""

    def __init__(self, model, auto_import=False):
        self._model = model
        self._auto_import = auto_import
        # The proxy only works against a Snowflake-backed resources object.
        if not isinstance(model._client.resources, Resources):
            raise ValueError("Snowflake model must be used with a snowflake config")
        self._dbs = {}
        existing_imports = model._client.resources.list_imports(model=model.name)
        self._import_structure(existing_imports)

    def _import_structure(self, imports: list[Import]):
        """Pre-register every already-imported table under its database/schema."""
        tree = self._dbs
        touched_schemas = set()
        for entry in imports:
            parsed = IdentityParser(entry["name"])
            database_name, schema_name, table_name = parsed.to_list()[:-1]
            # Attribute access lazily materializes the db and schema nodes.
            schema = getattr(getattr(self, database_name), schema_name)
            touched_schemas.add(schema)
            schema._add(table_name, is_imported=True)
        return tree

    def _safe_get(self, name: str) -> 'SnowflakeDB':
        db = self._dbs.get(name)
        if db is None:
            db = SnowflakeDB(self, name)
            self._dbs[name] = db
        return db

    def __getattr__(self, name: str) -> 'SnowflakeDB':
        # Any unknown attribute is treated as a database name.
        return self._safe_get(name)
2658
class Snowflake(_Snowflake):
    """Public entry point for the Snowflake proxy API (deprecated)."""

    def __init__(self, model: dsl.Graph, auto_import=False):
        # The proxy API is unsupported together with the graph index,
        # and deprecated everywhere else.
        if model._config.get_bool("use_graph_index", USE_GRAPH_INDEX):
            raise SnowflakeProxySourceError()
        debugging.warn(SnowflakeProxyAPIDeprecationWarning())
        super().__init__(model, auto_import)
2667
class SnowflakeDB:
    """A Snowflake database node; lazily materializes SnowflakeSchema children."""

    def __init__(self, parent, name):
        self._name = name
        self._parent = parent
        self._model = parent._model
        self._schemas = {}

    def _safe_get(self, name: str) -> 'SnowflakeSchema':
        schema = self._schemas.get(name)
        if schema is None:
            schema = SnowflakeSchema(self, name)
            self._schemas[name] = schema
        return schema

    def __getattr__(self, name: str) -> 'SnowflakeSchema':
        # Any unknown attribute is treated as a schema name.
        return self._safe_get(name)
2684
class SnowflakeSchema:
    """A Snowflake schema node; tracks tables, imported streams, and column metadata."""

    def __init__(self, parent, name):
        self._name = name
        self._parent = parent
        self._model = parent._model
        self._tables = {}
        self._imported = set()
        self._table_info = defaultdict(lambda: {"pks": [], "fks": {}, "columns": {}, "invalid_columns": {}})
        # Dirty until metadata has been fetched for the current table set.
        self._dirty = True

    def _fetch_info(self):
        """Refresh column/key metadata, but only if tables were added since the last fetch."""
        if not self._dirty:
            return
        self._table_info = self._model._client.resources.schema_info(
            self._parent._name, self._name, list(self._tables.keys())
        )
        if self._model._config.get("check_column_types", True):
            self._check_and_confirm_invalid_columns()
        self._dirty = False

    def _check_and_confirm_invalid_columns(self):
        """Check for invalid columns across the schema's tables."""
        offenders = {
            table_name: info["invalid_columns"]
            for table_name, info in self._table_info.items()
            if info.get("invalid_columns")
        }
        if offenders:
            from relationalai.errors import UnsupportedColumnTypesWarning
            UnsupportedColumnTypesWarning(offenders)

    def _add(self, name, is_imported=False):
        if name in self._tables:
            return self._tables[name]
        self._dirty = True
        if is_imported:
            # Imported streams are tracked by name only; no wrapper object yet.
            self._imported.add(name)
        else:
            self._tables[name] = SnowflakeTable(self, name)
        return self._tables.get(name)

    def _safe_get(self, name: str) -> 'SnowflakeTable | None':
        return self._add(name)

    def __getattr__(self, name: str) -> 'SnowflakeTable | None':
        # Any unknown attribute is treated as a table name.
        return self._safe_get(name)
2735
class SnowflakeTable(dsl.Type):
    # Proxy for a physical Snowflake table, exposed as a PyRel Type.
    # Schema binding is deferred until first use (__call__/add/extend/
    # known_properties) so stream creation and metadata fetches only
    # happen when the table is actually referenced.

    def __init__(self, parent, name):
        super().__init__(parent._model, f"sf_{name}")
        # hack to make this work for pathfinder
        self._type.parents.append(m.Builtins.PQFilterAnnotation)
        self._name = name
        self._model = parent._model
        self._parent = parent
        self._aliases = {}
        # NOTE(review): "_finalzed" is misspelled but used consistently
        # throughout this class; left as-is to avoid breaking external pokes.
        self._finalzed = False
        self._source = runtime_env.get_source()
        # Declare the backing relation eagerly so references compile
        # before the table is finalized.
        relation_name = to_fqn_relation_name(self.fqname())
        self._model.install_raw(f"declare {relation_name}")

    def __call__(self, *args, **kwargs):
        # Bind the table schema on first use, then defer to dsl.Type.
        self._lazy_init()
        return super().__call__(*args, **kwargs)

    def add(self, *args, **kwargs):
        self._lazy_init()
        return super().add(*args, **kwargs)

    def extend(self, *args, **kwargs):
        self._lazy_init()
        return super().extend(*args, **kwargs)

    def known_properties(self):
        self._lazy_init()
        return super().known_properties()

    def _lazy_init(self):
        # Ensure the table's stream exists (creating it when auto_import is on)
        # and its schema has been fetched; runs at most once.
        if self._finalzed:
            return

        parent = self._parent
        name = self._name
        use_graph_index = self._model._config.get("use_graph_index", USE_GRAPH_INDEX)

        if not use_graph_index and name not in parent._imported:
            # parent._parent._parent walks schema -> db -> _Snowflake root.
            if self._parent._parent._parent._auto_import:
                with Spinner(f"Creating stream for {self.fqname()}", f"Stream for {self.fqname()} created successfully"):
                    db_name = parent._parent._name
                    schema_name = parent._name
                    self._model._client.resources.create_import_stream(ImportSourceTable(db_name, schema_name, name), self._model.name)
                print("")
                parent._imported.add(name)
            else:
                # Re-check against the live list of imports before giving up.
                imports = self._model._client.resources.list_imports(model=self._model.name)
                for item in imports:
                    cur_name = item["name"].lower().split(".")[-1]
                    parent._imported.add(cur_name)
                if name not in parent._imported:
                    exception = SnowflakeImportMissingException(runtime_env.get_source(), self.fqname(), self._model.name)
                    raise exception from None

        parent._fetch_info()
        self._finalize()

    def _finalize(self):
        # Install the Rel rules that map the raw EDB columns onto this Type's
        # properties. Rule order below mirrors the two id schemes supported:
        # legacy SHA-1 row ids, then the newer UInt128 METADATA$KEY ids.
        if self._finalzed:
            return

        self._finalzed = True
        self._schema = self._parent._table_info[self._name]

        # Set the relation name to the sanitized version of the fully qualified name
        relation_name = to_fqn_relation_name(self.fqname())

        model: dsl.Graph = self._model
        edb = getattr(std.rel, relation_name)
        edb._rel.parents.append(m.Builtins.EDB)
        id_rel = getattr(std.rel, f"{relation_name}_pyrel_id")

        # Legacy SHA-1 streams: collect row ids from the METADATA$ROW_ID column.
        with model.rule(globalize=True, source=self._source):
            id, val = dsl.create_vars(2)
            edb(dsl.Symbol("METADATA$ROW_ID"), id, val)
            std.rel.SHA1(id)
            id_rel.add(id)

        # Every collected SHA-1 id becomes an instance of this Type.
        with model.rule(dynamic=True, globalize=True, source=self._source):
            prop, id, val = dsl.create_vars(3)
            id_rel(id)
            std.rel.SHA1(id)
            self.add(snowflake_id=id)

        # One rule per column mapping the EDB value onto the property (SHA-1 ids).
        for prop, prop_type in self._schema["columns"].items():
            _prop = prop
            if _prop.startswith("_"):
                # Leading-underscore columns would collide with private attrs.
                _prop = "col" + prop

            prop_ident = sanitize_identifier(_prop.lower())

            with model.rule(dynamic=True, globalize=True, source=self._source):
                id, val = dsl.create_vars(2)
                edb(dsl.Symbol(prop), id, val)
                std.rel.SHA1(id)
                _prop = getattr(self, prop_ident)
                if not _prop:
                    raise ValueError(f"Property {_prop} couldn't be accessed on {self.fqname()}")
                if _prop.is_multi_valued:
                    inst = self(snowflake_id=id)
                    getattr(inst, prop_ident).add(val)
                else:
                    self(snowflake_id=id).set(**{prop_ident: val})

        # Because we're bypassing a bunch of the normal Type.add machinery here,
        # we need to manually account for the case where people are using value types.
        def wrapped(x):
            if not model._config.get("compiler.use_value_types", False):
                return x
            other_id = dsl.create_var()
            model._action(dsl.build.construct(self._type, [x, other_id]))
            return other_id

        # new UInt128 schema mapping rules
        with model.rule(dynamic=True, globalize=True, source=self._source):
            id = dsl.create_var()
            # This will generate an arity mismatch warning when used with the old SHA-1 Data Streams.
            # Ideally we have the `@no_diagnostics(:ARITY_MISMATCH)` attribute on the relation using
            # the METADATA$KEY column but that ended up being a more involved change then expected
            # for avoiding a non-blocking warning
            edb(dsl.Symbol("METADATA$KEY"), id)
            std.rel.UInt128(id)
            self.add(wrapped(id), snowflake_id=id)

        # One rule per column for the UInt128 id scheme; single-valued
        # properties are written directly into a function-tagged raw relation.
        for prop, prop_type in self._schema["columns"].items():
            _prop = prop
            if _prop.startswith("_"):
                _prop = "col" + prop

            prop_ident = sanitize_identifier(_prop.lower())
            with model.rule(dynamic=True, globalize=True, source=self._source):
                id, val = dsl.create_vars(2)
                edb(dsl.Symbol(prop), id, val)
                std.rel.UInt128(id)
                _prop = getattr(self, prop_ident)
                if not _prop:
                    raise ValueError(f"Property {_prop} couldn't be accessed on {self.fqname()}")
                if _prop.is_multi_valued:
                    inst = self(id)
                    getattr(inst, prop_ident).add(val)
                else:
                    model._check_property(_prop._prop)
                    raw_relation = getattr(std.rel, prop_ident)
                    dsl.tag(raw_relation, dsl.Builtins.FunctionAnnotation)
                    raw_relation.add(wrapped(id), val)

    def namespace(self):
        # "database.schema" portion of the fully qualified name.
        return f"{self._parent._parent._name}.{self._parent._name}"

    def fqname(self):
        # Fully qualified "database.schema.table" name.
        return f"{self.namespace()}.{self._name}"

    def describe(self, **kwargs):
        # Declare keys: col=PrimaryKey marks the primary key; col=(Table, prop)
        # installs a foreign-key join rule linking this table to Table.
        model = self._model
        for k, v in kwargs.items():
            if v is PrimaryKey:
                self._schema["pks"] = [k]
            elif isinstance(v, tuple):
                (table, name) = v
                if isinstance(table, SnowflakeTable):
                    fk_table = table
                    pk = fk_table._schema["pks"]
                    with model.rule():
                        inst = fk_table()
                        me = self()
                        # Join on this table's FK column equaling the target's PK.
                        getattr(inst, pk[0]) == getattr(me, k)
                        if getattr(self, name).is_multi_valued:
                            getattr(me, name).add(inst)
                        else:
                            me.set(**{name: inst})
                else:
                    raise ValueError(f"Invalid foreign key {v}")
            else:
                raise ValueError(f"Invalid column {k}={v}")
        return self
2912
class Provider(ProviderBase):
    """Convenience wrapper around a Resources instance for stream management,
    raw SQL execution, and app activation/deactivation."""

    def __init__(
        self,
        profile: str | None = None,
        config: Config | None = None,
        resources: Resources | None = None,
        generation: Generation | None = None,
    ):
        if resources:
            self.resources = resources
        else:
            from .resources_factory import create_resources_instance
            self.resources = create_resources_instance(
                config=config,
                profile=profile,
                generation=generation or Generation.V0,
                dry_run=False,
                language="rel",
            )

    def list_streams(self, model: str):
        """Return the import streams attached to *model*."""
        return self.resources.list_imports(model=model)

    def create_streams(self, sources: List[str], model: str, force=False):
        """Create an import stream for each `database.schema.table` source and
        wait for the imports to complete. With force=True, existing imports
        are deleted and recreated."""
        if not self.resources.get_graph(model):
            self.resources.create_graph(model)

        def parse_source(raw: str):
            parser = IdentityParser(raw)
            assert parser.is_complete, "Snowflake table imports must be in `database.schema.table` format"
            return ImportSourceTable(*parser.to_list())

        for source in sources:
            source_table = parse_source(source)
            try:
                with Spinner(f"Creating stream for {source_table.name}", f"Stream for {source_table.name} created successfully"):
                    if force:
                        self.resources.delete_import(source_table.name, model, True)
                    self.resources.create_import_stream(source_table, model)
            except Exception as e:
                if "stream already exists" in f"{e}":
                    # Fix: message previously read "Stream'NAME'" with no space.
                    raise Exception(f"\n\nStream '{source_table.name.upper()}' already exists.")
                elif "engine not found" in f"{e}":
                    raise Exception("\n\nNo engines found in a READY state. Please use `engines:create` to create an engine that will be used to initialize the target relation.")
                else:
                    raise e
        with Spinner("Waiting for imports to complete", "Imports complete"):
            self.resources.poll_imports(sources, model)

    def delete_stream(self, stream_id: str, model: str):
        """Delete the import stream *stream_id* from *model*."""
        return self.resources.delete_import(stream_id, model)

    def sql(self, query: str, params: List[Any] | None = None, format: Literal["list", "pandas", "polars", "lazy"] = "list"):
        """Execute *query* and return the result as a list, pandas DataFrame,
        polars DataFrame, or lazy snowpark DataFrame.

        Fix: the default for *params* was a mutable `[]`; use a None sentinel
        instead (backward compatible — callers passing a list are unaffected).
        """
        if params is None:
            params = []
        # note: default format cannot be pandas because .to_pandas() only works on SELECT queries
        result = self.resources._exec(query, params, raw=True, help=False)
        if format == "lazy":
            return cast(snowflake.snowpark.DataFrame, result)
        elif format == "list":
            return cast(list, result.collect())
        elif format == "pandas":
            import pandas as pd
            try:
                # use to_pandas for SELECT queries
                return cast(pd.DataFrame, result.to_pandas())
            except Exception:
                # handle non-SELECT queries like SHOW
                return pd.DataFrame(result.collect())
        elif format == "polars":
            import polars as pl  # type: ignore
            return pl.DataFrame(
                [row.as_dict() for row in result.collect()],
                orient="row",
                strict=False,
                infer_schema_length=None
            )
        else:
            raise ValueError(f"Invalid format {format}. Should be one of 'list', 'pandas', 'polars', 'lazy'")

    def activate(self):
        """Activate the RelationalAI native app."""
        with Spinner("Activating RelationalAI app...", "RelationalAI app activated"):
            self.sql("CALL RELATIONALAI.APP.ACTIVATE();")

    def deactivate(self):
        """Deactivate the RelationalAI native app."""
        with Spinner("Deactivating RelationalAI app...", "RelationalAI app deactivated"):
            self.sql("CALL RELATIONALAI.APP.DEACTIVATE();")

    def drop_service(self):
        """Deprecated alias for deactivate()."""
        warnings.warn(
            "The drop_service method has been deprecated in favor of deactivate",
            DeprecationWarning,
            stacklevel=2,
        )
        self.deactivate()

    def resume_service(self):
        """Deprecated alias for activate()."""
        warnings.warn(
            "The resume_service method has been deprecated in favor of activate",
            DeprecationWarning,
            stacklevel=2,
        )
        self.activate()
3012
-
3013
- #--------------------------------------------------
3014
- # SnowflakeClient
3015
- #--------------------------------------------------
3016
class SnowflakeClient(Client):
    """Client that provisions model databases through the use_index polling flow."""

    def create_database(self, isolated=True, nowait_durable=True, headers: Dict | None = None):
        from relationalai.tools.cli_helpers import validate_engine_name

        assert isinstance(self.resources, Resources)

        # Nothing to do when no sources were added since the last poll.
        if self.last_database_version == len(self.resources.sources):
            return

        model = self._source_database
        app_name = self.resources.get_app_name()
        engine_name = self.resources.get_default_engine_name()
        engine_size = self.resources.config.get_default_engine_size()

        # Validate the engine name before touching any remote state.
        name_ok, _ = validate_engine_name(engine_name)
        if not name_ok:
            raise EngineNameValidationException(engine_name)

        # Validate the engine size against the sizes the service supports.
        allowed_sizes = self.resources.get_engine_sizes()
        if not isinstance(engine_size, str) or engine_size not in allowed_sizes:
            raise InvalidEngineSizeError(str(engine_size), allowed_sizes)

        program_span_id = debugging.get_program_span_id()

        query_attrs = json.loads(headers.get("X-Query-Attributes", "{}")) if headers else {}
        with debugging.span("poll_use_index", sources=self.resources.sources, model=model, engine=engine_name, **query_attrs):
            self.maybe_poll_use_index(
                app_name=app_name,
                sources=self.resources.sources,
                model=model,
                engine_name=engine_name,
                engine_size=engine_size,
                program_span_id=program_span_id,
                headers=headers,
            )

        self.last_database_version = len(self.resources.sources)
        self._manage_packages()

        if isolated and not self.keep_model:
            atexit.register(self.delete_database)

    def maybe_poll_use_index(
        self,
        app_name: str,
        sources: Iterable[str],
        model: str,
        engine_name: str,
        engine_size: str | None = None,
        program_span_id: str | None = None,
        headers: Dict | None = None,
    ):
        """Only call _poll_use_index if there are sources to process."""
        assert isinstance(self.resources, Resources)
        # Pure delegation to the resources-level implementation.
        return self.resources.maybe_poll_use_index(
            app_name=app_name,
            sources=sources,
            model=model,
            engine_name=engine_name,
            engine_size=engine_size,
            program_span_id=program_span_id,
            headers=headers,
        )
3082
-
3083
- #--------------------------------------------------
3084
- # Graph
3085
- #--------------------------------------------------
3086
-
3087
def Graph(
    name,
    *,
    profile: str | None = None,
    config: Config,
    dry_run: bool = False,
    isolated: bool = True,
    connection: Session | None = None,
    keep_model: bool = False,
    nowait_durable: bool = True,
    format: str = "default",
):
    # Build a dsl.Graph backed by Snowflake: create the resources instance,
    # pick the client class, install the pyrel base Rel library, and return
    # the graph wrapper.
    from .resources_factory import create_resources_instance
    from .use_index_resources import UseIndexResources

    use_graph_index = config.get("use_graph_index", USE_GRAPH_INDEX)
    use_monotype_operators = config.get("compiler.use_monotype_operators", False)

    # Create resources instance using factory
    resources = create_resources_instance(
        config=config,
        profile=profile,
        connection=connection,
        generation=Generation.V0,
        dry_run=False, # Resources instance dry_run is separate from client dry_run
        language="rel",
    )

    # Determine client class based on resources type and config
    # SnowflakeClient is used for resources that support use_index functionality
    if use_graph_index or isinstance(resources, UseIndexResources):
        client_class = SnowflakeClient
    else:
        client_class = Client

    client = client_class(
        resources,
        rel.Compiler(config),
        name,
        config,
        dry_run=dry_run,
        isolated=isolated,
        keep_model=keep_model,
        nowait_durable=nowait_durable
    )
    # Base Rel library installed into every model: identity hashing, default
    # values, date/time periods, regex helpers, and two declared relations.
    # NOTE: this string is executable Rel source — do not edit casually.
    base_rel = """
    @inline
    def make_identity(x..., z):
        rel_primitive_hash_tuple_uint128(x..., z)

    @inline
    def pyrel_default({F}, c, k..., v):
        F(k..., v) or (not F(k..., _) and v = c)

    @inline
    def pyrel_unwrap(x in UInt128, y): y = x

    @inline
    def pyrel_dates_period_days(x in Date, y in Date, z in Int):
        exists((u) | dates_period_days(x, y , u) and u = ::std::common::^Day[z])

    @inline
    def pyrel_datetimes_period_milliseconds(x in DateTime, y in DateTime, z in Int):
        exists((u) | datetimes_period_milliseconds(x, y , u) and u = ^Millisecond[z])

    @inline
    def pyrel_bool_filter(a, b, {F}, z): { z = if_then_else[F(a, b), boolean_true, boolean_false] }

    @inline
    def pyrel_strftime(v, fmt, tz in String, s in String):
        (Date(v) and s = format_date[v, fmt])
        or (DateTime(v) and s = format_datetime[v, fmt, tz])

    @inline
    def pyrel_regex_match_all(pattern, string in String, pos in Int, offset in Int, match in String):
        regex_match_all(pattern, string, offset, match) and offset >= pos

    @inline
    def pyrel_regex_match(pattern, string in String, pos in Int, offset in Int, match in String):
        pyrel_regex_match_all(pattern, string, pos, offset, match) and offset = pos

    @inline
    def pyrel_regex_search(pattern, string in String, pos in Int, offset in Int, match in String):
        enumerate(pyrel_regex_match_all[pattern, string, pos], 1, offset, match)

    @inline
    def pyrel_regex_sub(pattern, repl in String, string in String, result in String):
        string_replace_multiple(string, {(last[regex_match_all[pattern, string]], repl)}, result)

    @inline
    def pyrel_capture_group(regex in Pattern, string in String, pos in Int, index, match in String):
        (Integer(index) and capture_group_by_index(regex, string, pos, index, match)) or
        (String(index) and capture_group_by_name(regex, string, pos, index, match))

    declare __resource
    declare __compiled_patterns
    """
    if use_monotype_operators:
        base_rel += """

        // use monotyped operators
        from ::std::monotype import +, -, *, /, <, <=, >, >=
        """
    pyrel_base = dsl.build.raw_task(base_rel)
    debugging.set_source(pyrel_base)
    client.install("pyrel_base", pyrel_base)
    return dsl.Graph(client, name, format=format)