relationalai 0.13.0__py3-none-any.whl → 0.13.0.dev0__py3-none-any.whl

This diff shows the content changes between publicly available package versions released to one of the supported registries. The information in this diff is provided for informational purposes only and reflects the differences between package versions as published in their respective public registries.
Files changed (836)
  1. relationalai/__init__.py +1 -256
  2. relationalai/config/__init__.py +56 -0
  3. relationalai/config/config.py +289 -0
  4. relationalai/config/config_fields.py +86 -0
  5. relationalai/config/connections/__init__.py +46 -0
  6. relationalai/config/connections/base.py +23 -0
  7. relationalai/config/connections/duckdb.py +29 -0
  8. relationalai/config/connections/snowflake.py +243 -0
  9. relationalai/config/external/__init__.py +17 -0
  10. relationalai/config/external/dbt_converter.py +101 -0
  11. relationalai/config/external/dbt_models.py +93 -0
  12. relationalai/config/external/snowflake_converter.py +41 -0
  13. relationalai/config/external/snowflake_models.py +85 -0
  14. relationalai/config/external/utils.py +19 -0
  15. relationalai/semantics/__init__.py +146 -22
  16. relationalai/semantics/backends/lqp/annotations.py +11 -0
  17. relationalai/semantics/backends/sql/sql_compiler.py +327 -0
  18. relationalai/semantics/frontend/base.py +1707 -0
  19. relationalai/semantics/frontend/core.py +179 -0
  20. relationalai/semantics/frontend/front_compiler.py +1313 -0
  21. relationalai/semantics/frontend/pprint.py +408 -0
  22. relationalai/semantics/metamodel/__init__.py +6 -40
  23. relationalai/semantics/metamodel/builtins.py +205 -771
  24. relationalai/semantics/metamodel/metamodel.py +437 -0
  25. relationalai/semantics/metamodel/metamodel_analyzer.py +519 -0
  26. relationalai/semantics/metamodel/pprint.py +412 -0
  27. relationalai/semantics/metamodel/rewriter.py +266 -0
  28. relationalai/semantics/metamodel/typer.py +1378 -0
  29. relationalai/semantics/std/__init__.py +60 -40
  30. relationalai/semantics/std/aggregates.py +149 -0
  31. relationalai/semantics/std/common.py +44 -0
  32. relationalai/semantics/std/constraints.py +37 -43
  33. relationalai/semantics/std/datetime.py +246 -135
  34. relationalai/semantics/std/decimals.py +45 -52
  35. relationalai/semantics/std/floats.py +13 -5
  36. relationalai/semantics/std/integers.py +26 -11
  37. relationalai/semantics/std/math.py +183 -112
  38. relationalai/semantics/std/numbers.py +86 -0
  39. relationalai/semantics/std/re.py +80 -62
  40. relationalai/semantics/std/strings.py +117 -60
  41. relationalai/shims/executor.py +147 -0
  42. relationalai/shims/helpers.py +126 -0
  43. relationalai/shims/hoister.py +221 -0
  44. relationalai/shims/mm2v0.py +1290 -0
  45. relationalai/tools/cli/__init__.py +6 -0
  46. relationalai/tools/cli/cli.py +90 -0
  47. relationalai/tools/cli/components/__init__.py +5 -0
  48. relationalai/tools/cli/components/progress_reader.py +1524 -0
  49. relationalai/tools/cli/components/utils.py +58 -0
  50. relationalai/tools/cli/config_template.py +45 -0
  51. relationalai/tools/cli/dev.py +19 -0
  52. relationalai/tools/debugger.py +289 -183
  53. relationalai/tools/typer_debugger.py +93 -0
  54. relationalai/util/dataclasses.py +43 -0
  55. relationalai/util/docutils.py +40 -0
  56. relationalai/util/error.py +199 -0
  57. relationalai/util/format.py +48 -106
  58. relationalai/util/naming.py +145 -0
  59. relationalai/util/python.py +35 -0
  60. relationalai/util/runtime.py +156 -0
  61. relationalai/util/schema.py +197 -0
  62. relationalai/util/source.py +185 -0
  63. relationalai/util/structures.py +163 -0
  64. relationalai/util/tracing.py +261 -0
  65. relationalai-0.13.0.dev0.dist-info/METADATA +46 -0
  66. relationalai-0.13.0.dev0.dist-info/RECORD +488 -0
  67. relationalai-0.13.0.dev0.dist-info/WHEEL +5 -0
  68. relationalai-0.13.0.dev0.dist-info/entry_points.txt +3 -0
  69. relationalai-0.13.0.dev0.dist-info/top_level.txt +2 -0
  70. v0/relationalai/__init__.py +216 -0
  71. v0/relationalai/clients/__init__.py +5 -0
  72. v0/relationalai/clients/azure.py +477 -0
  73. v0/relationalai/clients/client.py +912 -0
  74. v0/relationalai/clients/config.py +673 -0
  75. v0/relationalai/clients/direct_access_client.py +118 -0
  76. v0/relationalai/clients/hash_util.py +31 -0
  77. v0/relationalai/clients/local.py +571 -0
  78. v0/relationalai/clients/profile_polling.py +73 -0
  79. v0/relationalai/clients/result_helpers.py +420 -0
  80. v0/relationalai/clients/snowflake.py +3869 -0
  81. v0/relationalai/clients/types.py +113 -0
  82. v0/relationalai/clients/use_index_poller.py +980 -0
  83. v0/relationalai/clients/util.py +356 -0
  84. v0/relationalai/debugging.py +389 -0
  85. v0/relationalai/dsl.py +1749 -0
  86. v0/relationalai/early_access/builder/__init__.py +30 -0
  87. v0/relationalai/early_access/builder/builder/__init__.py +35 -0
  88. v0/relationalai/early_access/builder/snowflake/__init__.py +12 -0
  89. v0/relationalai/early_access/builder/std/__init__.py +25 -0
  90. v0/relationalai/early_access/builder/std/decimals/__init__.py +12 -0
  91. v0/relationalai/early_access/builder/std/integers/__init__.py +12 -0
  92. v0/relationalai/early_access/builder/std/math/__init__.py +12 -0
  93. v0/relationalai/early_access/builder/std/strings/__init__.py +14 -0
  94. v0/relationalai/early_access/devtools/__init__.py +12 -0
  95. v0/relationalai/early_access/devtools/benchmark_lqp/__init__.py +12 -0
  96. v0/relationalai/early_access/devtools/extract_lqp/__init__.py +12 -0
  97. v0/relationalai/early_access/dsl/adapters/orm/adapter_qb.py +427 -0
  98. v0/relationalai/early_access/dsl/adapters/orm/parser.py +636 -0
  99. v0/relationalai/early_access/dsl/adapters/owl/adapter.py +176 -0
  100. v0/relationalai/early_access/dsl/adapters/owl/parser.py +160 -0
  101. v0/relationalai/early_access/dsl/bindings/common.py +402 -0
  102. v0/relationalai/early_access/dsl/bindings/csv.py +170 -0
  103. v0/relationalai/early_access/dsl/bindings/legacy/binding_models.py +143 -0
  104. v0/relationalai/early_access/dsl/bindings/snowflake.py +64 -0
  105. v0/relationalai/early_access/dsl/codegen/binder.py +411 -0
  106. v0/relationalai/early_access/dsl/codegen/common.py +79 -0
  107. v0/relationalai/early_access/dsl/codegen/helpers.py +23 -0
  108. v0/relationalai/early_access/dsl/codegen/relations.py +700 -0
  109. v0/relationalai/early_access/dsl/codegen/weaver.py +417 -0
  110. v0/relationalai/early_access/dsl/core/builders/__init__.py +47 -0
  111. v0/relationalai/early_access/dsl/core/builders/logic.py +19 -0
  112. v0/relationalai/early_access/dsl/core/builders/scalar_constraint.py +11 -0
  113. v0/relationalai/early_access/dsl/core/constraints/predicate/atomic.py +455 -0
  114. v0/relationalai/early_access/dsl/core/constraints/predicate/universal.py +73 -0
  115. v0/relationalai/early_access/dsl/core/constraints/scalar.py +310 -0
  116. v0/relationalai/early_access/dsl/core/context.py +13 -0
  117. v0/relationalai/early_access/dsl/core/cset.py +132 -0
  118. v0/relationalai/early_access/dsl/core/exprs/__init__.py +116 -0
  119. v0/relationalai/early_access/dsl/core/exprs/relational.py +18 -0
  120. v0/relationalai/early_access/dsl/core/exprs/scalar.py +412 -0
  121. v0/relationalai/early_access/dsl/core/instances.py +44 -0
  122. v0/relationalai/early_access/dsl/core/logic/__init__.py +193 -0
  123. v0/relationalai/early_access/dsl/core/logic/aggregation.py +98 -0
  124. v0/relationalai/early_access/dsl/core/logic/exists.py +223 -0
  125. v0/relationalai/early_access/dsl/core/logic/helper.py +163 -0
  126. v0/relationalai/early_access/dsl/core/namespaces.py +32 -0
  127. v0/relationalai/early_access/dsl/core/relations.py +276 -0
  128. v0/relationalai/early_access/dsl/core/rules.py +112 -0
  129. v0/relationalai/early_access/dsl/core/std/__init__.py +45 -0
  130. v0/relationalai/early_access/dsl/core/temporal/recall.py +6 -0
  131. v0/relationalai/early_access/dsl/core/types/__init__.py +270 -0
  132. v0/relationalai/early_access/dsl/core/types/concepts.py +128 -0
  133. v0/relationalai/early_access/dsl/core/types/constrained/__init__.py +267 -0
  134. v0/relationalai/early_access/dsl/core/types/constrained/nominal.py +143 -0
  135. v0/relationalai/early_access/dsl/core/types/constrained/subtype.py +124 -0
  136. v0/relationalai/early_access/dsl/core/types/standard.py +92 -0
  137. v0/relationalai/early_access/dsl/core/types/unconstrained.py +50 -0
  138. v0/relationalai/early_access/dsl/core/types/variables.py +203 -0
  139. v0/relationalai/early_access/dsl/ir/compiler.py +318 -0
  140. v0/relationalai/early_access/dsl/ir/executor.py +260 -0
  141. v0/relationalai/early_access/dsl/ontologies/constraints.py +88 -0
  142. v0/relationalai/early_access/dsl/ontologies/export.py +30 -0
  143. v0/relationalai/early_access/dsl/ontologies/models.py +453 -0
  144. v0/relationalai/early_access/dsl/ontologies/python_printer.py +303 -0
  145. v0/relationalai/early_access/dsl/ontologies/readings.py +60 -0
  146. v0/relationalai/early_access/dsl/ontologies/relationships.py +322 -0
  147. v0/relationalai/early_access/dsl/ontologies/roles.py +87 -0
  148. v0/relationalai/early_access/dsl/ontologies/subtyping.py +55 -0
  149. v0/relationalai/early_access/dsl/orm/constraints.py +438 -0
  150. v0/relationalai/early_access/dsl/orm/measures/dimensions.py +200 -0
  151. v0/relationalai/early_access/dsl/orm/measures/initializer.py +16 -0
  152. v0/relationalai/early_access/dsl/orm/measures/measure_rules.py +275 -0
  153. v0/relationalai/early_access/dsl/orm/measures/measures.py +299 -0
  154. v0/relationalai/early_access/dsl/orm/measures/role_exprs.py +268 -0
  155. v0/relationalai/early_access/dsl/orm/models.py +256 -0
  156. v0/relationalai/early_access/dsl/orm/object_oriented_printer.py +344 -0
  157. v0/relationalai/early_access/dsl/orm/printer.py +469 -0
  158. v0/relationalai/early_access/dsl/orm/reasoners.py +480 -0
  159. v0/relationalai/early_access/dsl/orm/relations.py +19 -0
  160. v0/relationalai/early_access/dsl/orm/relationships.py +251 -0
  161. v0/relationalai/early_access/dsl/orm/types.py +42 -0
  162. v0/relationalai/early_access/dsl/orm/utils.py +79 -0
  163. v0/relationalai/early_access/dsl/orm/verb.py +204 -0
  164. v0/relationalai/early_access/dsl/physical_metadata/tables.py +133 -0
  165. v0/relationalai/early_access/dsl/relations.py +170 -0
  166. v0/relationalai/early_access/dsl/rulesets.py +69 -0
  167. v0/relationalai/early_access/dsl/schemas/__init__.py +450 -0
  168. v0/relationalai/early_access/dsl/schemas/builder.py +48 -0
  169. v0/relationalai/early_access/dsl/schemas/comp_names.py +51 -0
  170. v0/relationalai/early_access/dsl/schemas/components.py +203 -0
  171. v0/relationalai/early_access/dsl/schemas/contexts.py +156 -0
  172. v0/relationalai/early_access/dsl/schemas/exprs.py +89 -0
  173. v0/relationalai/early_access/dsl/schemas/fragments.py +464 -0
  174. v0/relationalai/early_access/dsl/serialization.py +79 -0
  175. v0/relationalai/early_access/dsl/serialize/exporter.py +163 -0
  176. v0/relationalai/early_access/dsl/snow/api.py +104 -0
  177. v0/relationalai/early_access/dsl/snow/common.py +76 -0
  178. v0/relationalai/early_access/dsl/state_mgmt/__init__.py +129 -0
  179. v0/relationalai/early_access/dsl/state_mgmt/state_charts.py +125 -0
  180. v0/relationalai/early_access/dsl/state_mgmt/transitions.py +130 -0
  181. v0/relationalai/early_access/dsl/types/__init__.py +40 -0
  182. v0/relationalai/early_access/dsl/types/concepts.py +12 -0
  183. v0/relationalai/early_access/dsl/types/entities.py +135 -0
  184. v0/relationalai/early_access/dsl/types/values.py +17 -0
  185. v0/relationalai/early_access/dsl/utils.py +102 -0
  186. v0/relationalai/early_access/graphs/__init__.py +13 -0
  187. v0/relationalai/early_access/lqp/__init__.py +12 -0
  188. v0/relationalai/early_access/lqp/compiler/__init__.py +12 -0
  189. v0/relationalai/early_access/lqp/constructors/__init__.py +18 -0
  190. v0/relationalai/early_access/lqp/executor/__init__.py +12 -0
  191. v0/relationalai/early_access/lqp/ir/__init__.py +12 -0
  192. v0/relationalai/early_access/lqp/passes/__init__.py +12 -0
  193. v0/relationalai/early_access/lqp/pragmas/__init__.py +12 -0
  194. v0/relationalai/early_access/lqp/primitives/__init__.py +12 -0
  195. v0/relationalai/early_access/lqp/types/__init__.py +12 -0
  196. v0/relationalai/early_access/lqp/utils/__init__.py +12 -0
  197. v0/relationalai/early_access/lqp/validators/__init__.py +12 -0
  198. v0/relationalai/early_access/metamodel/__init__.py +58 -0
  199. v0/relationalai/early_access/metamodel/builtins/__init__.py +12 -0
  200. v0/relationalai/early_access/metamodel/compiler/__init__.py +12 -0
  201. v0/relationalai/early_access/metamodel/dependency/__init__.py +12 -0
  202. v0/relationalai/early_access/metamodel/factory/__init__.py +17 -0
  203. v0/relationalai/early_access/metamodel/helpers/__init__.py +12 -0
  204. v0/relationalai/early_access/metamodel/ir/__init__.py +14 -0
  205. v0/relationalai/early_access/metamodel/rewrite/__init__.py +7 -0
  206. v0/relationalai/early_access/metamodel/typer/__init__.py +3 -0
  207. v0/relationalai/early_access/metamodel/typer/typer/__init__.py +12 -0
  208. v0/relationalai/early_access/metamodel/types/__init__.py +15 -0
  209. v0/relationalai/early_access/metamodel/util/__init__.py +15 -0
  210. v0/relationalai/early_access/metamodel/visitor/__init__.py +12 -0
  211. v0/relationalai/early_access/rel/__init__.py +12 -0
  212. v0/relationalai/early_access/rel/executor/__init__.py +12 -0
  213. v0/relationalai/early_access/rel/rel_utils/__init__.py +12 -0
  214. v0/relationalai/early_access/rel/rewrite/__init__.py +7 -0
  215. v0/relationalai/early_access/solvers/__init__.py +19 -0
  216. v0/relationalai/early_access/sql/__init__.py +11 -0
  217. v0/relationalai/early_access/sql/executor/__init__.py +3 -0
  218. v0/relationalai/early_access/sql/rewrite/__init__.py +3 -0
  219. v0/relationalai/early_access/tests/logging/__init__.py +12 -0
  220. v0/relationalai/early_access/tests/test_snapshot_base/__init__.py +12 -0
  221. v0/relationalai/early_access/tests/utils/__init__.py +12 -0
  222. v0/relationalai/environments/__init__.py +35 -0
  223. v0/relationalai/environments/base.py +381 -0
  224. v0/relationalai/environments/colab.py +14 -0
  225. v0/relationalai/environments/generic.py +71 -0
  226. v0/relationalai/environments/ipython.py +68 -0
  227. v0/relationalai/environments/jupyter.py +9 -0
  228. v0/relationalai/environments/snowbook.py +169 -0
  229. v0/relationalai/errors.py +2455 -0
  230. v0/relationalai/experimental/SF.py +38 -0
  231. v0/relationalai/experimental/inspect.py +47 -0
  232. v0/relationalai/experimental/pathfinder/__init__.py +158 -0
  233. v0/relationalai/experimental/pathfinder/api.py +160 -0
  234. v0/relationalai/experimental/pathfinder/automaton.py +584 -0
  235. v0/relationalai/experimental/pathfinder/bridge.py +226 -0
  236. v0/relationalai/experimental/pathfinder/compiler.py +416 -0
  237. v0/relationalai/experimental/pathfinder/datalog.py +214 -0
  238. v0/relationalai/experimental/pathfinder/diagnostics.py +56 -0
  239. v0/relationalai/experimental/pathfinder/filter.py +236 -0
  240. v0/relationalai/experimental/pathfinder/glushkov.py +439 -0
  241. v0/relationalai/experimental/pathfinder/options.py +265 -0
  242. v0/relationalai/experimental/pathfinder/rpq.py +344 -0
  243. v0/relationalai/experimental/pathfinder/transition.py +200 -0
  244. v0/relationalai/experimental/pathfinder/utils.py +26 -0
  245. v0/relationalai/experimental/paths/api.py +143 -0
  246. v0/relationalai/experimental/paths/benchmarks/grid_graph.py +37 -0
  247. v0/relationalai/experimental/paths/examples/basic_example.py +40 -0
  248. v0/relationalai/experimental/paths/examples/minimal_engine_warmup.py +3 -0
  249. v0/relationalai/experimental/paths/examples/movie_example.py +77 -0
  250. v0/relationalai/experimental/paths/examples/paths_benchmark.py +115 -0
  251. v0/relationalai/experimental/paths/examples/paths_example.py +116 -0
  252. v0/relationalai/experimental/paths/examples/pattern_to_automaton.py +28 -0
  253. v0/relationalai/experimental/paths/find_paths_via_automaton.py +85 -0
  254. v0/relationalai/experimental/paths/graph.py +185 -0
  255. v0/relationalai/experimental/paths/path_algorithms/find_paths.py +280 -0
  256. v0/relationalai/experimental/paths/path_algorithms/one_sided_ball_repetition.py +26 -0
  257. v0/relationalai/experimental/paths/path_algorithms/one_sided_ball_upto.py +111 -0
  258. v0/relationalai/experimental/paths/path_algorithms/single.py +59 -0
  259. v0/relationalai/experimental/paths/path_algorithms/two_sided_balls_repetition.py +39 -0
  260. v0/relationalai/experimental/paths/path_algorithms/two_sided_balls_upto.py +103 -0
  261. v0/relationalai/experimental/paths/path_algorithms/usp-old.py +130 -0
  262. v0/relationalai/experimental/paths/path_algorithms/usp-tuple.py +183 -0
  263. v0/relationalai/experimental/paths/path_algorithms/usp.py +150 -0
  264. v0/relationalai/experimental/paths/product_graph.py +93 -0
  265. v0/relationalai/experimental/paths/rpq/automaton.py +584 -0
  266. v0/relationalai/experimental/paths/rpq/diagnostics.py +56 -0
  267. v0/relationalai/experimental/paths/rpq/rpq.py +378 -0
  268. v0/relationalai/experimental/paths/tests/tests_limit_sp_max_length.py +90 -0
  269. v0/relationalai/experimental/paths/tests/tests_limit_sp_multiple.py +119 -0
  270. v0/relationalai/experimental/paths/tests/tests_limit_sp_single.py +104 -0
  271. v0/relationalai/experimental/paths/tests/tests_limit_walks_multiple.py +113 -0
  272. v0/relationalai/experimental/paths/tests/tests_limit_walks_single.py +149 -0
  273. v0/relationalai/experimental/paths/tests/tests_one_sided_ball_repetition_multiple.py +70 -0
  274. v0/relationalai/experimental/paths/tests/tests_one_sided_ball_repetition_single.py +64 -0
  275. v0/relationalai/experimental/paths/tests/tests_one_sided_ball_upto_multiple.py +115 -0
  276. v0/relationalai/experimental/paths/tests/tests_one_sided_ball_upto_single.py +75 -0
  277. v0/relationalai/experimental/paths/tests/tests_single_paths.py +152 -0
  278. v0/relationalai/experimental/paths/tests/tests_single_walks.py +208 -0
  279. v0/relationalai/experimental/paths/tests/tests_single_walks_undirected.py +297 -0
  280. v0/relationalai/experimental/paths/tests/tests_two_sided_balls_repetition_multiple.py +107 -0
  281. v0/relationalai/experimental/paths/tests/tests_two_sided_balls_repetition_single.py +76 -0
  282. v0/relationalai/experimental/paths/tests/tests_two_sided_balls_upto_multiple.py +76 -0
  283. v0/relationalai/experimental/paths/tests/tests_two_sided_balls_upto_single.py +110 -0
  284. v0/relationalai/experimental/paths/tests/tests_usp_nsp_multiple.py +229 -0
  285. v0/relationalai/experimental/paths/tests/tests_usp_nsp_single.py +108 -0
  286. v0/relationalai/experimental/paths/tree_agg.py +168 -0
  287. v0/relationalai/experimental/paths/utilities/iterators.py +27 -0
  288. v0/relationalai/experimental/paths/utilities/prefix_sum.py +91 -0
  289. v0/relationalai/experimental/solvers.py +1087 -0
  290. v0/relationalai/loaders/__init__.py +0 -0
  291. v0/relationalai/loaders/csv.py +195 -0
  292. v0/relationalai/loaders/loader.py +177 -0
  293. v0/relationalai/loaders/types.py +23 -0
  294. v0/relationalai/rel_emitter.py +373 -0
  295. v0/relationalai/rel_utils.py +185 -0
  296. v0/relationalai/semantics/__init__.py +29 -0
  297. v0/relationalai/semantics/devtools/benchmark_lqp.py +536 -0
  298. v0/relationalai/semantics/devtools/compilation_manager.py +294 -0
  299. v0/relationalai/semantics/devtools/extract_lqp.py +110 -0
  300. v0/relationalai/semantics/internal/internal.py +3785 -0
  301. v0/relationalai/semantics/internal/snowflake.py +324 -0
  302. v0/relationalai/semantics/lqp/builtins.py +16 -0
  303. v0/relationalai/semantics/lqp/compiler.py +22 -0
  304. v0/relationalai/semantics/lqp/constructors.py +68 -0
  305. v0/relationalai/semantics/lqp/executor.py +469 -0
  306. v0/relationalai/semantics/lqp/intrinsics.py +24 -0
  307. v0/relationalai/semantics/lqp/model2lqp.py +839 -0
  308. v0/relationalai/semantics/lqp/passes.py +680 -0
  309. v0/relationalai/semantics/lqp/primitives.py +252 -0
  310. v0/relationalai/semantics/lqp/result_helpers.py +202 -0
  311. v0/relationalai/semantics/lqp/rewrite/annotate_constraints.py +57 -0
  312. v0/relationalai/semantics/lqp/rewrite/cdc.py +216 -0
  313. v0/relationalai/semantics/lqp/rewrite/extract_common.py +338 -0
  314. v0/relationalai/semantics/lqp/rewrite/extract_keys.py +449 -0
  315. v0/relationalai/semantics/lqp/rewrite/function_annotations.py +114 -0
  316. v0/relationalai/semantics/lqp/rewrite/functional_dependencies.py +314 -0
  317. v0/relationalai/semantics/lqp/rewrite/quantify_vars.py +296 -0
  318. v0/relationalai/semantics/lqp/rewrite/splinter.py +76 -0
  319. v0/relationalai/semantics/lqp/types.py +101 -0
  320. v0/relationalai/semantics/lqp/utils.py +160 -0
  321. v0/relationalai/semantics/lqp/validators.py +57 -0
  322. v0/relationalai/semantics/metamodel/__init__.py +40 -0
  323. v0/relationalai/semantics/metamodel/builtins.py +774 -0
  324. v0/relationalai/semantics/metamodel/compiler.py +133 -0
  325. v0/relationalai/semantics/metamodel/dependency.py +862 -0
  326. v0/relationalai/semantics/metamodel/executor.py +61 -0
  327. v0/relationalai/semantics/metamodel/factory.py +287 -0
  328. v0/relationalai/semantics/metamodel/helpers.py +361 -0
  329. v0/relationalai/semantics/metamodel/rewrite/discharge_constraints.py +39 -0
  330. v0/relationalai/semantics/metamodel/rewrite/dnf_union_splitter.py +210 -0
  331. v0/relationalai/semantics/metamodel/rewrite/extract_nested_logicals.py +78 -0
  332. v0/relationalai/semantics/metamodel/rewrite/flatten.py +549 -0
  333. v0/relationalai/semantics/metamodel/rewrite/format_outputs.py +165 -0
  334. v0/relationalai/semantics/metamodel/typer/checker.py +353 -0
  335. v0/relationalai/semantics/metamodel/typer/typer.py +1395 -0
  336. v0/relationalai/semantics/metamodel/util.py +505 -0
  337. v0/relationalai/semantics/reasoners/__init__.py +10 -0
  338. v0/relationalai/semantics/reasoners/graph/__init__.py +37 -0
  339. v0/relationalai/semantics/reasoners/graph/core.py +9020 -0
  340. v0/relationalai/semantics/reasoners/optimization/__init__.py +68 -0
  341. v0/relationalai/semantics/reasoners/optimization/common.py +88 -0
  342. v0/relationalai/semantics/reasoners/optimization/solvers_dev.py +568 -0
  343. v0/relationalai/semantics/reasoners/optimization/solvers_pb.py +1163 -0
  344. v0/relationalai/semantics/rel/builtins.py +40 -0
  345. v0/relationalai/semantics/rel/compiler.py +989 -0
  346. v0/relationalai/semantics/rel/executor.py +359 -0
  347. v0/relationalai/semantics/rel/rel.py +482 -0
  348. v0/relationalai/semantics/rel/rel_utils.py +276 -0
  349. v0/relationalai/semantics/snowflake/__init__.py +3 -0
  350. v0/relationalai/semantics/sql/compiler.py +2503 -0
  351. v0/relationalai/semantics/sql/executor/duck_db.py +52 -0
  352. v0/relationalai/semantics/sql/executor/result_helpers.py +64 -0
  353. v0/relationalai/semantics/sql/executor/snowflake.py +145 -0
  354. v0/relationalai/semantics/sql/rewrite/denormalize.py +222 -0
  355. v0/relationalai/semantics/sql/rewrite/double_negation.py +49 -0
  356. v0/relationalai/semantics/sql/rewrite/recursive_union.py +127 -0
  357. v0/relationalai/semantics/sql/rewrite/sort_output_query.py +246 -0
  358. v0/relationalai/semantics/sql/sql.py +504 -0
  359. v0/relationalai/semantics/std/__init__.py +54 -0
  360. v0/relationalai/semantics/std/constraints.py +43 -0
  361. v0/relationalai/semantics/std/datetime.py +363 -0
  362. v0/relationalai/semantics/std/decimals.py +62 -0
  363. v0/relationalai/semantics/std/floats.py +7 -0
  364. v0/relationalai/semantics/std/integers.py +22 -0
  365. v0/relationalai/semantics/std/math.py +141 -0
  366. v0/relationalai/semantics/std/pragmas.py +11 -0
  367. v0/relationalai/semantics/std/re.py +83 -0
  368. v0/relationalai/semantics/std/std.py +14 -0
  369. v0/relationalai/semantics/std/strings.py +63 -0
  370. v0/relationalai/semantics/tests/__init__.py +0 -0
  371. v0/relationalai/semantics/tests/test_snapshot_abstract.py +143 -0
  372. v0/relationalai/semantics/tests/test_snapshot_base.py +9 -0
  373. v0/relationalai/semantics/tests/utils.py +46 -0
  374. v0/relationalai/std/__init__.py +70 -0
  375. v0/relationalai/tools/__init__.py +0 -0
  376. v0/relationalai/tools/cli.py +1940 -0
  377. v0/relationalai/tools/cli_controls.py +1826 -0
  378. v0/relationalai/tools/cli_helpers.py +390 -0
  379. v0/relationalai/tools/debugger.py +183 -0
  380. v0/relationalai/tools/debugger_client.py +109 -0
  381. v0/relationalai/tools/debugger_server.py +302 -0
  382. v0/relationalai/tools/dev.py +685 -0
  383. v0/relationalai/tools/qb_debugger.py +425 -0
  384. v0/relationalai/util/clean_up_databases.py +95 -0
  385. v0/relationalai/util/format.py +123 -0
  386. v0/relationalai/util/list_databases.py +9 -0
  387. v0/relationalai/util/otel_configuration.py +25 -0
  388. v0/relationalai/util/otel_handler.py +484 -0
  389. v0/relationalai/util/snowflake_handler.py +88 -0
  390. v0/relationalai/util/span_format_test.py +43 -0
  391. v0/relationalai/util/span_tracker.py +207 -0
  392. v0/relationalai/util/spans_file_handler.py +72 -0
  393. v0/relationalai/util/tracing_handler.py +34 -0
  394. frontend/debugger/dist/.gitignore +0 -2
  395. frontend/debugger/dist/assets/favicon-Dy0ZgA6N.png +0 -0
  396. frontend/debugger/dist/assets/index-Cssla-O7.js +0 -208
  397. frontend/debugger/dist/assets/index-DlHsYx1V.css +0 -9
  398. frontend/debugger/dist/index.html +0 -17
  399. relationalai/clients/__init__.py +0 -18
  400. relationalai/clients/client.py +0 -912
  401. relationalai/clients/config.py +0 -673
  402. relationalai/clients/direct_access_client.py +0 -118
  403. relationalai/clients/hash_util.py +0 -31
  404. relationalai/clients/local.py +0 -571
  405. relationalai/clients/profile_polling.py +0 -73
  406. relationalai/clients/resources/__init__.py +0 -8
  407. relationalai/clients/resources/azure/azure.py +0 -477
  408. relationalai/clients/resources/snowflake/__init__.py +0 -20
  409. relationalai/clients/resources/snowflake/cli_resources.py +0 -87
  410. relationalai/clients/resources/snowflake/direct_access_resources.py +0 -711
  411. relationalai/clients/resources/snowflake/engine_state_handlers.py +0 -309
  412. relationalai/clients/resources/snowflake/error_handlers.py +0 -199
  413. relationalai/clients/resources/snowflake/export_procedure.py.jinja +0 -249
  414. relationalai/clients/resources/snowflake/resources_factory.py +0 -99
  415. relationalai/clients/resources/snowflake/snowflake.py +0 -3083
  416. relationalai/clients/resources/snowflake/use_index_poller.py +0 -1011
  417. relationalai/clients/resources/snowflake/use_index_resources.py +0 -188
  418. relationalai/clients/resources/snowflake/util.py +0 -387
  419. relationalai/clients/result_helpers.py +0 -420
  420. relationalai/clients/types.py +0 -113
  421. relationalai/clients/util.py +0 -356
  422. relationalai/debugging.py +0 -389
  423. relationalai/dsl.py +0 -1749
  424. relationalai/early_access/builder/__init__.py +0 -30
  425. relationalai/early_access/builder/builder/__init__.py +0 -35
  426. relationalai/early_access/builder/snowflake/__init__.py +0 -12
  427. relationalai/early_access/builder/std/__init__.py +0 -25
  428. relationalai/early_access/builder/std/decimals/__init__.py +0 -12
  429. relationalai/early_access/builder/std/integers/__init__.py +0 -12
  430. relationalai/early_access/builder/std/math/__init__.py +0 -12
  431. relationalai/early_access/builder/std/strings/__init__.py +0 -14
  432. relationalai/early_access/devtools/__init__.py +0 -12
  433. relationalai/early_access/devtools/benchmark_lqp/__init__.py +0 -12
  434. relationalai/early_access/devtools/extract_lqp/__init__.py +0 -12
  435. relationalai/early_access/dsl/adapters/orm/adapter_qb.py +0 -427
  436. relationalai/early_access/dsl/adapters/orm/parser.py +0 -636
  437. relationalai/early_access/dsl/adapters/owl/adapter.py +0 -176
  438. relationalai/early_access/dsl/adapters/owl/parser.py +0 -160
  439. relationalai/early_access/dsl/bindings/common.py +0 -402
  440. relationalai/early_access/dsl/bindings/csv.py +0 -170
  441. relationalai/early_access/dsl/bindings/legacy/binding_models.py +0 -143
  442. relationalai/early_access/dsl/bindings/snowflake.py +0 -64
  443. relationalai/early_access/dsl/codegen/binder.py +0 -411
  444. relationalai/early_access/dsl/codegen/common.py +0 -79
  445. relationalai/early_access/dsl/codegen/helpers.py +0 -23
  446. relationalai/early_access/dsl/codegen/relations.py +0 -700
  447. relationalai/early_access/dsl/codegen/weaver.py +0 -417
  448. relationalai/early_access/dsl/core/builders/__init__.py +0 -47
  449. relationalai/early_access/dsl/core/builders/logic.py +0 -19
  450. relationalai/early_access/dsl/core/builders/scalar_constraint.py +0 -11
  451. relationalai/early_access/dsl/core/constraints/predicate/atomic.py +0 -455
  452. relationalai/early_access/dsl/core/constraints/predicate/universal.py +0 -73
  453. relationalai/early_access/dsl/core/constraints/scalar.py +0 -310
  454. relationalai/early_access/dsl/core/context.py +0 -13
  455. relationalai/early_access/dsl/core/cset.py +0 -132
  456. relationalai/early_access/dsl/core/exprs/__init__.py +0 -116
  457. relationalai/early_access/dsl/core/exprs/relational.py +0 -18
  458. relationalai/early_access/dsl/core/exprs/scalar.py +0 -412
  459. relationalai/early_access/dsl/core/instances.py +0 -44
  460. relationalai/early_access/dsl/core/logic/__init__.py +0 -193
  461. relationalai/early_access/dsl/core/logic/aggregation.py +0 -98
  462. relationalai/early_access/dsl/core/logic/exists.py +0 -223
  463. relationalai/early_access/dsl/core/logic/helper.py +0 -163
  464. relationalai/early_access/dsl/core/namespaces.py +0 -32
  465. relationalai/early_access/dsl/core/relations.py +0 -276
  466. relationalai/early_access/dsl/core/rules.py +0 -112
  467. relationalai/early_access/dsl/core/std/__init__.py +0 -45
  468. relationalai/early_access/dsl/core/temporal/recall.py +0 -6
  469. relationalai/early_access/dsl/core/types/__init__.py +0 -270
  470. relationalai/early_access/dsl/core/types/concepts.py +0 -128
  471. relationalai/early_access/dsl/core/types/constrained/__init__.py +0 -267
  472. relationalai/early_access/dsl/core/types/constrained/nominal.py +0 -143
  473. relationalai/early_access/dsl/core/types/constrained/subtype.py +0 -124
  474. relationalai/early_access/dsl/core/types/standard.py +0 -92
  475. relationalai/early_access/dsl/core/types/unconstrained.py +0 -50
  476. relationalai/early_access/dsl/core/types/variables.py +0 -203
  477. relationalai/early_access/dsl/ir/compiler.py +0 -318
  478. relationalai/early_access/dsl/ir/executor.py +0 -260
  479. relationalai/early_access/dsl/ontologies/constraints.py +0 -88
  480. relationalai/early_access/dsl/ontologies/export.py +0 -30
  481. relationalai/early_access/dsl/ontologies/models.py +0 -453
  482. relationalai/early_access/dsl/ontologies/python_printer.py +0 -303
  483. relationalai/early_access/dsl/ontologies/readings.py +0 -60
  484. relationalai/early_access/dsl/ontologies/relationships.py +0 -322
  485. relationalai/early_access/dsl/ontologies/roles.py +0 -87
  486. relationalai/early_access/dsl/ontologies/subtyping.py +0 -55
  487. relationalai/early_access/dsl/orm/constraints.py +0 -438
  488. relationalai/early_access/dsl/orm/measures/dimensions.py +0 -200
  489. relationalai/early_access/dsl/orm/measures/initializer.py +0 -16
  490. relationalai/early_access/dsl/orm/measures/measure_rules.py +0 -275
  491. relationalai/early_access/dsl/orm/measures/measures.py +0 -299
  492. relationalai/early_access/dsl/orm/measures/role_exprs.py +0 -268
  493. relationalai/early_access/dsl/orm/models.py +0 -256
  494. relationalai/early_access/dsl/orm/object_oriented_printer.py +0 -344
  495. relationalai/early_access/dsl/orm/printer.py +0 -469
  496. relationalai/early_access/dsl/orm/reasoners.py +0 -480
  497. relationalai/early_access/dsl/orm/relations.py +0 -19
  498. relationalai/early_access/dsl/orm/relationships.py +0 -251
  499. relationalai/early_access/dsl/orm/types.py +0 -42
  500. relationalai/early_access/dsl/orm/utils.py +0 -79
  501. relationalai/early_access/dsl/orm/verb.py +0 -204
  502. relationalai/early_access/dsl/physical_metadata/tables.py +0 -133
  503. relationalai/early_access/dsl/relations.py +0 -170
  504. relationalai/early_access/dsl/rulesets.py +0 -69
  505. relationalai/early_access/dsl/schemas/__init__.py +0 -450
  506. relationalai/early_access/dsl/schemas/builder.py +0 -48
  507. relationalai/early_access/dsl/schemas/comp_names.py +0 -51
  508. relationalai/early_access/dsl/schemas/components.py +0 -203
  509. relationalai/early_access/dsl/schemas/contexts.py +0 -156
  510. relationalai/early_access/dsl/schemas/exprs.py +0 -89
  511. relationalai/early_access/dsl/schemas/fragments.py +0 -464
  512. relationalai/early_access/dsl/serialization.py +0 -79
  513. relationalai/early_access/dsl/serialize/exporter.py +0 -163
  514. relationalai/early_access/dsl/snow/api.py +0 -105
  515. relationalai/early_access/dsl/snow/common.py +0 -76
  516. relationalai/early_access/dsl/state_mgmt/__init__.py +0 -129
  517. relationalai/early_access/dsl/state_mgmt/state_charts.py +0 -125
  518. relationalai/early_access/dsl/state_mgmt/transitions.py +0 -130
  519. relationalai/early_access/dsl/types/__init__.py +0 -40
  520. relationalai/early_access/dsl/types/concepts.py +0 -12
  521. relationalai/early_access/dsl/types/entities.py +0 -135
  522. relationalai/early_access/dsl/types/values.py +0 -17
  523. relationalai/early_access/dsl/utils.py +0 -102
  524. relationalai/early_access/graphs/__init__.py +0 -13
  525. relationalai/early_access/lqp/__init__.py +0 -12
  526. relationalai/early_access/lqp/compiler/__init__.py +0 -12
  527. relationalai/early_access/lqp/constructors/__init__.py +0 -18
  528. relationalai/early_access/lqp/executor/__init__.py +0 -12
  529. relationalai/early_access/lqp/ir/__init__.py +0 -12
  530. relationalai/early_access/lqp/passes/__init__.py +0 -12
  531. relationalai/early_access/lqp/pragmas/__init__.py +0 -12
  532. relationalai/early_access/lqp/primitives/__init__.py +0 -12
  533. relationalai/early_access/lqp/types/__init__.py +0 -12
  534. relationalai/early_access/lqp/utils/__init__.py +0 -12
  535. relationalai/early_access/lqp/validators/__init__.py +0 -12
  536. relationalai/early_access/metamodel/__init__.py +0 -58
  537. relationalai/early_access/metamodel/builtins/__init__.py +0 -12
  538. relationalai/early_access/metamodel/compiler/__init__.py +0 -12
  539. relationalai/early_access/metamodel/dependency/__init__.py +0 -12
  540. relationalai/early_access/metamodel/factory/__init__.py +0 -17
  541. relationalai/early_access/metamodel/helpers/__init__.py +0 -12
  542. relationalai/early_access/metamodel/ir/__init__.py +0 -14
  543. relationalai/early_access/metamodel/rewrite/__init__.py +0 -7
  544. relationalai/early_access/metamodel/typer/__init__.py +0 -3
  545. relationalai/early_access/metamodel/typer/typer/__init__.py +0 -12
  546. relationalai/early_access/metamodel/types/__init__.py +0 -15
  547. relationalai/early_access/metamodel/util/__init__.py +0 -15
  548. relationalai/early_access/metamodel/visitor/__init__.py +0 -12
  549. relationalai/early_access/rel/__init__.py +0 -12
  550. relationalai/early_access/rel/executor/__init__.py +0 -12
  551. relationalai/early_access/rel/rel_utils/__init__.py +0 -12
  552. relationalai/early_access/rel/rewrite/__init__.py +0 -7
  553. relationalai/early_access/solvers/__init__.py +0 -19
  554. relationalai/early_access/sql/__init__.py +0 -11
  555. relationalai/early_access/sql/executor/__init__.py +0 -3
  556. relationalai/early_access/sql/rewrite/__init__.py +0 -3
  557. relationalai/early_access/tests/logging/__init__.py +0 -12
  558. relationalai/early_access/tests/test_snapshot_base/__init__.py +0 -12
  559. relationalai/early_access/tests/utils/__init__.py +0 -12
  560. relationalai/environments/__init__.py +0 -35
  561. relationalai/environments/base.py +0 -381
  562. relationalai/environments/colab.py +0 -14
  563. relationalai/environments/generic.py +0 -71
  564. relationalai/environments/ipython.py +0 -68
  565. relationalai/environments/jupyter.py +0 -9
  566. relationalai/environments/snowbook.py +0 -169
  567. relationalai/errors.py +0 -2478
  568. relationalai/experimental/SF.py +0 -38
  569. relationalai/experimental/inspect.py +0 -47
  570. relationalai/experimental/pathfinder/__init__.py +0 -158
  571. relationalai/experimental/pathfinder/api.py +0 -160
  572. relationalai/experimental/pathfinder/automaton.py +0 -584
  573. relationalai/experimental/pathfinder/bridge.py +0 -226
  574. relationalai/experimental/pathfinder/compiler.py +0 -416
  575. relationalai/experimental/pathfinder/datalog.py +0 -214
  576. relationalai/experimental/pathfinder/diagnostics.py +0 -56
  577. relationalai/experimental/pathfinder/filter.py +0 -236
  578. relationalai/experimental/pathfinder/glushkov.py +0 -439
  579. relationalai/experimental/pathfinder/options.py +0 -265
  580. relationalai/experimental/pathfinder/pathfinder-v0.7.0.rel +0 -1951
  581. relationalai/experimental/pathfinder/rpq.py +0 -344
  582. relationalai/experimental/pathfinder/transition.py +0 -200
  583. relationalai/experimental/pathfinder/utils.py +0 -26
  584. relationalai/experimental/paths/README.md +0 -107
  585. relationalai/experimental/paths/api.py +0 -143
  586. relationalai/experimental/paths/benchmarks/grid_graph.py +0 -37
  587. relationalai/experimental/paths/code_organization.md +0 -2
  588. relationalai/experimental/paths/examples/Movies.ipynb +0 -16328
  589. relationalai/experimental/paths/examples/basic_example.py +0 -40
  590. relationalai/experimental/paths/examples/minimal_engine_warmup.py +0 -3
  591. relationalai/experimental/paths/examples/movie_example.py +0 -77
  592. relationalai/experimental/paths/examples/movies_data/actedin.csv +0 -193
  593. relationalai/experimental/paths/examples/movies_data/directed.csv +0 -45
  594. relationalai/experimental/paths/examples/movies_data/follows.csv +0 -7
  595. relationalai/experimental/paths/examples/movies_data/movies.csv +0 -39
  596. relationalai/experimental/paths/examples/movies_data/person.csv +0 -134
  597. relationalai/experimental/paths/examples/movies_data/produced.csv +0 -16
  598. relationalai/experimental/paths/examples/movies_data/ratings.csv +0 -10
  599. relationalai/experimental/paths/examples/movies_data/wrote.csv +0 -11
  600. relationalai/experimental/paths/examples/paths_benchmark.py +0 -115
  601. relationalai/experimental/paths/examples/paths_example.py +0 -116
  602. relationalai/experimental/paths/examples/pattern_to_automaton.py +0 -28
  603. relationalai/experimental/paths/find_paths_via_automaton.py +0 -85
  604. relationalai/experimental/paths/graph.py +0 -185
  605. relationalai/experimental/paths/path_algorithms/find_paths.py +0 -280
  606. relationalai/experimental/paths/path_algorithms/one_sided_ball_repetition.py +0 -26
  607. relationalai/experimental/paths/path_algorithms/one_sided_ball_upto.py +0 -111
  608. relationalai/experimental/paths/path_algorithms/single.py +0 -59
  609. relationalai/experimental/paths/path_algorithms/two_sided_balls_repetition.py +0 -39
  610. relationalai/experimental/paths/path_algorithms/two_sided_balls_upto.py +0 -103
  611. relationalai/experimental/paths/path_algorithms/usp-old.py +0 -130
  612. relationalai/experimental/paths/path_algorithms/usp-tuple.py +0 -183
  613. relationalai/experimental/paths/path_algorithms/usp.py +0 -150
  614. relationalai/experimental/paths/product_graph.py +0 -93
  615. relationalai/experimental/paths/rpq/automaton.py +0 -584
  616. relationalai/experimental/paths/rpq/diagnostics.py +0 -56
  617. relationalai/experimental/paths/rpq/rpq.py +0 -378
  618. relationalai/experimental/paths/tests/tests_limit_sp_max_length.py +0 -90
  619. relationalai/experimental/paths/tests/tests_limit_sp_multiple.py +0 -119
  620. relationalai/experimental/paths/tests/tests_limit_sp_single.py +0 -104
  621. relationalai/experimental/paths/tests/tests_limit_walks_multiple.py +0 -113
  622. relationalai/experimental/paths/tests/tests_limit_walks_single.py +0 -149
  623. relationalai/experimental/paths/tests/tests_one_sided_ball_repetition_multiple.py +0 -70
  624. relationalai/experimental/paths/tests/tests_one_sided_ball_repetition_single.py +0 -64
  625. relationalai/experimental/paths/tests/tests_one_sided_ball_upto_multiple.py +0 -115
  626. relationalai/experimental/paths/tests/tests_one_sided_ball_upto_single.py +0 -75
  627. relationalai/experimental/paths/tests/tests_single_paths.py +0 -152
  628. relationalai/experimental/paths/tests/tests_single_walks.py +0 -208
  629. relationalai/experimental/paths/tests/tests_single_walks_undirected.py +0 -297
  630. relationalai/experimental/paths/tests/tests_two_sided_balls_repetition_multiple.py +0 -107
  631. relationalai/experimental/paths/tests/tests_two_sided_balls_repetition_single.py +0 -76
  632. relationalai/experimental/paths/tests/tests_two_sided_balls_upto_multiple.py +0 -76
  633. relationalai/experimental/paths/tests/tests_two_sided_balls_upto_single.py +0 -110
  634. relationalai/experimental/paths/tests/tests_usp_nsp_multiple.py +0 -229
  635. relationalai/experimental/paths/tests/tests_usp_nsp_single.py +0 -108
  636. relationalai/experimental/paths/tree_agg.py +0 -168
  637. relationalai/experimental/paths/utilities/iterators.py +0 -27
  638. relationalai/experimental/paths/utilities/prefix_sum.py +0 -91
  639. relationalai/experimental/solvers.py +0 -1087
  640. relationalai/loaders/csv.py +0 -195
  641. relationalai/loaders/loader.py +0 -177
  642. relationalai/loaders/types.py +0 -23
  643. relationalai/rel_emitter.py +0 -373
  644. relationalai/rel_utils.py +0 -185
  645. relationalai/semantics/designs/query_builder/identify_by.md +0 -106
  646. relationalai/semantics/devtools/benchmark_lqp.py +0 -535
  647. relationalai/semantics/devtools/compilation_manager.py +0 -294
  648. relationalai/semantics/devtools/extract_lqp.py +0 -110
  649. relationalai/semantics/internal/internal.py +0 -3785
  650. relationalai/semantics/internal/snowflake.py +0 -325
  651. relationalai/semantics/lqp/README.md +0 -34
  652. relationalai/semantics/lqp/builtins.py +0 -16
  653. relationalai/semantics/lqp/compiler.py +0 -22
  654. relationalai/semantics/lqp/constructors.py +0 -68
  655. relationalai/semantics/lqp/executor.py +0 -469
  656. relationalai/semantics/lqp/intrinsics.py +0 -24
  657. relationalai/semantics/lqp/model2lqp.py +0 -877
  658. relationalai/semantics/lqp/passes.py +0 -680
  659. relationalai/semantics/lqp/primitives.py +0 -252
  660. relationalai/semantics/lqp/result_helpers.py +0 -202
  661. relationalai/semantics/lqp/rewrite/annotate_constraints.py +0 -57
  662. relationalai/semantics/lqp/rewrite/cdc.py +0 -216
  663. relationalai/semantics/lqp/rewrite/extract_common.py +0 -338
  664. relationalai/semantics/lqp/rewrite/extract_keys.py +0 -506
  665. relationalai/semantics/lqp/rewrite/function_annotations.py +0 -114
  666. relationalai/semantics/lqp/rewrite/functional_dependencies.py +0 -314
  667. relationalai/semantics/lqp/rewrite/quantify_vars.py +0 -296
  668. relationalai/semantics/lqp/rewrite/splinter.py +0 -76
  669. relationalai/semantics/lqp/types.py +0 -101
  670. relationalai/semantics/lqp/utils.py +0 -160
  671. relationalai/semantics/lqp/validators.py +0 -57
  672. relationalai/semantics/metamodel/compiler.py +0 -133
  673. relationalai/semantics/metamodel/dependency.py +0 -862
  674. relationalai/semantics/metamodel/executor.py +0 -61
  675. relationalai/semantics/metamodel/factory.py +0 -287
  676. relationalai/semantics/metamodel/helpers.py +0 -361
  677. relationalai/semantics/metamodel/rewrite/discharge_constraints.py +0 -39
  678. relationalai/semantics/metamodel/rewrite/dnf_union_splitter.py +0 -210
  679. relationalai/semantics/metamodel/rewrite/extract_nested_logicals.py +0 -78
  680. relationalai/semantics/metamodel/rewrite/flatten.py +0 -554
  681. relationalai/semantics/metamodel/rewrite/format_outputs.py +0 -165
  682. relationalai/semantics/metamodel/typer/checker.py +0 -353
  683. relationalai/semantics/metamodel/typer/typer.py +0 -1395
  684. relationalai/semantics/metamodel/util.py +0 -506
  685. relationalai/semantics/reasoners/__init__.py +0 -10
  686. relationalai/semantics/reasoners/graph/README.md +0 -620
  687. relationalai/semantics/reasoners/graph/__init__.py +0 -37
  688. relationalai/semantics/reasoners/graph/core.py +0 -9019
  689. relationalai/semantics/reasoners/graph/design/beyond_demand_transform.md +0 -797
  690. relationalai/semantics/reasoners/graph/tests/README.md +0 -21
  691. relationalai/semantics/reasoners/optimization/__init__.py +0 -68
  692. relationalai/semantics/reasoners/optimization/common.py +0 -88
  693. relationalai/semantics/reasoners/optimization/solvers_dev.py +0 -568
  694. relationalai/semantics/reasoners/optimization/solvers_pb.py +0 -1163
  695. relationalai/semantics/rel/builtins.py +0 -40
  696. relationalai/semantics/rel/compiler.py +0 -989
  697. relationalai/semantics/rel/executor.py +0 -362
  698. relationalai/semantics/rel/rel.py +0 -482
  699. relationalai/semantics/rel/rel_utils.py +0 -276
  700. relationalai/semantics/snowflake/__init__.py +0 -3
  701. relationalai/semantics/sql/compiler.py +0 -2503
  702. relationalai/semantics/sql/executor/duck_db.py +0 -52
  703. relationalai/semantics/sql/executor/result_helpers.py +0 -64
  704. relationalai/semantics/sql/executor/snowflake.py +0 -149
  705. relationalai/semantics/sql/rewrite/denormalize.py +0 -222
  706. relationalai/semantics/sql/rewrite/double_negation.py +0 -49
  707. relationalai/semantics/sql/rewrite/recursive_union.py +0 -127
  708. relationalai/semantics/sql/rewrite/sort_output_query.py +0 -246
  709. relationalai/semantics/sql/sql.py +0 -504
  710. relationalai/semantics/std/pragmas.py +0 -11
  711. relationalai/semantics/std/std.py +0 -14
  712. relationalai/semantics/tests/test_snapshot_abstract.py +0 -143
  713. relationalai/semantics/tests/test_snapshot_base.py +0 -9
  714. relationalai/semantics/tests/utils.py +0 -46
  715. relationalai/std/__init__.py +0 -70
  716. relationalai/tools/cli.py +0 -1936
  717. relationalai/tools/cli_controls.py +0 -1826
  718. relationalai/tools/cli_helpers.py +0 -398
  719. relationalai/tools/debugger_client.py +0 -109
  720. relationalai/tools/debugger_server.py +0 -302
  721. relationalai/tools/dev.py +0 -685
  722. relationalai/tools/notes +0 -7
  723. relationalai/tools/qb_debugger.py +0 -425
  724. relationalai/util/clean_up_databases.py +0 -95
  725. relationalai/util/list_databases.py +0 -9
  726. relationalai/util/otel_configuration.py +0 -26
  727. relationalai/util/otel_handler.py +0 -484
  728. relationalai/util/snowflake_handler.py +0 -88
  729. relationalai/util/span_format_test.py +0 -43
  730. relationalai/util/span_tracker.py +0 -207
  731. relationalai/util/spans_file_handler.py +0 -72
  732. relationalai/util/tracing_handler.py +0 -34
  733. relationalai-0.13.0.dist-info/METADATA +0 -74
  734. relationalai-0.13.0.dist-info/RECORD +0 -458
  735. relationalai-0.13.0.dist-info/WHEEL +0 -4
  736. relationalai-0.13.0.dist-info/entry_points.txt +0 -3
  737. relationalai-0.13.0.dist-info/licenses/LICENSE +0 -202
  738. relationalai_test_util/__init__.py +0 -4
  739. relationalai_test_util/fixtures.py +0 -229
  740. relationalai_test_util/snapshot.py +0 -252
  741. relationalai_test_util/traceback.py +0 -118
  742. /relationalai/{analysis → semantics/frontend}/__init__.py +0 -0
  743. /relationalai/{auth/__init__.py → semantics/metamodel/metamodel_compiler.py} +0 -0
  744. /relationalai/{early_access → shims}/__init__.py +0 -0
  745. {relationalai/early_access/dsl/adapters → v0/relationalai/analysis}/__init__.py +0 -0
  746. {relationalai → v0/relationalai}/analysis/mechanistic.py +0 -0
  747. {relationalai → v0/relationalai}/analysis/whynot.py +0 -0
  748. {relationalai/early_access/dsl/adapters/orm → v0/relationalai/auth}/__init__.py +0 -0
  749. {relationalai → v0/relationalai}/auth/jwt_generator.py +0 -0
  750. {relationalai → v0/relationalai}/auth/oauth_callback_server.py +0 -0
  751. {relationalai → v0/relationalai}/auth/token_handler.py +0 -0
  752. {relationalai → v0/relationalai}/auth/util.py +0 -0
  753. {relationalai/clients/resources/snowflake → v0/relationalai/clients}/cache_store.py +0 -0
  754. {relationalai → v0/relationalai}/compiler.py +0 -0
  755. {relationalai → v0/relationalai}/dependencies.py +0 -0
  756. {relationalai → v0/relationalai}/docutils.py +0 -0
  757. {relationalai/early_access/dsl/adapters/owl → v0/relationalai/early_access}/__init__.py +0 -0
  758. {relationalai → v0/relationalai}/early_access/dsl/__init__.py +0 -0
  759. {relationalai/early_access/dsl/bindings → v0/relationalai/early_access/dsl/adapters}/__init__.py +0 -0
  760. {relationalai/early_access/dsl/bindings/legacy → v0/relationalai/early_access/dsl/adapters/orm}/__init__.py +0 -0
  761. {relationalai → v0/relationalai}/early_access/dsl/adapters/orm/model.py +0 -0
  762. {relationalai/early_access/dsl/codegen → v0/relationalai/early_access/dsl/adapters/owl}/__init__.py +0 -0
  763. {relationalai → v0/relationalai}/early_access/dsl/adapters/owl/model.py +0 -0
  764. {relationalai/early_access/dsl/core/temporal → v0/relationalai/early_access/dsl/bindings}/__init__.py +0 -0
  765. {relationalai/early_access/dsl/ir → v0/relationalai/early_access/dsl/bindings/legacy}/__init__.py +0 -0
  766. {relationalai/early_access/dsl/ontologies → v0/relationalai/early_access/dsl/codegen}/__init__.py +0 -0
  767. {relationalai → v0/relationalai}/early_access/dsl/constants.py +0 -0
  768. {relationalai → v0/relationalai}/early_access/dsl/core/__init__.py +0 -0
  769. {relationalai → v0/relationalai}/early_access/dsl/core/constraints/__init__.py +0 -0
  770. {relationalai → v0/relationalai}/early_access/dsl/core/constraints/predicate/__init__.py +0 -0
  771. {relationalai → v0/relationalai}/early_access/dsl/core/stack.py +0 -0
  772. {relationalai/early_access/dsl/orm → v0/relationalai/early_access/dsl/core/temporal}/__init__.py +0 -0
  773. {relationalai → v0/relationalai}/early_access/dsl/core/utils.py +0 -0
  774. {relationalai/early_access/dsl/orm/measures → v0/relationalai/early_access/dsl/ir}/__init__.py +0 -0
  775. {relationalai/early_access/dsl/physical_metadata → v0/relationalai/early_access/dsl/ontologies}/__init__.py +0 -0
  776. {relationalai → v0/relationalai}/early_access/dsl/ontologies/raw_source.py +0 -0
  777. {relationalai/early_access/dsl/serialize → v0/relationalai/early_access/dsl/orm}/__init__.py +0 -0
  778. {relationalai/early_access/dsl/snow → v0/relationalai/early_access/dsl/orm/measures}/__init__.py +0 -0
  779. {relationalai → v0/relationalai}/early_access/dsl/orm/reasoner_errors.py +0 -0
  780. {relationalai/loaders → v0/relationalai/early_access/dsl/physical_metadata}/__init__.py +0 -0
  781. {relationalai/semantics/tests → v0/relationalai/early_access/dsl/serialize}/__init__.py +0 -0
  782. {relationalai → v0/relationalai}/early_access/dsl/serialize/binding_model.py +0 -0
  783. {relationalai → v0/relationalai}/early_access/dsl/serialize/model.py +0 -0
  784. {relationalai/tools → v0/relationalai/early_access/dsl/snow}/__init__.py +0 -0
  785. {relationalai → v0/relationalai}/early_access/tests/__init__.py +0 -0
  786. {relationalai → v0/relationalai}/environments/ci.py +0 -0
  787. {relationalai → v0/relationalai}/environments/hex.py +0 -0
  788. {relationalai → v0/relationalai}/environments/terminal.py +0 -0
  789. {relationalai → v0/relationalai}/experimental/__init__.py +0 -0
  790. {relationalai → v0/relationalai}/experimental/graphs.py +0 -0
  791. {relationalai → v0/relationalai}/experimental/paths/__init__.py +0 -0
  792. {relationalai → v0/relationalai}/experimental/paths/benchmarks/__init__.py +0 -0
  793. {relationalai → v0/relationalai}/experimental/paths/path_algorithms/__init__.py +0 -0
  794. {relationalai → v0/relationalai}/experimental/paths/rpq/__init__.py +0 -0
  795. {relationalai → v0/relationalai}/experimental/paths/rpq/filter.py +0 -0
  796. {relationalai → v0/relationalai}/experimental/paths/rpq/glushkov.py +0 -0
  797. {relationalai → v0/relationalai}/experimental/paths/rpq/transition.py +0 -0
  798. {relationalai → v0/relationalai}/experimental/paths/utilities/__init__.py +0 -0
  799. {relationalai → v0/relationalai}/experimental/paths/utilities/utilities.py +0 -0
  800. {relationalai → v0/relationalai}/metagen.py +0 -0
  801. {relationalai → v0/relationalai}/metamodel.py +0 -0
  802. {relationalai → v0/relationalai}/rel.py +0 -0
  803. {relationalai → v0/relationalai}/semantics/devtools/__init__.py +0 -0
  804. {relationalai → v0/relationalai}/semantics/internal/__init__.py +0 -0
  805. {relationalai → v0/relationalai}/semantics/internal/annotations.py +0 -0
  806. {relationalai → v0/relationalai}/semantics/lqp/__init__.py +0 -0
  807. {relationalai → v0/relationalai}/semantics/lqp/ir.py +0 -0
  808. {relationalai → v0/relationalai}/semantics/lqp/pragmas.py +0 -0
  809. {relationalai → v0/relationalai}/semantics/lqp/rewrite/__init__.py +0 -0
  810. {relationalai → v0/relationalai}/semantics/metamodel/dataflow.py +0 -0
  811. {relationalai → v0/relationalai}/semantics/metamodel/ir.py +0 -0
  812. {relationalai → v0/relationalai}/semantics/metamodel/rewrite/__init__.py +0 -0
  813. {relationalai → v0/relationalai}/semantics/metamodel/typer/__init__.py +0 -0
  814. {relationalai → v0/relationalai}/semantics/metamodel/types.py +0 -0
  815. {relationalai → v0/relationalai}/semantics/metamodel/visitor.py +0 -0
  816. {relationalai → v0/relationalai}/semantics/reasoners/experimental/__init__.py +0 -0
  817. {relationalai → v0/relationalai}/semantics/rel/__init__.py +0 -0
  818. {relationalai → v0/relationalai}/semantics/sql/__init__.py +0 -0
  819. {relationalai → v0/relationalai}/semantics/sql/executor/__init__.py +0 -0
  820. {relationalai → v0/relationalai}/semantics/sql/rewrite/__init__.py +0 -0
  821. {relationalai → v0/relationalai}/semantics/tests/logging.py +0 -0
  822. {relationalai → v0/relationalai}/std/aggregates.py +0 -0
  823. {relationalai → v0/relationalai}/std/dates.py +0 -0
  824. {relationalai → v0/relationalai}/std/graphs.py +0 -0
  825. {relationalai → v0/relationalai}/std/inspect.py +0 -0
  826. {relationalai → v0/relationalai}/std/math.py +0 -0
  827. {relationalai → v0/relationalai}/std/re.py +0 -0
  828. {relationalai → v0/relationalai}/std/strings.py +0 -0
  829. {relationalai → v0/relationalai}/tools/cleanup_snapshots.py +0 -0
  830. {relationalai → v0/relationalai}/tools/constants.py +0 -0
  831. {relationalai → v0/relationalai}/tools/query_utils.py +0 -0
  832. {relationalai → v0/relationalai}/tools/snapshot_viewer.py +0 -0
  833. {relationalai → v0/relationalai}/util/__init__.py +0 -0
  834. {relationalai → v0/relationalai}/util/constants.py +0 -0
  835. {relationalai → v0/relationalai}/util/graph.py +0 -0
  836. {relationalai → v0/relationalai}/util/timeout.py +0 -0
@@ -1,3083 +0,0 @@
1
- # pyright: reportUnusedExpression=false
2
- from __future__ import annotations
3
- import base64
4
- import importlib.resources
5
- import io
6
- import re
7
- import json
8
- import time
9
- import textwrap
10
- import ast
11
- import uuid
12
- import warnings
13
- import atexit
14
- import hashlib
15
- from dataclasses import dataclass
16
-
17
- from ....auth.token_handler import TokenHandler
18
- import snowflake.snowpark
19
-
20
- from ....rel_utils import sanitize_identifier, to_fqn_relation_name
21
- from ....tools.constants import FIELD_PLACEHOLDER, SNOWFLAKE_AUTHS, USE_GRAPH_INDEX, DEFAULT_QUERY_TIMEOUT_MINS, WAIT_FOR_STREAM_SYNC, Generation
22
- from .... import std
23
- from collections import defaultdict
24
- import requests
25
- import snowflake.connector
26
- import pyarrow as pa
27
-
28
- from snowflake.snowpark import Session
29
- from snowflake.snowpark.context import get_active_session
30
- from ... import result_helpers
31
- from .... import debugging
32
- from typing import Any, Dict, Iterable, Tuple, List, Literal, cast
33
-
34
- from pandas import DataFrame
35
-
36
- from ....tools.cli_controls import Spinner
37
- from ...types import AvailableModel, EngineState, Import, ImportSource, ImportSourceTable, ImportsStatus, SourceInfo, TransactionAsyncResponse
38
- from ...config import Config
39
- from ...client import Client, ExportParams, ProviderBase, ResourcesBase
40
- from ...util import IdentityParser, escape_for_f_string, get_pyrel_version, get_with_retries, poll_with_specified_overhead, safe_json_loads, sanitize_module_name, scrub_exception, wrap_with_request_id, normalize_datetime
41
- from .util import (
42
- collect_error_messages,
43
- process_jinja_template,
44
- type_to_sql,
45
- type_to_snowpark,
46
- sanitize_user_name as _sanitize_user_name,
47
- normalize_params,
48
- format_sproc_name,
49
- is_azure_url,
50
- is_container_runtime,
51
- imports_to_dicts,
52
- txn_list_to_dicts,
53
- decrypt_artifact,
54
- )
55
- from ....environments import runtime_env, HexEnvironment, SnowbookEnvironment
56
- from .... import dsl, rel, metamodel as m
57
- from ....errors import EngineProvisioningFailed, EngineNameValidationException, Errors, InvalidAliasError, InvalidEngineSizeError, InvalidSourceTypeWarning, RAIException, HexSessionException, SnowflakeChangeTrackingNotEnabledException, SnowflakeDatabaseException, SnowflakeImportMissingException, SnowflakeInvalidSource, SnowflakeMissingConfigValuesException, SnowflakeProxyAPIDeprecationWarning, SnowflakeProxySourceError, ModelNotFoundException, UnknownSourceWarning, RowsDroppedFromTargetTableWarning, QueryTimeoutExceededException
58
- from concurrent.futures import ThreadPoolExecutor
59
- from datetime import datetime, timedelta
60
- from snowflake.snowpark.types import StringType, StructField, StructType
61
- # Import error handlers and constants
62
- from .error_handlers import (
63
- ErrorHandler,
64
- DuoSecurityErrorHandler,
65
- AppMissingErrorHandler,
66
- DatabaseErrorsHandler,
67
- EngineErrorsHandler,
68
- ServiceNotStartedErrorHandler,
69
- TransactionAbortedErrorHandler,
70
- )
71
- # Import engine state handlers
72
- from .engine_state_handlers import (
73
- EngineStateHandler,
74
- EngineContext,
75
- SyncPendingStateHandler,
76
- SyncSuspendedStateHandler,
77
- SyncReadyStateHandler,
78
- SyncGoneStateHandler,
79
- SyncMissingEngineHandler,
80
- AsyncPendingStateHandler,
81
- AsyncSuspendedStateHandler,
82
- AsyncReadyStateHandler,
83
- AsyncGoneStateHandler,
84
- AsyncMissingEngineHandler,
85
- )
86
-
87
-
88
- #--------------------------------------------------
89
- # Constants
90
- #--------------------------------------------------
91
-
92
- VALID_POOL_STATUS = ["ACTIVE", "IDLE", "SUSPENDED"]
93
- # transaction list and get return different fields (duration vs timings)
94
- LIST_TXN_SQL_FIELDS = ["id", "database_name", "engine_name", "state", "abort_reason", "read_only","created_by", "created_on", "finished_at", "duration"]
95
- GET_TXN_SQL_FIELDS = ["id", "database", "engine", "state", "abort_reason", "read_only","created_by", "created_on", "finished_at", "timings"]
96
- VALID_ENGINE_STATES = ["READY", "PENDING"]
97
-
98
- # Cloud-specific engine sizes
99
- INTERNAL_ENGINE_SIZES = ["XS", "S", "M", "L"]
100
- ENGINE_SIZES_AWS = ["HIGHMEM_X64_S", "HIGHMEM_X64_M", "HIGHMEM_X64_L"]
101
- ENGINE_SIZES_AZURE = ["HIGHMEM_X64_S", "HIGHMEM_X64_M", "HIGHMEM_X64_SL"]
102
- # Note: ENGINE_ERRORS, ENGINE_NOT_READY_MSGS, DATABASE_ERRORS moved to util.py
103
- PYREL_ROOT_DB = 'pyrel_root_db'
104
-
105
- TERMINAL_TXN_STATES = ["COMPLETED", "ABORTED"]
106
-
107
- TXN_ABORT_REASON_TIMEOUT = "transaction timeout"
108
-
109
- #--------------------------------------------------
110
- # Resources
111
- #--------------------------------------------------
112
-
113
- APP_NAME = "___RAI_APP___"
114
-
115
- @dataclass
116
- class ExecContext:
117
- """Execution context for SQL queries, containing all parameters needed for execution and retry."""
118
- code: str
119
- params: List[Any] | None = None
120
- raw: bool = False
121
- help: bool = True
122
- skip_engine_db_error_retry: bool = False
123
-
124
- def re_execute(self, resources: 'Resources') -> Any:
125
- """Re-execute this context's query using the provided resources instance."""
126
- return resources._exec(
127
- code=self.code,
128
- params=self.params,
129
- raw=self.raw,
130
- help=self.help,
131
- skip_engine_db_error_retry=self.skip_engine_db_error_retry
132
- )
133
-
134
- class Resources(ResourcesBase):
135
- def __init__(
136
- self,
137
- profile: str | None = None,
138
- config: Config | None = None,
139
- connection: Session | None = None,
140
- dry_run: bool = False,
141
- reset_session: bool = False,
142
- generation: Generation | None = None,
143
- language: str = "rel", # Accepted for backward compatibility, but not stored in base class
144
- ):
145
- super().__init__(profile, config=config)
146
- self._token_handler: TokenHandler | None = None
147
- self._session = connection
148
- self.generation = generation
149
- if self._session is None and not dry_run:
150
- try:
151
- # we may still be constructing the config, so this can fail now,
152
- # if so we'll create later
153
- self._session = self.get_sf_session(reset_session)
154
- except Exception:
155
- pass
156
- self._pending_transactions: list[str] = []
157
- self._ns_cache = {}
158
- # self.sources contains fully qualified Snowflake table/view names
159
- self.sources: set[str] = set()
160
- self._sproc_models = None
161
- # Store language for backward compatibility (used by child classes for use_index polling)
162
- self.language = language
163
- # Register error and state handlers
164
- self._register_handlers()
165
- # Register atexit callback to cancel pending transactions
166
- atexit.register(self.cancel_pending_transactions)
167
-
168
- #--------------------------------------------------
169
- # Initialization & Properties
170
- #--------------------------------------------------
171
-
172
- def _register_handlers(self) -> None:
173
- """Register error and engine state handlers for processing."""
174
- # Register base handlers using getter methods that subclasses can override
175
- # Use defensive copying to ensure each instance has its own handler lists
176
- # and prevent cross-instance contamination from subclass mutations
177
- self._error_handlers = list(self._get_error_handlers())
178
- self._sync_engine_state_handlers = list(self._get_engine_state_handlers(is_async=False))
179
- self._async_engine_state_handlers = list(self._get_engine_state_handlers(is_async=True))
180
-
181
- def _get_error_handlers(self) -> list[ErrorHandler]:
182
- """Get list of error handlers. Subclasses can override to add custom handlers.
183
-
184
- Returns:
185
- List of error handlers for standard error processing using Strategy Pattern.
186
-
187
- Example:
188
- def _get_error_handlers(self) -> list[ErrorHandler]:
189
- # Get base handlers
190
- handlers = super()._get_error_handlers()
191
- # Add custom handler
192
- handlers.append(MyCustomErrorHandler())
193
- return handlers
194
- """
195
- return [
196
- DuoSecurityErrorHandler(),
197
- AppMissingErrorHandler(),
198
- DatabaseErrorsHandler(),
199
- EngineErrorsHandler(),
200
- ServiceNotStartedErrorHandler(),
201
- TransactionAbortedErrorHandler(),
202
- ]
203
-
204
- def _get_engine_state_handlers(self, is_async: bool = False) -> list[EngineStateHandler]:
205
- """Get list of engine state handlers. Subclasses can override.
206
-
207
- Args:
208
- is_async: If True, returns async handlers; if False, returns sync handlers.
209
-
210
- Returns:
211
- List of engine state handlers for processing engine states.
212
-
213
- Example:
214
- def _get_engine_state_handlers(self, is_async: bool = False) -> list[EngineStateHandler]:
215
- # Get base handlers
216
- handlers = super()._get_engine_state_handlers(is_async)
217
- # Add custom handler
218
- handlers.append(MyCustomStateHandler())
219
- return handlers
220
- """
221
- if is_async:
222
- return [
223
- AsyncPendingStateHandler(),
224
- AsyncSuspendedStateHandler(),
225
- AsyncReadyStateHandler(),
226
- AsyncGoneStateHandler(),
227
- AsyncMissingEngineHandler(),
228
- ]
229
- else:
230
- return [
231
- SyncPendingStateHandler(),
232
- SyncSuspendedStateHandler(),
233
- SyncReadyStateHandler(),
234
- SyncGoneStateHandler(),
235
- SyncMissingEngineHandler(),
236
- ]
237
-
238
- @property
239
- def token_handler(self) -> TokenHandler:
240
- if not self._token_handler:
241
- self._token_handler = TokenHandler.from_config(self.config)
242
- return self._token_handler
243
-
244
- def reset(self):
245
- """Reset the session."""
246
- self._session = None
247
-
248
- #--------------------------------------------------
249
- # Session Management
250
- #--------------------------------------------------
251
-
252
- def is_erp_running(self, app_name: str) -> bool:
253
- """Check if the ERP is running. The app.service_status() returns single row/column containing an array of JSON service status objects."""
254
- query = f"CALL {app_name}.app.service_status();"
255
- try:
256
- result = self._exec(query)
257
- # The result is a list of dictionaries, each with a "STATUS" key
258
- # The column name containing the result is "SERVICE_STATUS"
259
- services_status = json.loads(result[0]["SERVICE_STATUS"])
260
- # Find the dictionary with "name" of "main" and check if its "status" is "READY"
261
- for service in services_status:
262
- if service.get("name") == "main" and service.get("status") == "READY":
263
- return True
264
- return False
265
- except Exception:
266
- return False
267
-
268
- def get_sf_session(self, reset_session: bool = False):
269
- if self._session:
270
- return self._session
271
-
272
- if isinstance(runtime_env, HexEnvironment):
273
- raise HexSessionException()
274
- if isinstance(runtime_env, SnowbookEnvironment):
275
- return get_active_session()
276
- else:
277
- # if there's already been a session created, try using that
278
- # if reset_session is true always try to get the new session
279
- if not reset_session:
280
- try:
281
- return get_active_session()
282
- except Exception:
283
- pass
284
-
285
- # otherwise, create a new session
286
- missing_keys = []
287
- connection_parameters = {}
288
-
289
- authenticator = self.config.get('authenticator', None)
290
- passcode = self.config.get("passcode", "")
291
- private_key_file = self.config.get("private_key_file", "")
292
-
293
- # If the authenticator is not set, we need to set it based on the provided parameters
294
- if authenticator is None:
295
- if private_key_file != "":
296
- authenticator = "snowflake_jwt"
297
- elif passcode != "":
298
- authenticator = "username_password_mfa"
299
- else:
300
- authenticator = "snowflake"
301
- # set the default authenticator in the config so we can skip it when we check for missing keys
302
- self.config.set("authenticator", authenticator)
303
-
304
- if authenticator in SNOWFLAKE_AUTHS:
305
- required_keys = {
306
- key for key, value in SNOWFLAKE_AUTHS[authenticator].items() if value.get("required", True)
307
- }
308
- for key in required_keys:
309
- if self.config.get(key, None) is None:
310
- default = SNOWFLAKE_AUTHS[authenticator][key].get("value", None)
311
- if default is None or default == FIELD_PLACEHOLDER:
312
- # No default value and no value in the config, add to missing keys
313
- missing_keys.append(key)
314
- else:
315
- # Set the default value in the config from the auth defaults
316
- self.config.set(key, default)
317
- if missing_keys:
318
- profile = getattr(self.config, 'profile', None)
319
- config_file_path = getattr(self.config, 'file_path', None)
320
- raise SnowflakeMissingConfigValuesException(missing_keys, profile, config_file_path)
321
- for key in SNOWFLAKE_AUTHS[authenticator]:
322
- connection_parameters[key] = self.config.get(key, None)
323
- else:
324
- raise ValueError(f'Authenticator "{authenticator}" not supported')
325
-
326
- return self._build_snowflake_session(connection_parameters)
327
-
328
- def _build_snowflake_session(self, connection_parameters: Dict[str, Any]) -> Session:
329
- try:
330
- tmp = {
331
- "client_session_keep_alive": True,
332
- "client_session_keep_alive_heartbeat_frequency": 60 * 5,
333
- }
334
- tmp.update(connection_parameters)
335
- connection_parameters = tmp
336
- # authenticator programmatic access token needs to be upper cased to work...
337
- connection_parameters["authenticator"] = connection_parameters["authenticator"].upper()
338
- if "authenticator" in connection_parameters and connection_parameters["authenticator"] == "OAUTH_AUTHORIZATION_CODE":
339
- # we are replicating OAUTH_AUTHORIZATION_CODE by first retrieving the token
340
- # and then authenticating with the token via the OAUTH authenticator
341
- connection_parameters["token"] = self.token_handler.get_session_login_token()
342
- connection_parameters["authenticator"] = "OAUTH"
343
- return Session.builder.configs(connection_parameters).create()
344
- except snowflake.connector.errors.Error as e:
345
- raise SnowflakeDatabaseException(e)
346
- except Exception as e:
347
- raise e
348
-
349
- #--------------------------------------------------
350
- # Core Execution Methods
351
- #--------------------------------------------------
352
-
353
- def _exec_sql(self, code: str, params: List[Any] | None, raw=False):
354
- """
355
- Lowest-level SQL execution method.
356
-
357
- Directly executes SQL via the Snowflake session. This is the foundation
358
- for all other execution methods. It:
359
- - Replaces APP_NAME placeholder with actual app name
360
- - Executes SQL with optional parameters
361
- - Returns either raw session results or collected results
362
-
363
- Args:
364
- code: SQL code to execute (may contain APP_NAME placeholder)
365
- params: Optional SQL parameters
366
- raw: If True, return raw session results; if False, collect results
367
-
368
- Returns:
369
- Raw session results if raw=True, otherwise collected results
370
- """
371
- assert self._session is not None
372
- sess_results = self._session.sql(
373
- code.replace(APP_NAME, self.get_app_name()),
374
- params
375
- )
376
- if raw:
377
- return sess_results
378
- return sess_results.collect()
379
-
380
- def _exec(
381
- self,
382
- code: str,
383
- params: List[Any] | Any | None = None,
384
- raw: bool = False,
385
- help: bool = True,
386
- skip_engine_db_error_retry: bool = False
387
- ) -> Any:
388
- """
389
- Mid-level SQL execution method with error handling.
390
-
391
- This is the primary method for executing SQL queries. It wraps _exec_sql
392
- with comprehensive error handling and parameter normalization. Used
393
- extensively throughout the codebase for direct SQL operations like:
394
- - SHOW commands (warehouses, databases, etc.)
395
- - CALL statements to RAI app stored procedures
396
- - Transaction management queries
397
-
398
- The error handling flow:
399
- 1. Normalizes parameters and creates execution context
400
- 2. Calls _exec_sql to execute the query
401
- 3. On error, uses standard error handling (Strategy Pattern), which subclasses
402
- can influence via `_get_error_handlers()` or by overriding `_handle_standard_exec_errors()`
403
-
404
- Args:
405
- code: SQL code to execute
406
- params: Optional SQL parameters (normalized to list if needed)
407
- raw: If True, return raw session results; if False, collect results
408
- help: If True, enable error handling; if False, raise errors immediately
409
- skip_engine_db_error_retry: If True, skip use_index retry logic in error handlers
410
-
411
- Returns:
412
- Query results (collected or raw depending on 'raw' parameter)
413
- """
414
- # print(f"\n--- sql---\n{code}\n--- end sql---\n")
415
- # Ensure session is initialized
416
- if not self._session:
417
- self._session = self.get_sf_session()
418
-
419
- # Normalize parameters
420
- normalized_params = normalize_params(params)
421
-
422
- # Create execution context
423
- ctx = ExecContext(
424
- code=code,
425
- params=normalized_params,
426
- raw=raw,
427
- help=help,
428
- skip_engine_db_error_retry=skip_engine_db_error_retry
429
- )
430
-
431
- # Execute SQL
432
- try:
433
- return self._exec_sql(ctx.code, ctx.params, raw=ctx.raw)
434
- except Exception as e:
435
- if not ctx.help:
436
- raise e
437
-
438
- # Handle standard errors
439
- result = self._handle_standard_exec_errors(e, ctx)
440
- if result is not None:
441
- return result
442
-
443
- #--------------------------------------------------
444
- # Error Handling
445
- #--------------------------------------------------
446
-
447
- def _handle_standard_exec_errors(self, e: Exception, ctx: ExecContext) -> Any | None:
448
- """
449
- Handle standard Snowflake/RAI errors using Strategy Pattern.
450
-
451
- Each error type has a dedicated handler class that encapsulates
452
- the detection logic and exception creation. Handlers are processed
453
- in order until one matches and handles the error.
454
- """
455
- message = str(e).lower()
456
-
457
- # Try each handler in order until one matches
458
- for handler in self._error_handlers:
459
- if handler.matches(e, message, ctx, self):
460
- result = handler.handle(e, ctx, self)
461
- if result is not None:
462
- return result
463
- return # Handler raised exception, we're done
464
-
465
- # Fallback: transform to RAIException
466
- raise RAIException(str(e))
467
-
468
- #--------------------------------------------------
469
- # Feature Detection & Configuration
470
- #--------------------------------------------------
471
-
472
- def is_direct_access_enabled(self) -> bool:
473
- try:
474
- feature_enabled = self._exec(
475
- f"call {APP_NAME}.APP.DIRECT_INGRESS_ENABLED();"
476
- )
477
- if not feature_enabled:
478
- return False
479
-
480
- # Even if the feature is enabled, customers still need to reactivate ERP to ensure the endpoint is available.
481
- endpoint = self._exec(
482
- f"call {APP_NAME}.APP.SERVICE_ENDPOINT(true);"
483
- )
484
- if not endpoint or endpoint[0][0] is None:
485
- return False
486
-
487
- return feature_enabled[0][0]
488
- except Exception as e:
489
- raise Exception(f"Unable to determine if direct access is enabled. Details error: {e}") from e
490
-
491
-
492
- def is_account_flag_set(self, flag: str) -> bool:
493
- results = self._exec(
494
- f"SHOW PARAMETERS LIKE '%{flag}%' IN ACCOUNT;"
495
- )
496
- if not results:
497
- return False
498
- return results[0]["value"] == "true"
499
-
500
- #--------------------------------------------------
501
- # Databases
502
- #--------------------------------------------------
503
-
504
- def get_database(self, database: str):
505
- try:
506
- results = self._exec(
507
- f"call {APP_NAME}.api.get_database('{database}');"
508
- )
509
- except Exception as e:
510
- messages = collect_error_messages(e)
511
- if any("database does not exist" in msg for msg in messages):
512
- return None
513
- raise e
514
-
515
- if not results:
516
- return None
517
- db = results[0]
518
- if not db:
519
- return None
520
- return {
521
- "id": db["ID"],
522
- "name": db["NAME"],
523
- "created_by": db["CREATED_BY"],
524
- "created_on": db["CREATED_ON"],
525
- "deleted_by": db["DELETED_BY"],
526
- "deleted_on": db["DELETED_ON"],
527
- "state": db["STATE"],
528
- }
529
-
530
- def get_installed_packages(self, database: str) -> Dict | None:
531
- query = f"call {APP_NAME}.api.get_installed_package_versions('{database}');"
532
- try:
533
- results = self._exec(query)
534
- except Exception as e:
535
- messages = collect_error_messages(e)
536
- if any("database does not exist" in msg for msg in messages):
537
- return None
538
- # fallback to None for old sql-lib versions
539
- if any("unknown user-defined function" in msg for msg in messages):
540
- return None
541
- raise e
542
-
543
- if not results:
544
- return None
545
-
546
- row = results[0]
547
- if not row:
548
- return None
549
-
550
- return safe_json_loads(row["PACKAGE_VERSIONS"])
551
-
552
- #--------------------------------------------------
553
- # Engines
554
- #--------------------------------------------------
555
-
556
- def _prepare_engine_params(
557
- self,
558
- name: str | None,
559
- size: str | None,
560
- use_default_size: bool = False
561
- ) -> tuple[str, str | None]:
562
- """
563
- Prepare engine parameters by resolving and validating name and size.
564
-
565
- Args:
566
- name: Engine name (None to use default)
567
- size: Engine size (None to use config or default)
568
- use_default_size: If True and size is None, use get_default_engine_size()
569
-
570
- Returns:
571
- Tuple of (engine_name, engine_size)
572
-
573
- Raises:
574
- EngineNameValidationException: If engine name is invalid
575
- Exception: If engine size is invalid
576
- """
577
- from relationalai.tools.cli_helpers import validate_engine_name
578
-
579
- engine_name = name or self.get_default_engine_name()
580
-
581
- # Resolve engine size
582
- if size:
583
- engine_size = size
584
- else:
585
- if use_default_size:
586
- engine_size = self.config.get_default_engine_size()
587
- else:
588
- engine_size = self.config.get("engine_size", None)
589
-
590
- # Validate engine size
591
- if engine_size:
592
- is_size_valid, sizes = self.validate_engine_size(engine_size)
593
- if not is_size_valid:
594
- error_msg = f"Invalid engine size '{engine_size}'. Valid sizes are: {', '.join(sizes)}"
595
- if use_default_size:
596
- error_msg = f"Invalid engine size in config: '{engine_size}'. Valid sizes are: {', '.join(sizes)}"
597
- raise Exception(error_msg)
598
-
599
- # Validate engine name
600
- is_name_valid, _ = validate_engine_name(engine_name)
601
- if not is_name_valid:
602
- raise EngineNameValidationException(engine_name)
603
-
604
- return engine_name, engine_size
605
-
606
- def _get_state_handler(self, state: str | None, handlers: list[EngineStateHandler]) -> EngineStateHandler:
607
- """Find the appropriate state handler for the given state."""
608
- for handler in handlers:
609
- if handler.handles_state(state):
610
- return handler
611
- # Fallback to missing engine handler if no match
612
- return handlers[-1] # Last handler should be MissingEngineHandler
613
-
614
- def _process_engine_state(
615
- self,
616
- engine: EngineState | Dict[str, Any] | None,
617
- context: EngineContext,
618
- handlers: list[EngineStateHandler],
619
- set_active_on_success: bool = False
620
- ) -> EngineState | Dict[str, Any] | None:
621
- """
622
- Process engine state using appropriate state handler.
623
-
624
- Args:
625
- engine: Current engine state (or None if missing)
626
- context: Engine context for state handling
627
- handlers: List of state handlers to use (sync or async)
628
- set_active_on_success: If True, set engine as active when handler returns engine
629
-
630
- Returns:
631
- Engine state after processing, or None if engine needs to be created
632
- """
633
- # Find and execute appropriate state handler
634
- state = engine["state"] if engine else None
635
- handler = self._get_state_handler(state, handlers)
636
- engine = handler.handle(engine, context, self)
637
-
638
- # If handler returned None and we didn't start with None state, engine needs to be created
639
- # (e.g., GONE state deleted the engine, so we need to create a new one)
640
- if not engine and state is not None:
641
- handler = self._get_state_handler(None, handlers)
642
- handler.handle(None, context, self)
643
- elif set_active_on_success:
644
- # Cast to EngineState for type safety (handlers return EngineDict which is compatible)
645
- self._set_active_engine(cast(EngineState, engine))
646
-
647
- return engine
648
-
649
- def _handle_engine_creation_errors(self, error: Exception, engine_name: str, preserve_rai_exception: bool = False) -> None:
650
- """
651
- Handle errors during engine creation using error handlers.
652
-
653
- Args:
654
- error: The exception that occurred
655
- engine_name: Name of the engine being created
656
- preserve_rai_exception: If True, re-raise RAIException without wrapping
657
-
658
- Raises:
659
- RAIException: If preserve_rai_exception is True and error is RAIException
660
- EngineProvisioningFailed: If error is not handled by error handlers
661
- """
662
- # Preserve RAIException passthrough if requested (for async mode)
663
- if preserve_rai_exception and isinstance(error, RAIException):
664
- raise error
665
-
666
- # Check if this is a known error type that should be handled by error handlers
667
- message = str(error).lower()
668
- handled = False
669
- # Engine creation isn't tied to a specific SQL ExecContext; pass a context that
670
- # disables use_index retry behavior (and any future ctx-dependent handlers).
671
- ctx = ExecContext(code="", help=True, skip_engine_db_error_retry=True)
672
- for handler in self._error_handlers:
673
- if handler.matches(error, message, ctx, self):
674
- handler.handle(error, ctx, self)
675
- handled = True
676
- break # Handler raised exception, we're done
677
-
678
- # If not handled by error handlers, wrap in EngineProvisioningFailed
679
- if not handled:
680
- raise EngineProvisioningFailed(engine_name, error) from error
681
-
682
- def validate_engine_size(self, size: str) -> Tuple[bool, List[str]]:
683
- if size is not None:
684
- sizes = self.get_engine_sizes()
685
- if size not in sizes:
686
- return False, sizes
687
- return True, []
688
-
689
- def get_engine_sizes(self, cloud_provider: str|None=None):
690
- sizes = []
691
- if cloud_provider is None:
692
- cloud_provider = self.get_cloud_provider()
693
- if cloud_provider == 'azure':
694
- sizes = ENGINE_SIZES_AZURE
695
- else:
696
- sizes = ENGINE_SIZES_AWS
697
- if self.config.show_all_engine_sizes():
698
- return INTERNAL_ENGINE_SIZES + sizes
699
- else:
700
- return sizes
701
-
702
- def list_engines(self, state: str | None = None):
703
- where_clause = f"WHERE STATUS = '{state.upper()}'" if state else ""
704
- statement = f"SELECT NAME, ID, SIZE, STATUS, CREATED_BY, CREATED_ON, UPDATED_ON FROM {APP_NAME}.api.engines {where_clause} ORDER BY NAME ASC;"
705
- results = self._exec(statement)
706
- if not results:
707
- return []
708
- return [
709
- {
710
- "name": row["NAME"],
711
- "id": row["ID"],
712
- "size": row["SIZE"],
713
- "state": row["STATUS"], # callers are expecting 'state'
714
- "created_by": row["CREATED_BY"],
715
- "created_on": row["CREATED_ON"],
716
- "updated_on": row["UPDATED_ON"],
717
- }
718
- for row in results
719
- ]
720
-
721
- def get_engine(self, name: str):
722
- results = self._exec(
723
- f"SELECT NAME, ID, SIZE, STATUS, CREATED_BY, CREATED_ON, UPDATED_ON, VERSION, AUTO_SUSPEND_MINS, SUSPENDS_AT FROM {APP_NAME}.api.engines WHERE NAME='{name}';"
724
- )
725
- if not results:
726
- return None
727
- engine = results[0]
728
- if not engine:
729
- return None
730
- engine_state: EngineState = {
731
- "name": engine["NAME"],
732
- "id": engine["ID"],
733
- "size": engine["SIZE"],
734
- "state": engine["STATUS"], # callers are expecting 'state'
735
- "created_by": engine["CREATED_BY"],
736
- "created_on": engine["CREATED_ON"],
737
- "updated_on": engine["UPDATED_ON"],
738
- "version": engine["VERSION"],
739
- "auto_suspend": engine["AUTO_SUSPEND_MINS"],
740
- "suspends_at": engine["SUSPENDS_AT"]
741
- }
742
- return engine_state
743
-
744
- def get_default_engine_name(self) -> str:
745
- if self.config.get("engine_name", None) is not None:
746
- profile = self.config.profile
747
- raise InvalidAliasError(f"""
748
- 'engine_name' is not a valid config option.
749
- If you meant to use a specific engine, use 'engine' instead.
750
- Otherwise, remove it from your '{profile}' configuration profile.
751
- """)
752
- engine = self.config.get("engine", None)
753
- if not engine and self.config.get("user", None):
754
- engine = _sanitize_user_name(str(self.config.get("user")))
755
- if not engine:
756
- engine = self.get_user_based_engine_name()
757
- self.config.set("engine", engine)
758
- return engine
759
-
760
- def is_valid_engine_state(self, name:str):
761
- return name in VALID_ENGINE_STATES
762
-
763
- def _create_engine(
764
- self,
765
- name: str,
766
- size: str | None = None,
767
- auto_suspend_mins: int | None= None,
768
- is_async: bool = False,
769
- headers: Dict | None = None,
770
- ):
771
- api = "create_engine_async" if is_async else "create_engine"
772
- if size is None:
773
- size = self.config.get_default_engine_size()
774
- # if auto_suspend_mins is None, get the default value from the config
775
- if auto_suspend_mins is None:
776
- auto_suspend_mins = self.config.get_default_auto_suspend_mins()
777
- try:
778
- headers = debugging.gen_current_propagation_headers()
779
- with debugging.span(api, name=name, size=size, auto_suspend_mins=auto_suspend_mins):
780
- # check in case the config default is missing
781
- if auto_suspend_mins is None:
782
- self._exec(f"call {APP_NAME}.api.{api}('{name}', '{size}', null, {headers});")
783
- else:
784
- self._exec(f"call {APP_NAME}.api.{api}('{name}', '{size}', PARSE_JSON('{{\"auto_suspend_mins\": {auto_suspend_mins}}}'), {headers});")
785
- except Exception as e:
786
- raise EngineProvisioningFailed(name, e) from e
787
-
788
- def create_engine(self, name:str, size:str|None=None, auto_suspend_mins:int|None=None, headers: Dict | None = None):
789
- self._create_engine(name, size, auto_suspend_mins, headers=headers)
790
-
791
- def create_engine_async(self, name:str, size:str|None=None, auto_suspend_mins:int|None=None):
792
- self._create_engine(name, size, auto_suspend_mins, True)
793
-
794
- def delete_engine(self, name:str, force:bool = False, headers: Dict | None = None):
795
- request_headers = debugging.add_current_propagation_headers(headers)
796
- self._exec(f"call {APP_NAME}.api.delete_engine('{name}', {force},{request_headers});")
797
-
798
- def suspend_engine(self, name:str):
799
- self._exec(f"call {APP_NAME}.api.suspend_engine('{name}');")
800
-
801
- def resume_engine(self, name:str, headers: Dict | None = None) -> Dict:
802
- request_headers = debugging.add_current_propagation_headers(headers)
803
- self._exec(f"call {APP_NAME}.api.resume_engine('{name}',{request_headers});")
804
- # returning empty dict to match the expected return type
805
- return {}
806
-
807
- def resume_engine_async(self, name:str, headers: Dict | None = None) -> Dict:
808
- if headers is None:
809
- headers = {}
810
- self._exec(f"call {APP_NAME}.api.resume_engine_async('{name}',{headers});")
811
- # returning empty dict to match the expected return type
812
- return {}
813
-
814
- def alter_engine_pool(self, size:str|None=None, mins:int|None=None, maxs:int|None=None):
815
- """Alter engine pool node limits for Snowflake."""
816
- self._exec(f"call {APP_NAME}.api.alter_engine_pool_node_limits('{size}', {mins}, {maxs});")
817
-
818
- #--------------------------------------------------
819
- # Graphs
820
- #--------------------------------------------------
821
-
822
- def list_graphs(self) -> List[AvailableModel]:
823
- with debugging.span("list_models"):
824
- query = textwrap.dedent(f"""
825
- SELECT NAME, ID, CREATED_BY, CREATED_ON, STATE, DELETED_BY, DELETED_ON
826
- FROM {APP_NAME}.api.databases
827
- WHERE state <> 'DELETED'
828
- ORDER BY NAME ASC;
829
- """)
830
- results = self._exec(query)
831
- if not results:
832
- return []
833
- return [
834
- {
835
- "name": row["NAME"],
836
- "id": row["ID"],
837
- "created_by": row["CREATED_BY"],
838
- "created_on": row["CREATED_ON"],
839
- "state": row["STATE"],
840
- "deleted_by": row["DELETED_BY"],
841
- "deleted_on": row["DELETED_ON"],
842
- }
843
- for row in results
844
- ]
845
-
846
- def get_graph(self, name: str):
847
- res = self.get_database(name)
848
- if res and res.get("state") != "DELETED":
849
- return res
850
-
851
- def create_graph(self, name: str):
852
- with debugging.span("create_model", name=name):
853
- self._exec(f"call {APP_NAME}.api.create_database('{name}', false, {debugging.gen_current_propagation_headers()});")
854
-
855
- def delete_graph(self, name:str, force=False, language:str="rel"):
856
- prop_hdrs = debugging.gen_current_propagation_headers()
857
- if self.config.get("use_graph_index", USE_GRAPH_INDEX):
858
- keep_database = not force and self.config.get("reuse_model", True)
859
- with debugging.span("release_index", name=name, keep_database=keep_database, language=language):
860
- #TODO add headers to release_index
861
- response = self._exec(f"call {APP_NAME}.api.release_index('{name}', OBJECT_CONSTRUCT('keep_database', {keep_database}, 'language', '{language}', 'user_agent', '{get_pyrel_version(self.generation)}'));")
862
- if response:
863
- result = next(iter(response))
864
- obj = json.loads(result["RELEASE_INDEX"])
865
- error = obj.get('error', None)
866
- if error and "Model database not found" not in error:
867
- raise Exception(f"Error releasing index: {error}")
868
- else:
869
- raise Exception("There was no response from the release index call.")
870
- else:
871
- with debugging.span("delete_model", name=name):
872
- self._exec(f"call {APP_NAME}.api.delete_database('{name}', false, {prop_hdrs});")
873
-
874
- def clone_graph(self, target_name:str, source_name:str, nowait_durable=True, force=False):
875
- if force and self.get_graph(target_name):
876
- self.delete_graph(target_name)
877
- with debugging.span("clone_model", target_name=target_name, source_name=source_name):
878
- # not a mistake: the clone_database argument order is indeed target then source:
879
- headers = debugging.gen_current_propagation_headers()
880
- self._exec(f"call {APP_NAME}.api.clone_database('{target_name}', '{source_name}', {nowait_durable}, {headers});")
881
-
882
- def _poll_use_index(
883
- self,
884
- app_name: str,
885
- sources: Iterable[str],
886
- model: str,
887
- engine_name: str,
888
- engine_size: str | None = None,
889
- program_span_id: str | None = None,
890
- headers: Dict | None = None,
891
- ) -> None:
892
- """
893
- Poll use_index to prepare indices for the given sources.
894
-
895
- This is an optional interface method. Base Resources provides a no-op implementation.
896
- UseIndexResources and DirectAccessResources override this to provide actual polling.
897
-
898
- Returns:
899
- None for base implementation. Child classes may return poller results.
900
- """
901
- return None
902
-
903
- def maybe_poll_use_index(
904
- self,
905
- app_name: str,
906
- sources: Iterable[str],
907
- model: str,
908
- engine_name: str,
909
- engine_size: str | None = None,
910
- program_span_id: str | None = None,
911
- headers: Dict | None = None,
912
- ) -> None:
913
- """
914
- Only call _poll_use_index if there are sources to process.
915
-
916
- This is an optional interface method. Base Resources provides a no-op implementation.
917
- UseIndexResources and DirectAccessResources override this to provide actual polling with caching.
918
-
919
- Returns:
920
- None for base implementation. Child classes may return poller results.
921
- """
922
- return None
923
-
924
- #--------------------------------------------------
925
- # Models
926
- #--------------------------------------------------
927
-
928
- def list_models(self, database: str, engine: str):
929
- pass
930
-
931
- def create_models(self, database: str, engine: str | None, models:List[Tuple[str, str]]) -> List[Any]:
932
- rel_code = self.create_models_code(models)
933
- self.exec_raw(database, engine, rel_code, readonly=False)
934
- # TODO: handle SPCS errors once they're figured out
935
- return []
936
-
937
- def delete_model(self, database:str, engine:str | None, name:str):
938
- self.exec_raw(database, engine, f"def delete[:rel, :catalog, :model, \"{name}\"]: rel[:catalog, :model, \"{name}\"]", readonly=False)
939
-
940
- def create_models_code(self, models:List[Tuple[str, str]]) -> str:
941
- lines = []
942
- for (name, code) in models:
943
- name = name.replace("\"", "\\\"")
944
- assert "\"\"\"\"\"\"\"" not in code, "Code literals must use fewer than 7 quotes."
945
-
946
- lines.append(textwrap.dedent(f"""
947
- def delete[:rel, :catalog, :model, "{name}"]: rel[:catalog, :model, "{name}"]
948
- def insert[:rel, :catalog, :model, "{name}"]: raw\"\"\"\"\"\"\"
949
- """) + code + "\n\"\"\"\"\"\"\"")
950
- rel_code = "\n\n".join(lines)
951
- return rel_code
952
-
953
- #--------------------------------------------------
954
- # Exports
955
- #--------------------------------------------------
956
-
957
- def list_exports(self, database: str, engine: str):
958
- return []
959
-
960
-
961
- def get_export_code(self, params: ExportParams, all_installs):
962
- sql_inputs = ", ".join([f"{name} {type_to_sql(type)}" for (name, _, type) in params.inputs])
963
- input_names = [name for (name, *_) in params.inputs]
964
- has_return_hint = params.out_fields and isinstance(params.out_fields[0], tuple)
965
- if has_return_hint:
966
- sql_out = ", ".join([f"\"{name}\" {type_to_sql(type)}" for (name, type) in params.out_fields])
967
- sql_out_names = ", ".join([f"('{name}', '{type_to_sql(type)}')" for (ix, (name, type)) in enumerate(params.out_fields)])
968
- py_outs = ", ".join([f"StructField(\"{name}\", {type_to_snowpark(type)})" for (name, type) in params.out_fields])
969
- else:
970
- sql_out = ""
971
- sql_out_names = ", ".join([f"'{name}'" for name in params.out_fields])
972
- py_outs = ", ".join([f"StructField(\"{name}\", {type_to_snowpark(str)})" for name in params.out_fields])
973
- py_inputs = ", ".join([name for (name, *_) in params.inputs])
974
- safe_rel = escape_for_f_string(params.code).strip()
975
- clean_inputs = []
976
- for (name, var, type) in params.inputs:
977
- if type is str:
978
- clean_inputs.append(f"{name} = '\"' + escape({name}) + '\"'")
979
- # Replace `var` with `name` and keep the following non-word character unchanged
980
- pattern = re.compile(re.escape(var) + r'(\W)')
981
- value = format_sproc_name(name, type)
982
- safe_rel = re.sub(pattern, rf"{{{value}}}\1", safe_rel)
983
- if py_inputs:
984
- py_inputs = f", {py_inputs}"
985
- clean_inputs = ("\n").join(clean_inputs)
986
- file = "export_procedure.py.jinja"
987
- with importlib.resources.open_text(
988
- "relationalai.clients.resources.snowflake", file
989
- ) as f:
990
- template = f.read()
991
- def quote(s: str, f = False) -> str:
992
- return '"' + s + '"' if not f else 'f"' + s + '"'
993
-
994
- wait_for_stream_sync = self.config.get("wait_for_stream_sync", WAIT_FOR_STREAM_SYNC)
995
- # 1. Check the sources for staled sources
996
- # 2. Get the object references for the sources
997
- # TODO: this could be optimized to do it in the run time of the stored procedure
998
- # instead of doing it here. It will make it more reliable when sources are
999
- # modified after the stored procedure is created.
1000
- checked_sources = self._check_source_updates(self.sources)
1001
- source_obj_references = self._get_source_references(checked_sources)
1002
-
1003
- # Escape double quotes in the source object references
1004
- escaped_source_obj_references = [source.replace('"', '\\"') for source in source_obj_references]
1005
- escaped_proc_database = params.proc_database.replace('"', '\\"')
1006
-
1007
- normalized_func_name = IdentityParser(params.func_name).identity
1008
- assert normalized_func_name is not None, "Function name must be set"
1009
- skip_invalid_data = params.skip_invalid_data
1010
- python_code = process_jinja_template(
1011
- template,
1012
- func_name=quote(normalized_func_name),
1013
- database=quote(params.root_database),
1014
- proc_database=quote(escaped_proc_database),
1015
- engine=quote(params.engine),
1016
- rel_code=quote(safe_rel, f=True),
1017
- APP_NAME=quote(APP_NAME),
1018
- input_names=input_names,
1019
- outputs=sql_out,
1020
- sql_out_names=sql_out_names,
1021
- clean_inputs=clean_inputs,
1022
- py_inputs=py_inputs,
1023
- py_outs=py_outs,
1024
- skip_invalid_data=skip_invalid_data,
1025
- source_references=", ".join(escaped_source_obj_references),
1026
- install_code=all_installs.replace("\\", "\\\\").replace("\n", "\\n"),
1027
- has_return_hint=has_return_hint,
1028
- wait_for_stream_sync=wait_for_stream_sync,
1029
- ).strip()
1030
- return_clause = f"TABLE({sql_out})" if sql_out else "STRING"
1031
- destination_input = "" if sql_out else "save_as_table STRING DEFAULT NULL,"
1032
- module_name = sanitize_module_name(normalized_func_name)
1033
- stage = f"@{self.get_app_name()}.app_state.stored_proc_code_stage"
1034
- file_loc = f"{stage}/{module_name}.py"
1035
- python_code = python_code.replace(APP_NAME, self.get_app_name())
1036
-
1037
- hash = hashlib.sha256()
1038
- hash.update(python_code.encode('utf-8'))
1039
- code_hash = hash.hexdigest()
1040
- print(code_hash)
1041
-
1042
- sql_code = textwrap.dedent(f"""
1043
- CREATE OR REPLACE PROCEDURE {normalized_func_name}({sql_inputs}{sql_inputs and ',' or ''} {destination_input} engine STRING DEFAULT NULL)
1044
- RETURNS {return_clause}
1045
- LANGUAGE PYTHON
1046
- RUNTIME_VERSION = '3.10'
1047
- IMPORTS = ('{file_loc}')
1048
- PACKAGES = ('snowflake-snowpark-python')
1049
- HANDLER = 'checked_handle'
1050
- EXECUTE AS CALLER
1051
- AS
1052
- $$
1053
- import {module_name}
1054
- import inspect, hashlib, os, sys
1055
- def checked_handle(*args, **kwargs):
1056
- import_dir = sys._xoptions["snowflake_import_directory"]
1057
- wheel_path = os.path.join(import_dir, '{module_name}.py')
1058
- h = hashlib.sha256()
1059
- with open(wheel_path, 'rb') as f:
1060
- for chunk in iter(lambda: f.read(1<<20), b''):
1061
- h.update(chunk)
1062
- code_hash = h.hexdigest()
1063
- if code_hash != '{code_hash}':
1064
- raise RuntimeError("Code hash mismatch. The code has been modified since it was uploaded.")
1065
- # Call the handle function with the provided arguments
1066
- return {module_name}.handle(*args, **kwargs)
1067
-
1068
- $$;
1069
- """)
1070
- # print(f"\n--- python---\n{python_code}\n--- end python---\n")
1071
- # This check helps catch invalid code early and for dry runs:
1072
- try:
1073
- ast.parse(python_code)
1074
- except SyntaxError:
1075
- raise ValueError(f"Internal error: invalid Python code generated:\n{python_code}")
1076
- return (sql_code, python_code, file_loc)
1077
-
1078
- def get_sproc_models(self, params: ExportParams):
1079
- if self._sproc_models is not None:
1080
- return self._sproc_models
1081
-
1082
- with debugging.span("get_sproc_models"):
1083
- code = """
1084
- def output(name, model):
1085
- rel(:catalog, :model, name, model)
1086
- and not starts_with(name, "rel/")
1087
- and not starts_with(name, "pkg/rel")
1088
- and not starts_with(name, "pkg/std")
1089
- and starts_with(name, "pkg/")
1090
- """
1091
- res = self.exec_raw(params.model_database, params.engine, code, readonly=True, nowait_durable=True)
1092
- df, errors = result_helpers.format_results(res, None, ["name", "model"])
1093
- models = []
1094
- for row in df.itertuples():
1095
- models.append((row.name, row.model))
1096
- self._sproc_models = models
1097
- return models
1098
-
1099
- def create_export(self, params: ExportParams):
1100
- with debugging.span("create_export") as span:
1101
- if params.dry_run:
1102
- (sql_code, python_code, file_loc) = self.get_export_code(params, params.install_code)
1103
- span["sql"] = sql_code
1104
- return
1105
-
1106
- start = time.perf_counter()
1107
- use_graph_index = self.config.get("use_graph_index", USE_GRAPH_INDEX)
1108
- # for the non graph index case we need to create the cloned proc database
1109
- if not use_graph_index:
1110
- raise RAIException(
1111
- "To ensure permissions are properly accounted for, stored procedures require using the graph index. "
1112
- "Set use_graph_index=True in your config to proceed."
1113
- )
1114
-
1115
- models = self.get_sproc_models(params)
1116
- lib_installs = self.create_models_code(models)
1117
- all_installs = lib_installs + "\n\n" + params.install_code
1118
-
1119
- (sql_code, python_code, file_loc) = self.get_export_code(params, all_installs)
1120
-
1121
- span["sql"] = sql_code
1122
- assert self._session
1123
-
1124
- with debugging.span("upload_sproc_code"):
1125
- code_bytes = python_code.encode('utf-8')
1126
- code_stream = io.BytesIO(code_bytes)
1127
- self._session.file.put_stream(code_stream, file_loc, auto_compress=False, overwrite=True)
1128
-
1129
- with debugging.span("sql_install"):
1130
- self._exec(sql_code)
1131
-
1132
- debugging.time("export", time.perf_counter() - start, DataFrame(), code=sql_code.replace(APP_NAME, self.get_app_name()))
1133
-
1134
-
1135
- def create_export_table(self, database: str, engine: str, table: str, relation: str, columns: Dict[str, str], code: str, refresh: str|None=None):
1136
- print("Snowflake doesn't support creating export tables yet. Try creating the table manually first.")
1137
- pass
1138
-
1139
- def delete_export(self, database: str, engine: str, name: str):
1140
- pass
1141
-
1142
- #--------------------------------------------------
1143
- # Imports
1144
- #--------------------------------------------------
1145
-
1146
-
1147
- def change_stream_status(self, stream_id: str, model:str, suspend: bool):
1148
- if stream_id and model:
1149
- if suspend:
1150
- self._exec(f"CALL {APP_NAME}.api.suspend_data_stream('{stream_id}', '{model}');")
1151
- else:
1152
- self._exec(f"CALL {APP_NAME}.api.resume_data_stream('{stream_id}', '{model}');")
1153
-
1154
- def change_imports_status(self, suspend: bool):
1155
- if suspend:
1156
- self._exec(f"CALL {APP_NAME}.app.suspend_cdc();")
1157
- else:
1158
- self._exec(f"CALL {APP_NAME}.app.resume_cdc();")
1159
-
1160
- def get_imports_status(self) -> ImportsStatus|None:
1161
- # NOTE: We expect there to only ever be one result?
1162
- results = self._exec(f"CALL {APP_NAME}.app.cdc_status();")
1163
- if results:
1164
- result = next(iter(results))
1165
- engine = result['CDC_ENGINE_NAME']
1166
- engine_status = result['CDC_ENGINE_STATUS']
1167
- engine_size = result['CDC_ENGINE_SIZE']
1168
- task_status = result['CDC_TASK_STATUS']
1169
- info = result['CDC_TASK_INFO']
1170
- enabled = result['CDC_ENABLED']
1171
- return {"engine": engine, "engine_size": engine_size, "engine_status": engine_status, "status": task_status, "enabled": enabled, "info": info }
1172
- return None
1173
-
1174
- def set_imports_engine_size(self, size:str):
1175
- try:
1176
- self._exec(f"CALL {APP_NAME}.app.alter_cdc_engine_size('{size}');")
1177
- except Exception as e:
1178
- raise e
1179
-
1180
- def list_imports(
1181
- self,
1182
- id:str|None = None,
1183
- name:str|None = None,
1184
- model:str|None = None,
1185
- status:str|None = None,
1186
- creator:str|None = None,
1187
- ) -> list[Import]:
1188
- where = []
1189
- if id and isinstance(id, str):
1190
- where.append(f"LOWER(ID) = '{id.lower()}'")
1191
- if name and isinstance(name, str):
1192
- where.append(f"LOWER(FQ_OBJECT_NAME) = '{name.lower()}'")
1193
- if model and isinstance(model, str):
1194
- where.append(f"LOWER(RAI_DATABASE) = '{model.lower()}'")
1195
- if creator and isinstance(creator, str):
1196
- where.append(f"LOWER(CREATED_BY) = '{creator.lower()}'")
1197
- if status and isinstance(status, str):
1198
- where.append(f"LOWER(batch_status) = '{status.lower()}'")
1199
- where_clause = " AND ".join(where)
1200
-
1201
- # This is roughly inspired by the native app code because we don't have a way to
1202
- # get the status of multiple streams at once and doing them individually is way
1203
- # too slow. We use window functions to get the status of the stream and the batch
1204
- # details.
1205
- statement = f"""
1206
- SELECT
1207
- ID,
1208
- RAI_DATABASE,
1209
- FQ_OBJECT_NAME,
1210
- CREATED_AT,
1211
- CREATED_BY,
1212
- CASE
1213
- WHEN nextBatch.quarantined > 0 THEN 'quarantined'
1214
- ELSE nextBatch.status
1215
- END as batch_status,
1216
- nextBatch.processing_errors,
1217
- nextBatch.batches
1218
- FROM {APP_NAME}.api.data_streams as ds
1219
- LEFT JOIN (
1220
- SELECT DISTINCT
1221
- data_stream_id,
1222
- -- Get status from the progress record using window functions
1223
- FIRST_VALUE(status) OVER (
1224
- PARTITION BY data_stream_id
1225
- ORDER BY
1226
- CASE WHEN unloaded IS NOT NULL THEN 1 ELSE 0 END DESC,
1227
- unloaded ASC
1228
- ) as status,
1229
- -- Get batch_details from the same record
1230
- FIRST_VALUE(batch_details) OVER (
1231
- PARTITION BY data_stream_id
1232
- ORDER BY
1233
- CASE WHEN unloaded IS NOT NULL THEN 1 ELSE 0 END DESC,
1234
- unloaded ASC
1235
- ) as batch_details,
1236
- -- Aggregate the other fields
1237
- FIRST_VALUE(processing_details:processingErrors) OVER (
1238
- PARTITION BY data_stream_id
1239
- ORDER BY
1240
- CASE WHEN unloaded IS NOT NULL THEN 1 ELSE 0 END DESC,
1241
- unloaded ASC
1242
- ) as processing_errors,
1243
- MIN(unloaded) OVER (PARTITION BY data_stream_id) as unloaded,
1244
- COUNT(*) OVER (PARTITION BY data_stream_id) as batches,
1245
- COUNT_IF(status = 'quarantined') OVER (PARTITION BY data_stream_id) as quarantined
1246
- FROM {APP_NAME}.api.data_stream_batches
1247
- ) nextBatch
1248
- ON ds.id = nextBatch.data_stream_id
1249
- {f"where {where_clause}" if where_clause else ""}
1250
- ORDER BY FQ_OBJECT_NAME ASC;
1251
- """
1252
- results = self._exec(statement)
1253
- items = []
1254
- if results:
1255
- for stream in results:
1256
- (id, db, name, created_at, created_by, status, processing_errors, batches) = stream
1257
- if status and isinstance(status, str):
1258
- status = status.upper()
1259
- if processing_errors:
1260
- if status in ["QUARANTINED", "PENDING"]:
1261
- start = processing_errors.rfind("Error")
1262
- if start != -1:
1263
- processing_errors = processing_errors[start:-1]
1264
- else:
1265
- processing_errors = None
1266
- items.append(cast(Import, {
1267
- "id": id,
1268
- "model": db,
1269
- "name": name,
1270
- "created": created_at,
1271
- "creator": created_by,
1272
- "status": status.upper() if status else None,
1273
- "errors": processing_errors if processing_errors != "[]" else None,
1274
- "batches": f"{batches}" if batches else "",
1275
- }))
1276
- return items
1277
-
1278
- def poll_imports(self, sources:List[str], model:str):
1279
- source_set = self._create_source_set(sources)
1280
- def check_imports():
1281
- imports = [
1282
- import_
1283
- for import_ in self.list_imports(model=model)
1284
- if import_["name"] in source_set
1285
- ]
1286
- # loop through printing status for each in the format (index): (name) - (status)
1287
- statuses = [import_["status"] for import_ in imports]
1288
- if all(status == "LOADED" for status in statuses):
1289
- return True
1290
- if any(status == "QUARANTINED" for status in statuses):
1291
- failed_imports = [import_["name"] for import_ in imports if import_["status"] == "QUARANTINED"]
1292
- raise RAIException("Imports failed:" + ", ".join(failed_imports)) from None
1293
- # this check is necessary in case some of the tables are empty;
1294
- # such tables may be synced even though their status is None:
1295
- def synced(import_):
1296
- if import_["status"] == "LOADED":
1297
- return True
1298
- if import_["status"] is None:
1299
- import_full_status = self.get_import_stream(import_["name"], model)
1300
- if import_full_status and import_full_status[0]["data_sync_status"] == "SYNCED":
1301
- return True
1302
- return False
1303
- if all(synced(import_) for import_ in imports):
1304
- return True
1305
- poll_with_specified_overhead(check_imports, overhead_rate=0.1, max_delay=10)
1306
-
1307
- def _create_source_set(self, sources: List[str]) -> set:
1308
- return {
1309
- source.upper() if not IdentityParser(source).has_double_quoted_identifier else IdentityParser(source).identity
1310
- for source in sources
1311
- }
1312
-
1313
- def get_import_stream(self, name:str|None, model:str|None):
1314
- results = self._exec(f"CALL {APP_NAME}.api.get_data_stream('{name}', '{model}');")
1315
- if not results:
1316
- return None
1317
- return imports_to_dicts(results)
1318
-
1319
- def create_import_stream(self, source:ImportSource, model:str, rate = 1, options: dict|None = None):
1320
- assert isinstance(source, ImportSourceTable), "Snowflake integration only supports loading from SF Tables. Try loading your data as a table via the Snowflake interface first."
1321
- object = source.fqn
1322
-
1323
- # Parse only to the schema level
1324
- schemaParser = IdentityParser(f"{source.database}.{source.schema}")
1325
-
1326
- if object.lower() in [x["name"].lower() for x in self.list_imports(model=model)]:
1327
- return
1328
-
1329
- query = f"SHOW OBJECTS LIKE '{source.table}' IN {schemaParser.identity}"
1330
-
1331
- info = self._exec(query)
1332
- if not info:
1333
- raise ValueError(f"Object {source.table} not found in schema {schemaParser.identity}")
1334
- else:
1335
- data = info[0]
1336
- if not data:
1337
- raise ValueError(f"Object {source.table} not found in {schemaParser.identity}")
1338
- # (time, name, db_name, schema_name, kind, *rest)
1339
- kind = data["kind"]
1340
-
1341
- relation_name = to_fqn_relation_name(object)
1342
-
1343
- command = f"""call {APP_NAME}.api.create_data_stream(
1344
- {APP_NAME}.api.object_reference('{kind}', '{object}'),
1345
- '{model}',
1346
- '{relation_name}');"""
1347
-
1348
- def create_stream(tracking_just_changed=False):
1349
- try:
1350
- self._exec(command)
1351
- except Exception as e:
1352
- messages = collect_error_messages(e)
1353
- if any("ensure that change_tracking is enabled on the source object" in msg for msg in messages):
1354
- if self.config.get("ensure_change_tracking", False) and not tracking_just_changed:
1355
- try:
1356
- self._exec(f"ALTER {kind} {object} SET CHANGE_TRACKING = TRUE;")
1357
- create_stream(tracking_just_changed=True)
1358
- except Exception:
1359
- pass
1360
- else:
1361
- print("\n")
1362
- exception = SnowflakeChangeTrackingNotEnabledException((object, kind))
1363
- raise exception from None
1364
- elif any("database does not exist" in msg for msg in messages):
1365
- print("\n")
1366
- raise ModelNotFoundException(model) from None
1367
- raise e
1368
-
1369
- create_stream()
1370
-
1371
- def create_import_snapshot(self, source:ImportSource, model:str, options: dict|None = None):
1372
- raise Exception("Snowflake integration doesn't support snapshot imports yet")
1373
-
1374
- def delete_import(self, import_name:str, model:str, force = False):
1375
- engine = self.get_default_engine_name()
1376
- rel_name = to_fqn_relation_name(import_name)
1377
- try:
1378
- self._exec(f"""call {APP_NAME}.api.delete_data_stream(
1379
- '{import_name}',
1380
- '{model}'
1381
- );""")
1382
- except RAIException as err:
1383
- if "streams do not exist" not in str(err) or not force:
1384
- raise
1385
-
1386
- # if force is true, we delete the leftover relation to free up the name (in case the user re-creates the stream)
1387
- if force:
1388
- self.exec_raw(model, engine, f"""
1389
- declare ::{rel_name}
1390
- def delete[:\"{rel_name}\"]: {{ {rel_name} }}
1391
- """, readonly=False, bypass_index=True)
1392
-
1393
- #--------------------------------------------------
1394
- # Exec Async
1395
- #--------------------------------------------------
1396
-
1397
- def _check_exec_async_status(self, txn_id: str, headers: Dict | None = None):
1398
- """Check whether the given transaction has completed."""
1399
- if headers is None:
1400
- headers = {}
1401
-
1402
- with debugging.span("check_status"):
1403
- response = self._exec(f"CALL {APP_NAME}.api.get_transaction('{txn_id}',{headers});")
1404
- assert response, f"No results from get_transaction('{txn_id}')"
1405
-
1406
- response_row = next(iter(response)).asDict()
1407
- status: str = response_row['STATE']
1408
-
1409
- # remove the transaction from the pending list if it's completed or aborted
1410
- if status in ["COMPLETED", "ABORTED"]:
1411
- if txn_id in self._pending_transactions:
1412
- self._pending_transactions.remove(txn_id)
1413
-
1414
- if status == "ABORTED" and response_row.get("ABORT_REASON", "") == TXN_ABORT_REASON_TIMEOUT:
1415
- config_file_path = getattr(self.config, 'file_path', None)
1416
- # todo: use the timeout returned alongside the transaction as soon as it's exposed
1417
- timeout_mins = int(self.config.get("query_timeout_mins", DEFAULT_QUERY_TIMEOUT_MINS) or DEFAULT_QUERY_TIMEOUT_MINS)
1418
- raise QueryTimeoutExceededException(
1419
- timeout_mins=timeout_mins,
1420
- query_id=txn_id,
1421
- config_file_path=config_file_path,
1422
- )
1423
-
1424
- # @TODO: Find some way to tunnel the ABORT_REASON out. Azure doesn't have this, but it's handy
1425
- return status == "COMPLETED" or status == "ABORTED"
1426
-
1427
-
1428
- def _list_exec_async_artifacts(self, txn_id: str, headers: Dict | None = None) -> Dict[str, Dict]:
1429
- """Grab the list of artifacts produced in the transaction and the URLs to retrieve their contents."""
1430
- if headers is None:
1431
- headers = {}
1432
- with debugging.span("list_results"):
1433
- response = self._exec(
1434
- f"CALL {APP_NAME}.api.get_own_transaction_artifacts('{txn_id}',{headers});"
1435
- )
1436
- assert response, f"No results from get_own_transaction_artifacts('{txn_id}')"
1437
- return {row["FILENAME"]: row for row in response}
1438
-
1439
- def _fetch_exec_async_artifacts(
1440
- self, artifact_info: Dict[str, Dict[str, Any]]
1441
- ) -> Dict[str, Any]:
1442
- """Grab the contents of the given artifacts from SF in parallel using threads."""
1443
-
1444
- with requests.Session() as session:
1445
- def _fetch_data(name_info):
1446
- filename, metadata = name_info
1447
-
1448
- try:
1449
- # Extract the presigned URL and encryption material from metadata
1450
- url_key = self.get_url_key(metadata)
1451
- presigned_url = metadata[url_key]
1452
- encryption_material = metadata["ENCRYPTION_MATERIAL"]
1453
-
1454
- response = get_with_retries(session, presigned_url, config=self.config)
1455
- response.raise_for_status() # Throw if something goes wrong
1456
-
1457
- decrypted = self._maybe_decrypt(response.content, encryption_material)
1458
- return (filename, decrypted)
1459
-
1460
- except requests.RequestException as e:
1461
- raise scrub_exception(wrap_with_request_id(e))
1462
-
1463
- # Create a list of tuples for the map function
1464
- name_info_pairs = list(artifact_info.items())
1465
-
1466
- with ThreadPoolExecutor(max_workers=5) as executor:
1467
- results = executor.map(_fetch_data, name_info_pairs)
1468
-
1469
- return {name: data for (name, data) in results}
1470
-
1471
- def _maybe_decrypt(self, content: bytes, encryption_material: str) -> bytes:
1472
- # Decrypt if encryption material is present
1473
- if encryption_material:
1474
- # if there's no padding, the initial file was empty
1475
- if len(content) == 0:
1476
- return b""
1477
-
1478
- return decrypt_artifact(content, encryption_material)
1479
-
1480
- # otherwise, return content directly
1481
- return content
1482
-
1483
- def _parse_exec_async_results(self, arrow_files: List[Tuple[str, bytes]]):
1484
- """Mimics the logic in _parse_arrow_results of railib/api.py#L303 without requiring a wrapping multipart form."""
1485
- results = []
1486
-
1487
- for file_name, file_content in arrow_files:
1488
- with pa.ipc.open_stream(file_content) as reader:
1489
- schema = reader.schema
1490
- batches = [batch for batch in reader]
1491
- table = pa.Table.from_batches(batches=batches, schema=schema)
1492
- results.append({"relationId": file_name, "table": table})
1493
-
1494
- return results
1495
-
1496
- def _download_results(
1497
- self, artifact_info: Dict[str, Dict], txn_id: str, state: str
1498
- ) -> TransactionAsyncResponse:
1499
- with debugging.span("download_results"):
1500
- # Fetch artifacts
1501
- artifacts = self._fetch_exec_async_artifacts(artifact_info)
1502
-
1503
- # Directly use meta_json as it is fetched
1504
- meta_json_bytes = artifacts["metadata.json"]
1505
-
1506
- # Decode the bytes and parse the JSON
1507
- meta_json_str = meta_json_bytes.decode('utf-8')
1508
- meta_json = json.loads(meta_json_str) # Parse the JSON string
1509
-
1510
- # Use the metadata to map arrow files to the relations they contain
1511
- try:
1512
- arrow_files_to_relations = {
1513
- artifact["filename"]: artifact["relationId"]
1514
- for artifact in meta_json
1515
- }
1516
- except KeyError:
1517
- # TODO: Remove this fallback mechanism later once several engine versions are updated
1518
- arrow_files_to_relations = {
1519
- f"{ix}.arrow": artifact["relationId"]
1520
- for ix, artifact in enumerate(meta_json)
1521
- }
1522
-
1523
- # Hydrate the arrow files into tables
1524
- results = self._parse_exec_async_results(
1525
- [
1526
- (arrow_files_to_relations[name], content)
1527
- for name, content in artifacts.items()
1528
- if name.endswith(".arrow")
1529
- ]
1530
- )
1531
-
1532
- # Create and return the response
1533
- rsp = TransactionAsyncResponse()
1534
- rsp.transaction = {
1535
- "id": txn_id,
1536
- "state": state,
1537
- "response_format_version": None,
1538
- }
1539
- rsp.metadata = meta_json
1540
- rsp.problems = artifacts.get(
1541
- "problems.json"
1542
- ) # Safely access possible missing keys
1543
- rsp.results = results
1544
- return rsp
1545
-
1546
- def get_transaction_problems(self, txn_id: str) -> List[Dict[str, Any]]:
1547
- with debugging.span("get_own_transaction_problems"):
1548
- response = self._exec(
1549
- f"select * from table({APP_NAME}.api.get_own_transaction_problems('{txn_id}'));"
1550
- )
1551
- if not response:
1552
- return []
1553
- return response
1554
-
1555
- def get_url_key(self, metadata) -> str:
1556
- # In Azure, there is only one type of URL, which is used for both internal and
1557
- # external access; always use that one
1558
- if is_azure_url(metadata['PRESIGNED_URL']):
1559
- return 'PRESIGNED_URL'
1560
-
1561
- configured = self.config.get("download_url_type", None)
1562
- if configured == "internal":
1563
- return 'PRESIGNED_URL_AP'
1564
- elif configured == "external":
1565
- return "PRESIGNED_URL"
1566
-
1567
- if is_container_runtime():
1568
- return 'PRESIGNED_URL_AP'
1569
-
1570
- return 'PRESIGNED_URL'
1571
-
1572
- def _exec_rai_app(
1573
- self,
1574
- database: str,
1575
- engine: str | None,
1576
- raw_code: str,
1577
- inputs: Dict,
1578
- readonly=True,
1579
- nowait_durable=False,
1580
- request_headers: Dict | None = None,
1581
- bypass_index=False,
1582
- language: str = "rel",
1583
- query_timeout_mins: int | None = None,
1584
- ):
1585
- """
1586
- High-level method to execute RAI app stored procedures.
1587
-
1588
- Builds and executes SQL to call the RAI app's exec_async_v2 stored procedure.
1589
- This method handles the SQL string construction for two different formats:
1590
- 1. New format (with graph index): Uses object payload with parameterized query
1591
- 2. Legacy format: Uses positional parameters
1592
-
1593
- The choice between formats depends on the use_graph_index configuration.
1594
- The new format allows the stored procedure to hash the model and username
1595
- to determine the database, while the legacy format uses the passed database directly.
1596
-
1597
- This method is called by _exec_async_v2 to create transactions. It skips
1598
- use_index retry logic (skip_engine_db_error_retry=True) because that
1599
- is handled at a higher level by exec_raw/exec_lqp.
1600
-
1601
- Args:
1602
- database: Database/model name
1603
- engine: Engine name (optional)
1604
- raw_code: Code to execute (REL, LQP, or SQL)
1605
- inputs: Input parameters for the query
1606
- readonly: Whether the transaction is read-only
1607
- nowait_durable: Whether to wait for durable writes
1608
- request_headers: Optional HTTP headers
1609
- bypass_index: Whether to bypass graph index setup
1610
- language: Query language ("rel" or "lqp")
1611
- query_timeout_mins: Optional query timeout in minutes
1612
-
1613
- Returns:
1614
- Response from the stored procedure call (transaction creation result)
1615
-
1616
- Raises:
1617
- Exception: If transaction creation fails
1618
- """
1619
- assert language == "rel" or language == "lqp", "Only 'rel' and 'lqp' languages are supported"
1620
- if query_timeout_mins is None and (timeout_value := self.config.get("query_timeout_mins", DEFAULT_QUERY_TIMEOUT_MINS)) is not None:
1621
- query_timeout_mins = int(timeout_value)
1622
- # Depending on the shape of the input, the behavior of exec_async_v2 changes.
1623
- # When using the new format (with an object), the function retrieves the
1624
- # 'rai' database by hashing the model and username. In contrast, the
1625
- # current version directly uses the passed database value.
1626
- # Therefore, we must use the original exec_async_v2 when not using the
1627
- # graph index to ensure the correct database is utilized.
1628
- use_graph_index = self.config.get("use_graph_index", USE_GRAPH_INDEX)
1629
- if use_graph_index and not bypass_index:
1630
- payload = {
1631
- 'database': database,
1632
- 'engine': engine,
1633
- 'inputs': inputs,
1634
- 'readonly': readonly,
1635
- 'nowait_durable': nowait_durable,
1636
- 'language': language,
1637
- 'headers': request_headers
1638
- }
1639
- if query_timeout_mins is not None:
1640
- payload["timeout_mins"] = query_timeout_mins
1641
- sql_string = f"CALL {APP_NAME}.api.exec_async_v2(?, {payload});"
1642
- else:
1643
- if query_timeout_mins is not None:
1644
- sql_string = f"CALL {APP_NAME}.api.exec_async_v2('{database}','{engine}', ?, {inputs}, {readonly}, {nowait_durable}, '{language}', {query_timeout_mins}, {request_headers});"
1645
- else:
1646
- sql_string = f"CALL {APP_NAME}.api.exec_async_v2('{database}','{engine}', ?, {inputs}, {readonly}, {nowait_durable}, '{language}', {request_headers});"
1647
- # Don't let exec setup GI on failure, exec_raw and exec_lqp will do that and add the correct headers.
1648
- response = self._exec(
1649
- sql_string,
1650
- raw_code,
1651
- skip_engine_db_error_retry=True,
1652
- )
1653
- if not response:
1654
- raise Exception("Failed to create transaction")
1655
- return response
1656
-
1657
- def _exec_async_v2(
1658
- self,
1659
- database: str,
1660
- engine: str | None,
1661
- raw_code: str,
1662
- inputs: Dict | None = None,
1663
- readonly=True,
1664
- nowait_durable=False,
1665
- headers: Dict | None = None,
1666
- bypass_index=False,
1667
- language: str = "rel",
1668
- query_timeout_mins: int | None = None,
1669
- gi_setup_skipped: bool = False,
1670
- ):
1671
- """
1672
- High-level async execution method with transaction polling and artifact management.
1673
-
1674
- This is the core method for executing queries asynchronously. It:
1675
- 1. Creates a transaction by calling _exec_rai_app
1676
- 2. Handles two execution paths:
1677
- - Fast path: Transaction completes immediately (COMPLETED/ABORTED)
1678
- - Slow path: Transaction is pending, requires polling until completion
1679
- 3. Manages pending transactions list
1680
- 4. Downloads and returns query results/artifacts
1681
-
1682
- This method is called by _execute_code (base implementation) and can be
1683
- overridden by child classes (e.g., DirectAccessResources uses HTTP instead).
1684
-
1685
- Args:
1686
- database: Database/model name
1687
- engine: Engine name (optional)
1688
- raw_code: Code to execute (REL, LQP, or SQL)
1689
- inputs: Input parameters for the query
1690
- readonly: Whether the transaction is read-only
1691
- nowait_durable: Whether to wait for durable writes
1692
- headers: Optional HTTP headers
1693
- bypass_index: Whether to bypass graph index setup
1694
- language: Query language ("rel" or "lqp")
1695
- query_timeout_mins: Optional query timeout in minutes
1696
- gi_setup_skipped: Whether graph index setup was skipped (for retry logic)
1697
-
1698
- Returns:
1699
- Query results (downloaded artifacts)
1700
- """
1701
- if inputs is None:
1702
- inputs = {}
1703
- request_headers = debugging.add_current_propagation_headers(headers)
1704
- query_attrs_dict = json.loads(request_headers.get("X-Query-Attributes", "{}"))
1705
-
1706
- with debugging.span("transaction", **query_attrs_dict) as txn_span:
1707
- with debugging.span("create_v2", **query_attrs_dict) as create_span:
1708
- request_headers['user-agent'] = get_pyrel_version(self.generation)
1709
- request_headers['gi_setup_skipped'] = str(gi_setup_skipped)
1710
- request_headers['pyrel_program_id'] = debugging.get_program_span_id() or ""
1711
- response = self._exec_rai_app(
1712
- database=database,
1713
- engine=engine,
1714
- raw_code=raw_code,
1715
- inputs=inputs,
1716
- readonly=readonly,
1717
- nowait_durable=nowait_durable,
1718
- request_headers=request_headers,
1719
- bypass_index=bypass_index,
1720
- language=language,
1721
- query_timeout_mins=query_timeout_mins,
1722
- )
1723
-
1724
- artifact_info = {}
1725
- rows = list(iter(response))
1726
-
1727
- # process the first row since txn_id and state are the same for all rows
1728
- first_row = rows[0]
1729
- txn_id = first_row['ID']
1730
- state = first_row['STATE']
1731
- filename = first_row['FILENAME']
1732
-
1733
- txn_span["txn_id"] = txn_id
1734
- create_span["txn_id"] = txn_id
1735
- debugging.event("transaction_created", txn_span, txn_id=txn_id)
1736
-
1737
- # fast path: transaction already finished
1738
- if state in ["COMPLETED", "ABORTED"]:
1739
- if txn_id in self._pending_transactions:
1740
- self._pending_transactions.remove(txn_id)
1741
-
1742
- # Process rows to get the rest of the artifacts
1743
- for row in rows:
1744
- filename = row['FILENAME']
1745
- artifact_info[filename] = row
1746
-
1747
- # Slow path: transaction not done yet; start polling
1748
- else:
1749
- self._pending_transactions.append(txn_id)
1750
- with debugging.span("wait", txn_id=txn_id):
1751
- poll_with_specified_overhead(
1752
- lambda: self._check_exec_async_status(txn_id, headers=request_headers), 0.1
1753
- )
1754
- artifact_info = self._list_exec_async_artifacts(txn_id, headers=request_headers)
1755
-
1756
- with debugging.span("fetch"):
1757
- return self._download_results(artifact_info, txn_id, state)
1758
-
1759
- def get_user_based_engine_name(self):
1760
- if not self._session:
1761
- self._session = self.get_sf_session()
1762
- user_table = self._session.sql("select current_user()").collect()
1763
- user = user_table[0][0]
1764
- assert isinstance(user, str), f"current_user() must return a string, not {type(user)}"
1765
- return _sanitize_user_name(user)
1766
-
1767
- def is_engine_ready(self, engine_name: str):
1768
- engine = self.get_engine(engine_name)
1769
- return engine and engine["state"] == "READY"
1770
-
1771
- def auto_create_engine(self, name: str | None = None, size: str | None = None, headers: Dict | None = None):
1772
- """Synchronously create/ensure an engine is ready, blocking until ready."""
1773
- with debugging.span("auto_create_engine", active=self._active_engine) as span:
1774
- active = self._get_active_engine()
1775
- if active:
1776
- return active
1777
-
1778
- # Resolve and validate parameters
1779
- engine_name, engine_size = self._prepare_engine_params(name, size)
1780
-
1781
- try:
1782
- # Get current engine state
1783
- engine = self.get_engine(engine_name)
1784
- if engine:
1785
- span.update(cast(dict, engine))
1786
-
1787
- # Create context for state handling
1788
- context = EngineContext(
1789
- engine_name=engine_name,
1790
- engine_size=engine_size,
1791
- headers=headers,
1792
- requested_size=size,
1793
- span=span,
1794
- )
1795
-
1796
- # Process engine state using sync handlers
1797
- self._process_engine_state(engine, context, self._sync_engine_state_handlers)
1798
-
1799
- except Exception as e:
1800
- self._handle_engine_creation_errors(e, engine_name)
1801
-
1802
- return engine_name
1803
-
1804
- def auto_create_engine_async(self, name: str | None = None):
1805
- """Asynchronously create/ensure an engine, returns immediately."""
1806
- active = self._get_active_engine()
1807
- if active and (active == name or name is None):
1808
- return active
1809
-
1810
- with Spinner(
1811
- "Checking engine status",
1812
- leading_newline=True,
1813
- ) as spinner:
1814
- with debugging.span("auto_create_engine_async", active=self._active_engine):
1815
- # Resolve and validate parameters (use_default_size=True for async)
1816
- engine_name, engine_size = self._prepare_engine_params(name, None, use_default_size=True)
1817
-
1818
- try:
1819
- # Get current engine state
1820
- engine = self.get_engine(engine_name)
1821
-
1822
- # Create context for state handling
1823
- context = EngineContext(
1824
- engine_name=engine_name,
1825
- engine_size=engine_size,
1826
- headers=None,
1827
- requested_size=None,
1828
- spinner=spinner,
1829
- )
1830
-
1831
- # Process engine state using async handlers
1832
- self._process_engine_state(engine, context, self._async_engine_state_handlers, set_active_on_success=True)
1833
-
1834
- except Exception as e:
1835
- spinner.update_messages({
1836
- "finished_message": f"Failed to create engine {engine_name}",
1837
- })
1838
- self._handle_engine_creation_errors(e, engine_name, preserve_rai_exception=True)
1839
-
1840
- return engine_name
1841
-
1842
- #--------------------------------------------------
1843
- # Exec
1844
- #--------------------------------------------------
1845
-
1846
- def _execute_code(
1847
- self,
1848
- database: str,
1849
- engine: str | None,
1850
- raw_code: str,
1851
- inputs: Dict | None,
1852
- readonly: bool,
1853
- nowait_durable: bool,
1854
- headers: Dict | None,
1855
- bypass_index: bool,
1856
- language: str,
1857
- query_timeout_mins: int | None,
1858
- ) -> Any:
1859
- """
1860
- Template method for code execution - can be overridden by child classes.
1861
-
1862
- This is a template method that provides a hook for child classes to add
1863
- execution logic (like retry mechanisms). The base implementation simply
1864
- calls _exec_async_v2 directly.
1865
-
1866
- UseIndexResources overrides this method to use _exec_with_gi_retry, which
1867
- adds automatic use_index polling on engine/database errors.
1868
-
1869
- This method is called by exec_lqp() and exec_raw() to provide a single
1870
- execution point that can be customized per resource class.
1871
-
1872
- Args:
1873
- database: Database/model name
1874
- engine: Engine name (optional)
1875
- raw_code: Code to execute (already processed/encoded)
1876
- inputs: Input parameters for the query
1877
- readonly: Whether the transaction is read-only
1878
- nowait_durable: Whether to wait for durable writes
1879
- headers: Optional HTTP headers
1880
- bypass_index: Whether to bypass graph index setup
1881
- language: Query language ("rel" or "lqp")
1882
- query_timeout_mins: Optional query timeout in minutes
1883
-
1884
- Returns:
1885
- Query results
1886
- """
1887
- return self._exec_async_v2(
1888
- database, engine, raw_code, inputs, readonly, nowait_durable,
1889
- headers=headers, bypass_index=bypass_index, language=language,
1890
- query_timeout_mins=query_timeout_mins, gi_setup_skipped=True,
1891
- )
1892
-
1893
- def exec_lqp(
1894
- self,
1895
- database: str,
1896
- engine: str | None,
1897
- raw_code: bytes,
1898
- readonly=True,
1899
- *,
1900
- inputs: Dict | None = None,
1901
- nowait_durable=False,
1902
- headers: Dict | None = None,
1903
- bypass_index=False,
1904
- query_timeout_mins: int | None = None,
1905
- ):
1906
- """Execute LQP code."""
1907
- raw_code_b64 = base64.b64encode(raw_code).decode("utf-8")
1908
- return self._execute_code(
1909
- database, engine, raw_code_b64, inputs, readonly, nowait_durable,
1910
- headers, bypass_index, 'lqp', query_timeout_mins
1911
- )
1912
-
1913
- def exec_raw(
1914
- self,
1915
- database: str,
1916
- engine: str | None,
1917
- raw_code: str,
1918
- readonly=True,
1919
- *,
1920
- inputs: Dict | None = None,
1921
- nowait_durable=False,
1922
- headers: Dict | None = None,
1923
- bypass_index=False,
1924
- query_timeout_mins: int | None = None,
1925
- ):
1926
- """Execute raw code."""
1927
- raw_code = raw_code.replace("'", "\\'")
1928
- return self._execute_code(
1929
- database, engine, raw_code, inputs, readonly, nowait_durable,
1930
- headers, bypass_index, 'rel', query_timeout_mins
1931
- )
1932
-
1933
-
1934
- def format_results(self, results, task:m.Task|None=None) -> Tuple[DataFrame, List[Any]]:
1935
- return result_helpers.format_results(results, task)
1936
-
1937
- #--------------------------------------------------
1938
- # Exec format
1939
- #--------------------------------------------------
1940
-
1941
- def exec_format(
1942
- self,
1943
- database: str,
1944
- engine: str,
1945
- raw_code: str,
1946
- cols: List[str],
1947
- format: str,
1948
- inputs: Dict | None = None,
1949
- readonly=True,
1950
- nowait_durable=False,
1951
- skip_invalid_data=False,
1952
- headers: Dict | None = None,
1953
- query_timeout_mins: int | None = None,
1954
- ):
1955
- if inputs is None:
1956
- inputs = {}
1957
- if headers is None:
1958
- headers = {}
1959
- if 'user-agent' not in headers:
1960
- headers['user-agent'] = get_pyrel_version(self.generation)
1961
- if query_timeout_mins is None and (timeout_value := self.config.get("query_timeout_mins", DEFAULT_QUERY_TIMEOUT_MINS)) is not None:
1962
- query_timeout_mins = int(timeout_value)
1963
- # TODO: add headers
1964
- start = time.perf_counter()
1965
- output_table = "out" + str(uuid.uuid4()).replace("-", "_")
1966
- temp_table = f"temp_{output_table}"
1967
- use_graph_index = self.config.get("use_graph_index", USE_GRAPH_INDEX)
1968
- txn_id = None
1969
- rejected_rows = None
1970
- col_names_map = None
1971
- artifacts = None
1972
- assert self._session
1973
- temp = self._session.createDataFrame([], StructType([StructField(name, StringType()) for name in cols]))
1974
- with debugging.span("transaction") as txn_span:
1975
- try:
1976
- # In the graph index case we need to use the new exec_into_table proc as it obfuscates the db name
1977
- with debugging.span("exec_format"):
1978
- if use_graph_index:
1979
- # we do not provide a default value for query_timeout_mins so that we can control the default on app level
1980
- if query_timeout_mins is not None:
1981
- res = self._exec(f"call {APP_NAME}.api.exec_into_table(?, ?, ?, ?, ?, NULL, ?, {headers}, ?, ?);", [database, engine, raw_code, output_table, readonly, nowait_durable, skip_invalid_data, query_timeout_mins])
1982
- else:
1983
- res = self._exec(f"call {APP_NAME}.api.exec_into_table(?, ?, ?, ?, ?, NULL, ?, {headers}, ?);", [database, engine, raw_code, output_table, readonly, nowait_durable, skip_invalid_data])
1984
- txn_id = json.loads(res[0]["EXEC_INTO_TABLE"])["rai_transaction_id"]
1985
- rejected_rows = json.loads(res[0]["EXEC_INTO_TABLE"]).get("rejected_rows", [])
1986
- rejected_rows_count = json.loads(res[0]["EXEC_INTO_TABLE"]).get("rejected_rows_count", 0)
1987
- else:
1988
- if query_timeout_mins is not None:
1989
- res = self._exec(f"call {APP_NAME}.api.exec_into(?, ?, ?, ?, ?, {inputs}, ?, {headers}, ?, ?);", [database, engine, raw_code, output_table, readonly, nowait_durable, skip_invalid_data, query_timeout_mins])
1990
- else:
1991
- res = self._exec(f"call {APP_NAME}.api.exec_into(?, ?, ?, ?, ?, {inputs}, ?, {headers}, ?);", [database, engine, raw_code, output_table, readonly, nowait_durable, skip_invalid_data])
1992
- txn_id = json.loads(res[0]["EXEC_INTO"])["rai_transaction_id"]
1993
- rejected_rows = json.loads(res[0]["EXEC_INTO"]).get("rejected_rows", [])
1994
- rejected_rows_count = json.loads(res[0]["EXEC_INTO"]).get("rejected_rows_count", 0)
1995
- debugging.event("transaction_created", txn_span, txn_id=txn_id)
1996
- debugging.time("exec_format", time.perf_counter() - start, DataFrame())
1997
-
1998
- with debugging.span("temp_table_swap", txn_id=txn_id):
1999
- out_sample = self._exec(f"select * from {APP_NAME}.results.{output_table} limit 1;")
2000
- if out_sample:
2001
- keys = set([k.lower() for k in out_sample[0].as_dict().keys()])
2002
- col_names_map = {}
2003
- for ix, name in enumerate(cols):
2004
- col_key = f"col{ix:03}"
2005
- if col_key in keys:
2006
- col_names_map[col_key] = IdentityParser(name).identity
2007
- else:
2008
- col_names_map[col_key] = name
2009
-
2010
- names = ", ".join([
2011
- f"{col_key} as {alias}" if col_key in keys else f"NULL as {alias}"
2012
- for col_key, alias in col_names_map.items()
2013
- ])
2014
- self._exec(f"CREATE TEMPORARY TABLE {APP_NAME}.results.{temp_table} AS SELECT {names} FROM {APP_NAME}.results.{output_table};")
2015
- self._exec(f"call {APP_NAME}.api.drop_result_table(?)", [output_table])
2016
- temp = cast(snowflake.snowpark.DataFrame, self._exec(f"select * from {APP_NAME}.results.{temp_table}", raw=True))
2017
- if rejected_rows:
2018
- debugging.warn(RowsDroppedFromTargetTableWarning(rejected_rows, rejected_rows_count, col_names_map))
2019
- except Exception as e:
2020
- messages = collect_error_messages(e)
2021
- if any("no columns returned" in msg or "columns of results could not be determined" in msg for msg in messages):
2022
- pass
2023
- else:
2024
- raise e
2025
- if txn_id:
2026
- artifact_info = self._list_exec_async_artifacts(txn_id)
2027
- with debugging.span("fetch"):
2028
- artifacts = self._download_results(artifact_info, txn_id, "ABORTED")
2029
- return (temp, artifacts)
2030
-
2031
- #--------------------------------------------------
2032
- # Custom model types
2033
- #--------------------------------------------------
2034
-
2035
- def _get_ns(self, model:dsl.Graph):
2036
- if model not in self._ns_cache:
2037
- self._ns_cache[model] = _Snowflake(model)
2038
- return self._ns_cache[model]
2039
-
2040
- def to_model_type(self, model:dsl.Graph, name: str, source:str):
2041
- parser = IdentityParser(source)
2042
- if not parser.is_complete:
2043
- raise SnowflakeInvalidSource(Errors.call_source(), source)
2044
- ns = self._get_ns(model)
2045
- # skip the last item in the list (the full identifier)
2046
- for part in parser.to_list()[:-1]:
2047
- ns = ns._safe_get(part)
2048
- assert parser.identity, f"Error parsing source in to_model_type: {source}"
2049
- self.sources.add(parser.identity)
2050
- return ns
2051
-
2052
- #--------------------------------------------------
2053
- # Source Management
2054
- #--------------------------------------------------
2055
-
2056
- def _check_source_updates(self, sources: Iterable[str]):
2057
- if not sources:
2058
- return {}
2059
- app_name = self.get_app_name()
2060
-
2061
- source_types = dict[str, SourceInfo]()
2062
- partitioned_sources: dict[str, dict[str, list[dict[str, str]]]] = defaultdict(
2063
- lambda: defaultdict(list)
2064
- )
2065
- fqn_to_parts: dict[str, tuple[str, str, str]] = {}
2066
-
2067
- for source in sources:
2068
- parser = IdentityParser(source, True)
2069
- parsed = parser.to_list()
2070
- assert len(parsed) == 4, f"Invalid source: {source}"
2071
- db, schema, entity, identity = parsed
2072
- assert db and schema and entity and identity, f"Invalid source: {source}"
2073
- source_types[identity] = cast(
2074
- SourceInfo,
2075
- {
2076
- "type": None,
2077
- "state": "",
2078
- "columns_hash": None,
2079
- "table_created_at": None,
2080
- "stream_created_at": None,
2081
- "last_ddl": None,
2082
- },
2083
- )
2084
- partitioned_sources[db][schema].append({"entity": entity, "identity": identity})
2085
- fqn_to_parts[identity] = (db, schema, entity)
2086
-
2087
- if not partitioned_sources:
2088
- return source_types
2089
-
2090
- state_queries: list[str] = []
2091
- for db, schemas in partitioned_sources.items():
2092
- select_rows: list[str] = []
2093
- for schema, tables in schemas.items():
2094
- for table_info in tables:
2095
- select_rows.append(
2096
- "SELECT "
2097
- f"{IdentityParser.to_sql_value(db)} AS catalog_name, "
2098
- f"{IdentityParser.to_sql_value(schema)} AS schema_name, "
2099
- f"{IdentityParser.to_sql_value(table_info['entity'])} AS table_name"
2100
- )
2101
-
2102
- if not select_rows:
2103
- continue
2104
-
2105
- target_entities_clause = "\n UNION ALL\n ".join(select_rows)
2106
- # Main query:
2107
- # 1. Enumerate the target tables via target_entities.
2108
- # 2. Pull their metadata (last_altered, type) from INFORMATION_SCHEMA.TABLES.
2109
- # 3. Look up the most recent stream activity for those FQNs only.
2110
- # 4. Capture creation timestamps and use last_ddl vs created_at to classify each target,
2111
- # so we mark tables as stale when they were recreated even if column hashes still match.
2112
- state_queries.append(
2113
- f"""WITH target_entities AS (
2114
- {target_entities_clause}
2115
- ),
2116
- table_info AS (
2117
- SELECT
2118
- {app_name}.api.normalize_fq_ids(
2119
- ARRAY_CONSTRUCT(
2120
- CASE
2121
- WHEN t.table_catalog = UPPER(t.table_catalog) THEN t.table_catalog
2122
- ELSE '"' || t.table_catalog || '"'
2123
- END || '.' ||
2124
- CASE
2125
- WHEN t.table_schema = UPPER(t.table_schema) THEN t.table_schema
2126
- ELSE '"' || t.table_schema || '"'
2127
- END || '.' ||
2128
- CASE
2129
- WHEN t.table_name = UPPER(t.table_name) THEN t.table_name
2130
- ELSE '"' || t.table_name || '"'
2131
- END
2132
- )
2133
- )[0]:identifier::string AS fqn,
2134
- CONVERT_TIMEZONE('UTC', t.last_altered) AS last_ddl,
2135
- CONVERT_TIMEZONE('UTC', t.created) AS table_created_at,
2136
- t.table_type AS kind
2137
- FROM {db}.INFORMATION_SCHEMA.tables t
2138
- JOIN target_entities te
2139
- ON t.table_catalog = te.catalog_name
2140
- AND t.table_schema = te.schema_name
2141
- AND t.table_name = te.table_name
2142
- ),
2143
- stream_activity AS (
2144
- SELECT
2145
- sa.fqn,
2146
- MAX(sa.created_at) AS created_at
2147
- FROM (
2148
- SELECT
2149
- {app_name}.api.normalize_fq_ids(ARRAY_CONSTRUCT(fq_object_name))[0]:identifier::string AS fqn,
2150
- created_at
2151
- FROM {app_name}.api.data_streams
2152
- WHERE rai_database = '{PYREL_ROOT_DB}'
2153
- ) sa
2154
- JOIN table_info ti
2155
- ON sa.fqn = ti.fqn
2156
- GROUP BY sa.fqn
2157
- )
2158
- SELECT
2159
- ti.fqn,
2160
- ti.kind,
2161
- ti.last_ddl,
2162
- ti.table_created_at,
2163
- sa.created_at AS stream_created_at,
2164
- IFF(
2165
- DATEDIFF(second, sa.created_at::timestamp, ti.last_ddl::timestamp) > 0,
2166
- 'STALE',
2167
- 'CURRENT'
2168
- ) AS state
2169
- FROM table_info ti
2170
- LEFT JOIN stream_activity sa
2171
- ON sa.fqn = ti.fqn
2172
- """
2173
- )
2174
-
2175
- stale_fqns: list[str] = []
2176
- for state_query in state_queries:
2177
- for row in self._exec(state_query):
2178
- row_dict = row.as_dict() if hasattr(row, "as_dict") else dict(row)
2179
- row_fqn = row_dict["FQN"]
2180
- parser = IdentityParser(row_fqn, True)
2181
- fqn = parser.identity
2182
- assert fqn, f"Error parsing returned FQN: {row_fqn}"
2183
-
2184
- source_types[fqn]["type"] = (
2185
- "TABLE" if row_dict["KIND"] == "BASE TABLE" else row_dict["KIND"]
2186
- )
2187
- source_types[fqn]["state"] = row_dict["STATE"]
2188
- source_types[fqn]["last_ddl"] = normalize_datetime(row_dict.get("LAST_DDL"))
2189
- source_types[fqn]["table_created_at"] = normalize_datetime(row_dict.get("TABLE_CREATED_AT"))
2190
- source_types[fqn]["stream_created_at"] = normalize_datetime(row_dict.get("STREAM_CREATED_AT"))
2191
- if row_dict["STATE"] == "STALE":
2192
- stale_fqns.append(fqn)
2193
-
2194
- if not stale_fqns:
2195
- return source_types
2196
-
2197
- # We batch stale tables by database/schema so each Snowflake query can hash
2198
- # multiple objects at once instead of issuing one statement per table.
2199
- stale_partitioned: dict[str, dict[str, list[dict[str, str]]]] = defaultdict(
2200
- lambda: defaultdict(list)
2201
- )
2202
- for fqn in stale_fqns:
2203
- db, schema, table = fqn_to_parts[fqn]
2204
- stale_partitioned[db][schema].append({"table": table, "identity": fqn})
2205
-
2206
- # Build one hash query per database, grouping schemas/tables inside so we submit
2207
- # at most a handful of set-based statements to Snowflake.
2208
- for db, schemas in stale_partitioned.items():
2209
- column_select_rows: list[str] = []
2210
- for schema, tables in schemas.items():
2211
- for table_info in tables:
2212
- # Build the literal rows for this db/schema so we can join back
2213
- # against INFORMATION_SCHEMA.COLUMNS in a single statement.
2214
- column_select_rows.append(
2215
- "SELECT "
2216
- f"{IdentityParser.to_sql_value(db)} AS catalog_name, "
2217
- f"{IdentityParser.to_sql_value(schema)} AS schema_name, "
2218
- f"{IdentityParser.to_sql_value(table_info['table'])} AS table_name"
2219
- )
2220
-
2221
- if not column_select_rows:
2222
- continue
2223
-
2224
- target_entities_clause = "\n UNION ALL\n ".join(column_select_rows)
2225
- # Main query: compute deterministic column hashes for every stale table
2226
- # in this database/schema batch so we can compare schemas without a round trip per table.
2227
- column_query = f"""WITH target_entities AS (
2228
- {target_entities_clause}
2229
- ),
2230
- column_info AS (
2231
- SELECT
2232
- {app_name}.api.normalize_fq_ids(
2233
- ARRAY_CONSTRUCT(
2234
- CASE
2235
- WHEN c.table_catalog = UPPER(c.table_catalog) THEN c.table_catalog
2236
- ELSE '"' || c.table_catalog || '"'
2237
- END || '.' ||
2238
- CASE
2239
- WHEN c.table_schema = UPPER(c.table_schema) THEN c.table_schema
2240
- ELSE '"' || c.table_schema || '"'
2241
- END || '.' ||
2242
- CASE
2243
- WHEN c.table_name = UPPER(c.table_name) THEN c.table_name
2244
- ELSE '"' || c.table_name || '"'
2245
- END
2246
- )
2247
- )[0]:identifier::string AS fqn,
2248
- c.column_name,
2249
- CASE
2250
- WHEN c.numeric_precision IS NOT NULL AND c.numeric_scale IS NOT NULL
2251
- THEN c.data_type || '(' || c.numeric_precision || ',' || c.numeric_scale || ')'
2252
- WHEN c.datetime_precision IS NOT NULL
2253
- THEN c.data_type || '(0,' || c.datetime_precision || ')'
2254
- WHEN c.character_maximum_length IS NOT NULL
2255
- THEN c.data_type || '(' || c.character_maximum_length || ')'
2256
- ELSE c.data_type
2257
- END AS type_signature,
2258
- IFF(c.is_nullable = 'YES', 'YES', 'NO') AS nullable_flag
2259
- FROM {db}.INFORMATION_SCHEMA.COLUMNS c
2260
- JOIN target_entities te
2261
- ON c.table_catalog = te.catalog_name
2262
- AND c.table_schema = te.schema_name
2263
- AND c.table_name = te.table_name
2264
- )
2265
- SELECT
2266
- fqn,
2267
- HEX_ENCODE(
2268
- HASH_AGG(
2269
- HASH(
2270
- column_name,
2271
- type_signature,
2272
- nullable_flag
2273
- )
2274
- )
2275
- ) AS columns_hash
2276
- FROM column_info
2277
- GROUP BY fqn
2278
- """
2279
-
2280
- for row in self._exec(column_query):
2281
- row_fqn = row["FQN"]
2282
- parser = IdentityParser(row_fqn, True)
2283
- fqn = parser.identity
2284
- assert fqn, f"Error parsing returned FQN: {row_fqn}"
2285
- source_types[fqn]["columns_hash"] = row["COLUMNS_HASH"]
2286
-
2287
- return source_types
2288
-
2289
- def _get_source_references(self, source_info: dict[str, SourceInfo]):
2290
- app_name = self.get_app_name()
2291
- missing_sources = []
2292
- invalid_sources = {}
2293
- source_references = []
2294
- for source, info in source_info.items():
2295
- source_type = info.get("type")
2296
- if source_type is None:
2297
- missing_sources.append(source)
2298
- elif source_type not in ("TABLE", "VIEW"):
2299
- invalid_sources[source] = source_type
2300
- else:
2301
- source_references.append(f"{app_name}.api.object_reference('{source_type}', '{source}')")
2302
-
2303
- if missing_sources:
2304
- current_role = self.get_sf_session().get_current_role()
2305
- if current_role is None:
2306
- current_role = self.config.get("role", None)
2307
- debugging.warn(UnknownSourceWarning(missing_sources, current_role))
2308
-
2309
- if invalid_sources:
2310
- debugging.warn(InvalidSourceTypeWarning(invalid_sources))
2311
-
2312
- self.source_references = source_references
2313
- return source_references
2314
-
2315
- #--------------------------------------------------
2316
- # Transactions
2317
- #--------------------------------------------------
2318
-
2319
- def get_transaction(self, transaction_id):
2320
- results = self._exec(
2321
- f"CALL {APP_NAME}.api.get_transaction(?);", [transaction_id])
2322
- if not results:
2323
- return None
2324
-
2325
- results = txn_list_to_dicts(results)
2326
-
2327
- txn = {field: results[0][field] for field in GET_TXN_SQL_FIELDS}
2328
-
2329
- state = txn.get("state")
2330
- created_on = txn.get("created_on")
2331
- finished_at = txn.get("finished_at")
2332
- if created_on:
2333
- # Transaction is still running
2334
- if state not in TERMINAL_TXN_STATES:
2335
- tz_info = created_on.tzinfo
2336
- txn['duration'] = datetime.now(tz_info) - created_on
2337
- # Transaction is terminal
2338
- elif finished_at:
2339
- txn['duration'] = finished_at - created_on
2340
- # Transaction is still running and we have no state or finished_at
2341
- else:
2342
- txn['duration'] = timedelta(0)
2343
- return txn
2344
-
2345
- def list_transactions(self, **kwargs):
2346
- id = kwargs.get("id", None)
2347
- state = kwargs.get("state", None)
2348
- engine = kwargs.get("engine", None)
2349
- limit = kwargs.get("limit", 100)
2350
- all_users = kwargs.get("all_users", False)
2351
- created_by = kwargs.get("created_by", None)
2352
- only_active = kwargs.get("only_active", False)
2353
- where_clause_arr = []
2354
-
2355
- if id:
2356
- where_clause_arr.append(f"id = '{id}'")
2357
- if state:
2358
- where_clause_arr.append(f"state = '{state.upper()}'")
2359
- if engine:
2360
- where_clause_arr.append(f"LOWER(engine_name) = '{engine.lower()}'")
2361
- else:
2362
- if only_active:
2363
- where_clause_arr.append("state in ('CREATED', 'RUNNING', 'PENDING')")
2364
- if not all_users and created_by is not None:
2365
- where_clause_arr.append(f"LOWER(created_by) = '{created_by.lower()}'")
2366
-
2367
- if len(where_clause_arr):
2368
- where_clause = f'WHERE {" AND ".join(where_clause_arr)}'
2369
- else:
2370
- where_clause = ""
2371
-
2372
- sql_fields = ", ".join(LIST_TXN_SQL_FIELDS)
2373
- query = f"SELECT {sql_fields} from {APP_NAME}.api.transactions {where_clause} ORDER BY created_on DESC LIMIT ?"
2374
- results = self._exec(query, [limit])
2375
- if not results:
2376
- return []
2377
- return txn_list_to_dicts(results)
2378
-
2379
- def cancel_transaction(self, transaction_id):
2380
- self._exec(f"CALL {APP_NAME}.api.cancel_own_transaction(?);", [transaction_id])
2381
- if transaction_id in self._pending_transactions:
2382
- self._pending_transactions.remove(transaction_id)
2383
-
2384
- def cancel_pending_transactions(self):
2385
- for txn_id in self._pending_transactions:
2386
- self.cancel_transaction(txn_id)
2387
-
2388
- def get_transaction_events(self, transaction_id: str, continuation_token:str=''):
2389
- results = self._exec(
2390
- f"SELECT {APP_NAME}.api.get_own_transaction_events(?, ?);",
2391
- [transaction_id, continuation_token],
2392
- )
2393
- if not results:
2394
- return {
2395
- "events": [],
2396
- "continuation_token": None
2397
- }
2398
- row = results[0][0]
2399
- return json.loads(row)
2400
-
2401
- #--------------------------------------------------
2402
- # Snowflake specific
2403
- #--------------------------------------------------
2404
-
2405
- def get_version(self):
2406
- results = self._exec(f"SELECT {APP_NAME}.app.get_release()")
2407
- if not results:
2408
- return None
2409
- return results[0][0]
2410
-
2411
- # CLI methods (list_warehouses, list_compute_pools, list_roles, list_apps,
2412
- # list_databases, list_sf_schemas, list_tables) are now in CLIResources class
2413
- # schema_info is kept in base Resources class since it's used by SnowflakeSchema._fetch_info()
2414
-
2415
- def schema_info(self, database: str, schema: str, tables: Iterable[str]):
2416
- """Get detailed schema information including primary keys, foreign keys, and columns."""
2417
- app_name = self.get_app_name()
2418
- # Only pass the db + schema as the identifier so that the resulting identity is correct
2419
- parser = IdentityParser(f"{database}.{schema}")
2420
-
2421
- with debugging.span("schema_info"):
2422
- with debugging.span("primary_keys") as span:
2423
- pk_query = f"SHOW PRIMARY KEYS IN SCHEMA {parser.identity};"
2424
- pks = self._exec(pk_query)
2425
- span["sql"] = pk_query
2426
-
2427
- with debugging.span("foreign_keys") as span:
2428
- fk_query = f"SHOW IMPORTED KEYS IN SCHEMA {parser.identity};"
2429
- fks = self._exec(fk_query)
2430
- span["sql"] = fk_query
2431
-
2432
- # IdentityParser will parse a single value (with no ".") and store it in this case in the db field
2433
- with debugging.span("columns") as span:
2434
- tables_str = ", ".join([f"'{IdentityParser(t).db}'" for t in tables])
2435
- query = textwrap.dedent(f"""
2436
- begin
2437
- SHOW COLUMNS IN SCHEMA {parser.identity};
2438
- let r resultset := (
2439
- SELECT
2440
- CASE
2441
- WHEN "table_name" = UPPER("table_name") THEN "table_name"
2442
- ELSE '"' || "table_name" || '"'
2443
- END as "table_name",
2444
- "column_name",
2445
- "data_type",
2446
- CASE
2447
- WHEN ARRAY_CONTAINS(PARSE_JSON("data_type"):"type", {app_name}.app.get_supported_column_types()) THEN TRUE
2448
- ELSE FALSE
2449
- END as "supported_type"
2450
- FROM table(result_scan(-1)) as t
2451
- WHERE "table_name" in ({tables_str})
2452
- );
2453
- return table(r);
2454
- end;
2455
- """)
2456
- span["sql"] = query
2457
- columns = self._exec(query)
2458
-
2459
- results = defaultdict(lambda: {"pks": [], "fks": {}, "columns": {}, "invalid_columns": {}})
2460
- if pks:
2461
- for row in pks:
2462
- results[row[3]]["pks"].append(row[4]) # type: ignore
2463
- if fks:
2464
- for row in fks:
2465
- results[row[7]]["fks"][row[8]] = row[3]
2466
- if columns:
2467
- # It seems that a SF parameter (QUOTED_IDENTIFIERS_IGNORE_CASE) can control
2468
- # whether snowflake will ignore case on `row.data_type`,
2469
- # so we have to use column indexes instead :(
2470
- for row in columns:
2471
- table_name = row[0]
2472
- column_name = row[1]
2473
- data_type = row[2]
2474
- supported_type = row[3]
2475
- # Filter out unsupported types
2476
- if supported_type:
2477
- results[table_name]["columns"][column_name] = data_type
2478
- else:
2479
- results[table_name]["invalid_columns"][column_name] = data_type
2480
- return results
2481
-
2482
- def get_cloud_provider(self) -> str:
2483
- """
2484
- Detect whether this is Snowflake on Azure, or AWS using Snowflake's CURRENT_REGION().
2485
- Returns 'azure' or 'aws'.
2486
- """
2487
- if self._session:
2488
- try:
2489
- # Query Snowflake's current region using the built-in function
2490
- result = self._session.sql("SELECT CURRENT_REGION()").collect()
2491
- if result:
2492
- region_info = result[0][0]
2493
- # Check if the region string contains the cloud provider name
2494
- if isinstance(region_info, str):
2495
- region_str = region_info.lower()
2496
- # Check for cloud providers in the region string
2497
- if 'azure' in region_str:
2498
- return 'azure'
2499
- else:
2500
- return 'aws'
2501
- except Exception:
2502
- pass
2503
-
2504
- # Fallback to AWS as default if detection fails
2505
- return 'aws'
2506
-
2507
- #--------------------------------------------------
2508
- # Snowflake Wrapper
2509
- #--------------------------------------------------
2510
-
2511
- class PrimaryKey:
2512
- pass
2513
-
2514
- class _Snowflake:
2515
- def __init__(self, model, auto_import=False):
2516
- self._model = model
2517
- self._auto_import = auto_import
2518
- if not isinstance(model._client.resources, Resources):
2519
- raise ValueError("Snowflake model must be used with a snowflake config")
2520
- self._dbs = {}
2521
- imports = model._client.resources.list_imports(model=model.name)
2522
- self._import_structure(imports)
2523
-
2524
- def _import_structure(self, imports: list[Import]):
2525
- tree = self._dbs
2526
- # pre-create existing imports
2527
- schemas = set()
2528
- for item in imports:
2529
- parser = IdentityParser(item["name"])
2530
- database_name, schema_name, table_name = parser.to_list()[:-1]
2531
- database = getattr(self, database_name)
2532
- schema = getattr(database, schema_name)
2533
- schemas.add(schema)
2534
- schema._add(table_name, is_imported=True)
2535
- return tree
2536
-
2537
- def _safe_get(self, name:str) -> 'SnowflakeDB':
2538
- name = name
2539
- if name in self._dbs:
2540
- return self._dbs[name]
2541
- self._dbs[name] = SnowflakeDB(self, name)
2542
- return self._dbs[name]
2543
-
2544
- def __getattr__(self, name: str) -> 'SnowflakeDB':
2545
- return self._safe_get(name)
2546
-
2547
-
2548
- class Snowflake(_Snowflake):
2549
- def __init__(self, model: dsl.Graph, auto_import=False):
2550
- if model._config.get_bool("use_graph_index", USE_GRAPH_INDEX):
2551
- raise SnowflakeProxySourceError()
2552
- else:
2553
- debugging.warn(SnowflakeProxyAPIDeprecationWarning())
2554
-
2555
- super().__init__(model, auto_import)
2556
-
2557
- class SnowflakeDB:
2558
- def __init__(self, parent, name):
2559
- self._name = name
2560
- self._parent = parent
2561
- self._model = parent._model
2562
- self._schemas = {}
2563
-
2564
- def _safe_get(self, name: str) -> 'SnowflakeSchema':
2565
- name = name
2566
- if name in self._schemas:
2567
- return self._schemas[name]
2568
- self._schemas[name] = SnowflakeSchema(self, name)
2569
- return self._schemas[name]
2570
-
2571
- def __getattr__(self, name: str) -> 'SnowflakeSchema':
2572
- return self._safe_get(name)
2573
-
2574
- class SnowflakeSchema:
2575
- def __init__(self, parent, name):
2576
- self._name = name
2577
- self._parent = parent
2578
- self._model = parent._model
2579
- self._tables = {}
2580
- self._imported = set()
2581
- self._table_info = defaultdict(lambda: {"pks": [], "fks": {}, "columns": {}, "invalid_columns": {}})
2582
- self._dirty = True
2583
-
2584
- def _fetch_info(self):
2585
- if not self._dirty:
2586
- return
2587
- self._table_info = self._model._client.resources.schema_info(self._parent._name, self._name, list(self._tables.keys()))
2588
-
2589
- check_column_types = self._model._config.get("check_column_types", True)
2590
-
2591
- if check_column_types:
2592
- self._check_and_confirm_invalid_columns()
2593
-
2594
- self._dirty = False
2595
-
2596
- def _check_and_confirm_invalid_columns(self):
2597
- """Check for invalid columns across the schema's tables."""
2598
- tables_with_invalid_columns = {}
2599
- for table_name, table_info in self._table_info.items():
2600
- if table_info.get("invalid_columns"):
2601
- tables_with_invalid_columns[table_name] = table_info["invalid_columns"]
2602
-
2603
- if tables_with_invalid_columns:
2604
- from relationalai.errors import UnsupportedColumnTypesWarning
2605
- UnsupportedColumnTypesWarning(tables_with_invalid_columns)
2606
-
2607
- def _add(self, name, is_imported=False):
2608
- if name in self._tables:
2609
- return self._tables[name]
2610
- self._dirty = True
2611
- if is_imported:
2612
- self._imported.add(name)
2613
- else:
2614
- self._tables[name] = SnowflakeTable(self, name)
2615
- return self._tables.get(name)
2616
-
2617
- def _safe_get(self, name: str) -> 'SnowflakeTable | None':
2618
- table = self._add(name)
2619
- return table
2620
-
2621
- def __getattr__(self, name: str) -> 'SnowflakeTable | None':
2622
- return self._safe_get(name)
2623
-
2624
-
2625
class SnowflakeTable(dsl.Type):
    """A pyrel Type backed by a physical Snowflake table.

    Construction is cheap: the expensive work (verifying the import stream,
    fetching schema info, installing the schema-mapping rules) is deferred to
    ``_lazy_init`` and runs the first time the table is actually used.
    """

    def __init__(self, parent, name):
        # Type name is prefixed to avoid clashing with user-defined types.
        super().__init__(parent._model, f"sf_{name}")
        # hack to make this work for pathfinder
        self._type.parents.append(m.Builtins.PQFilterAnnotation)
        self._name = name
        self._model = parent._model
        self._parent = parent            # the owning schema object
        self._aliases = {}
        # NOTE(review): attribute name is misspelled ("finalzed") but used
        # consistently throughout this class, so it is preserved here.
        self._finalzed = False
        self._source = runtime_env.get_source()
        # Declare the backing relation up front so references compile even
        # before the table is finalized.
        relation_name = to_fqn_relation_name(self.fqname())
        self._model.install_raw(f"declare {relation_name}")

    # The four public entry points below all force lazy initialization first.

    def __call__(self, *args, **kwargs):
        self._lazy_init()
        return super().__call__(*args, **kwargs)

    def add(self, *args, **kwargs):
        self._lazy_init()
        return super().add(*args, **kwargs)

    def extend(self, *args, **kwargs):
        self._lazy_init()
        return super().extend(*args, **kwargs)

    def known_properties(self):
        self._lazy_init()
        return super().known_properties()

    def _lazy_init(self):
        """Ensure the table's import stream exists and its rules are installed."""
        if self._finalzed:
            return

        parent = self._parent
        name = self._name
        use_graph_index = self._model._config.get("use_graph_index", USE_GRAPH_INDEX)

        # Without the graph index, data arrives via an import stream which
        # must exist before the mapping rules make sense.
        if not use_graph_index and name not in parent._imported:
            # parent._parent._parent walks schema -> database -> root config.
            if self._parent._parent._parent._auto_import:
                with Spinner(f"Creating stream for {self.fqname()}", f"Stream for {self.fqname()} created successfully"):
                    db_name = parent._parent._name
                    schema_name = parent._name
                    self._model._client.resources.create_import_stream(ImportSourceTable(db_name, schema_name, name), self._model.name)
                print("")
                parent._imported.add(name)
            else:
                # Auto-import disabled: refresh the imported-name cache and
                # fail loudly if this table still isn't among them.
                imports = self._model._client.resources.list_imports(model=self._model.name)
                for item in imports:
                    cur_name = item["name"].lower().split(".")[-1]
                    parent._imported.add(cur_name)
                if name not in parent._imported:
                    exception = SnowflakeImportMissingException(runtime_env.get_source(), self.fqname(), self._model.name)
                    raise exception from None

        parent._fetch_info()
        self._finalize()

    def _finalize(self):
        """Install the rules mapping the raw EDB relation onto this Type.

        Two parallel rule sets are emitted: one keyed by the legacy SHA-1
        METADATA$ROW_ID and one keyed by the newer UInt128 METADATA$KEY.
        """
        if self._finalzed:
            return

        self._finalzed = True
        self._schema = self._parent._table_info[self._name]

        # Set the relation name to the sanitized version of the fully qualified name
        relation_name = to_fqn_relation_name(self.fqname())

        model: dsl.Graph = self._model
        edb = getattr(std.rel, relation_name)
        edb._rel.parents.append(m.Builtins.EDB)
        # Helper relation holding just the row ids for this table.
        id_rel = getattr(std.rel, f"{relation_name}_pyrel_id")

        # --- legacy SHA-1 row-id mapping -------------------------------
        with model.rule(globalize=True, source=self._source):
            id, val = dsl.create_vars(2)
            edb(dsl.Symbol("METADATA$ROW_ID"), id, val)
            std.rel.SHA1(id)
            id_rel.add(id)

        with model.rule(dynamic=True, globalize=True, source=self._source):
            prop, id, val = dsl.create_vars(3)
            id_rel(id)
            std.rel.SHA1(id)
            self.add(snowflake_id=id)

        # One rule per column: project the EDB column onto the Type property.
        for prop, prop_type in self._schema["columns"].items():
            _prop = prop
            # Leading-underscore columns would collide with pyrel internals.
            if _prop.startswith("_"):
                _prop = "col" + prop

            prop_ident = sanitize_identifier(_prop.lower())

            with model.rule(dynamic=True, globalize=True, source=self._source):
                id, val = dsl.create_vars(2)
                edb(dsl.Symbol(prop), id, val)
                std.rel.SHA1(id)
                _prop = getattr(self, prop_ident)
                if not _prop:
                    raise ValueError(f"Property {_prop} couldn't be accessed on {self.fqname()}")
                if _prop.is_multi_valued:
                    inst = self(snowflake_id=id)
                    getattr(inst, prop_ident).add(val)
                else:
                    self(snowflake_id=id).set(**{prop_ident: val})

        # Because we're bypassing a bunch of the normal Type.add machinery here,
        # we need to manually account for the case where people are using value types.
        def wrapped(x):
            if not model._config.get("compiler.use_value_types", False):
                return x
            other_id = dsl.create_var()
            model._action(dsl.build.construct(self._type, [x, other_id]))
            return other_id

        # new UInt128 schema mapping rules
        with model.rule(dynamic=True, globalize=True, source=self._source):
            id = dsl.create_var()
            # This will generate an arity mismatch warning when used with the old SHA-1 Data Streams.
            # Ideally we have the `@no_diagnostics(:ARITY_MISMATCH)` attribute on the relation using
            # the METADATA$KEY column but that ended up being a more involved change then expected
            # for avoiding a non-blocking warning
            edb(dsl.Symbol("METADATA$KEY"), id)
            std.rel.UInt128(id)
            self.add(wrapped(id), snowflake_id=id)

        for prop, prop_type in self._schema["columns"].items():
            _prop = prop
            if _prop.startswith("_"):
                _prop = "col" + prop

            prop_ident = sanitize_identifier(_prop.lower())
            with model.rule(dynamic=True, globalize=True, source=self._source):
                id, val = dsl.create_vars(2)
                edb(dsl.Symbol(prop), id, val)
                std.rel.UInt128(id)
                _prop = getattr(self, prop_ident)
                if not _prop:
                    raise ValueError(f"Property {_prop} couldn't be accessed on {self.fqname()}")
                if _prop.is_multi_valued:
                    inst = self(id)
                    getattr(inst, prop_ident).add(val)
                else:
                    # Single-valued properties are stored as raw functional
                    # relations rather than via Type.set.
                    model._check_property(_prop._prop)
                    raw_relation = getattr(std.rel, prop_ident)
                    dsl.tag(raw_relation, dsl.Builtins.FunctionAnnotation)
                    raw_relation.add(wrapped(id), val)

    def namespace(self):
        """Return ``database.schema`` for this table."""
        return f"{self._parent._parent._name}.{self._parent._name}"

    def fqname(self):
        """Return the fully qualified ``database.schema.table`` name."""
        return f"{self.namespace()}.{self._name}"

    def describe(self, **kwargs):
        """Annotate columns: ``col=PrimaryKey`` or ``col=(OtherTable, prop)``.

        A tuple value declares a foreign key: a rule is installed joining this
        table to ``OtherTable`` on its primary key and exposing the linked
        instance under ``prop``. Returns ``self`` for chaining.
        """
        model = self._model
        for k, v in kwargs.items():
            if v is PrimaryKey:
                self._schema["pks"] = [k]
            elif isinstance(v, tuple):
                (table, name) = v
                if isinstance(table, SnowflakeTable):
                    fk_table = table
                    pk = fk_table._schema["pks"]
                    with model.rule():
                        inst = fk_table()
                        me = self()
                        # DSL equality constraint joining fk column to pk.
                        getattr(inst, pk[0]) == getattr(me, k)
                        if getattr(self, name).is_multi_valued:
                            getattr(me, name).add(inst)
                        else:
                            me.set(**{name: inst})
                else:
                    raise ValueError(f"Invalid foreign key {v}")
            else:
                raise ValueError(f"Invalid column {k}={v}")
        return self
2802
class Provider(ProviderBase):
    """High-level Snowflake provider: stream management, raw SQL, app control.

    Fixes relative to the previous revision:
    - ``sql`` no longer uses a mutable default argument for ``params``.
    - the "stream already exists" error message was missing a space
      ("Stream'NAME'" -> "Stream 'NAME'").
    - re-raised exceptions in ``create_streams`` chain their cause.
    """

    def __init__(
        self,
        profile: str | None = None,
        config: Config | None = None,
        resources: Resources | None = None,
        generation: Generation | None = None,
    ):
        if resources:
            self.resources = resources
        else:
            from .resources_factory import create_resources_instance
            self.resources = create_resources_instance(
                config=config,
                profile=profile,
                generation=generation or Generation.V0,
                dry_run=False,
                language="rel",
            )

    def list_streams(self, model: str):
        """List the import streams attached to *model*."""
        return self.resources.list_imports(model=model)

    def create_streams(self, sources: List[str], model: str, force=False):
        """Create import streams for each ``database.schema.table`` in *sources*.

        Creates the graph for *model* if missing. With ``force=True`` any
        existing stream is deleted first. Blocks until the imports complete.
        """
        if not self.resources.get_graph(model):
            self.resources.create_graph(model)

        def parse_source(raw: str):
            parser = IdentityParser(raw)
            assert parser.is_complete, "Snowflake table imports must be in `database.schema.table` format"
            return ImportSourceTable(*parser.to_list())

        for source in sources:
            source_table = parse_source(source)
            try:
                with Spinner(f"Creating stream for {source_table.name}", f"Stream for {source_table.name} created successfully"):
                    if force:
                        self.resources.delete_import(source_table.name, model, True)
                    self.resources.create_import_stream(source_table, model)
            except Exception as e:
                # Translate known backend errors into actionable messages,
                # preserving the original exception as the cause.
                if "stream already exists" in f"{e}":
                    raise Exception(f"\n\nStream '{source_table.name.upper()}' already exists.") from e
                elif "engine not found" in f"{e}":
                    raise Exception("\n\nNo engines found in a READY state. Please use `engines:create` to create an engine that will be used to initialize the target relation.") from e
                else:
                    raise e
        with Spinner("Waiting for imports to complete", "Imports complete"):
            self.resources.poll_imports(sources, model)

    def delete_stream(self, stream_id: str, model: str):
        """Delete the import stream *stream_id* from *model*."""
        return self.resources.delete_import(stream_id, model)

    def sql(self, query: str, params: List[Any] | None = None, format: Literal["list", "pandas", "polars", "lazy"] = "list"):
        """Execute raw SQL and return results in the requested *format*.

        ``params`` defaults to no bind parameters. Note: default format cannot
        be pandas because .to_pandas() only works on SELECT queries.
        """
        # Avoid the mutable-default-argument pitfall: build a fresh list per call.
        if params is None:
            params = []
        result = self.resources._exec(query, params, raw=True, help=False)
        if format == "lazy":
            return cast(snowflake.snowpark.DataFrame, result)
        elif format == "list":
            return cast(list, result.collect())
        elif format == "pandas":
            import pandas as pd
            try:
                # use to_pandas for SELECT queries
                return cast(pd.DataFrame, result.to_pandas())
            except Exception:
                # handle non-SELECT queries like SHOW
                return pd.DataFrame(result.collect())
        elif format == "polars":
            import polars as pl  # type: ignore
            return pl.DataFrame(
                [row.as_dict() for row in result.collect()],
                orient="row",
                strict=False,
                infer_schema_length=None
            )
        else:
            raise ValueError(f"Invalid format {format}. Should be one of 'list', 'pandas', 'polars', 'lazy'")

    def activate(self):
        """Activate the RelationalAI native app."""
        with Spinner("Activating RelationalAI app...", "RelationalAI app activated"):
            self.sql("CALL RELATIONALAI.APP.ACTIVATE();")

    def deactivate(self):
        """Deactivate the RelationalAI native app."""
        with Spinner("Deactivating RelationalAI app...", "RelationalAI app deactivated"):
            self.sql("CALL RELATIONALAI.APP.DEACTIVATE();")

    def drop_service(self):
        """Deprecated alias for :meth:`deactivate`."""
        warnings.warn(
            "The drop_service method has been deprecated in favor of deactivate",
            DeprecationWarning,
            stacklevel=2,
        )
        self.deactivate()

    def resume_service(self):
        """Deprecated alias for :meth:`activate`."""
        warnings.warn(
            "The resume_service method has been deprecated in favor of activate",
            DeprecationWarning,
            stacklevel=2,
        )
        self.activate()
2903
- #--------------------------------------------------
2904
- # SnowflakeClient
2905
- #--------------------------------------------------
2906
class SnowflakeClient(Client):
    """Client variant that keeps the backing database in sync via use_index polling."""

    def create_database(self, isolated=True, nowait_durable=True, headers: Dict | None = None):
        """Ensure the database reflects the current set of sources.

        Skips all work when ``last_database_version`` already matches the
        source count. Validates the configured engine name and size, then
        polls use_index for the sources. Registers deletion at exit for
        isolated, non-kept models.
        """
        from relationalai.tools.cli_helpers import validate_engine_name

        assert isinstance(self.resources, Resources)

        # Version check: nothing to do if sources haven't changed.
        if self.last_database_version == len(self.resources.sources):
            return

        model = self._source_database
        app_name = self.resources.get_app_name()
        engine_name = self.resources.get_default_engine_name()
        engine_size = self.resources.config.get_default_engine_size()

        # Validate engine name
        is_name_valid, _ = validate_engine_name(engine_name)
        if not is_name_valid:
            raise EngineNameValidationException(engine_name)

        # Validate engine size
        valid_sizes = self.resources.get_engine_sizes()
        if not isinstance(engine_size, str) or engine_size not in valid_sizes:
            raise InvalidEngineSizeError(str(engine_size), valid_sizes)

        program_span_id = debugging.get_program_span_id()

        # Forward caller-supplied query attributes into the debug span.
        query_attrs_dict = json.loads(headers.get("X-Query-Attributes", "{}")) if headers else {}
        with debugging.span("poll_use_index", sources=self.resources.sources, model=model, engine=engine_name, **query_attrs_dict):
            self.maybe_poll_use_index(
                app_name=app_name,
                sources=self.resources.sources,
                model=model,
                engine_name=engine_name,
                engine_size=engine_size,
                program_span_id=program_span_id,
                headers=headers
            )

        self.last_database_version = len(self.resources.sources)
        self._manage_packages()

        if isolated and not self.keep_model:
            atexit.register(self.delete_database)

    def maybe_poll_use_index(
        self,
        app_name: str,
        sources: Iterable[str],
        model: str,
        engine_name: str,
        engine_size: str | None = None,
        program_span_id: str | None = None,
        headers: Dict | None = None,
    ):
        """Only call _poll_use_index if there are sources to process."""
        # Thin delegation; the emptiness check lives in the resources layer.
        assert isinstance(self.resources, Resources)
        return self.resources.maybe_poll_use_index(
            app_name=app_name,
            sources=sources,
            model=model,
            engine_name=engine_name,
            engine_size=engine_size,
            program_span_id=program_span_id,
            headers=headers
        )
2972
-
2973
- #--------------------------------------------------
2974
- # Graph
2975
- #--------------------------------------------------
2976
-
2977
def Graph(
    name,
    *,
    profile: str | None = None,
    config: Config,
    dry_run: bool = False,
    isolated: bool = True,
    connection: Session | None = None,
    keep_model: bool = False,
    nowait_durable: bool = True,
    format: str = "default",
):
    """Build a :class:`dsl.Graph` named *name* backed by a Snowflake client.

    Creates a Resources instance via the factory, picks the client class
    (``SnowflakeClient`` when the graph index / use_index resources are in
    play, plain ``Client`` otherwise), installs the ``pyrel_base`` Rel
    prelude, and returns the wrapping Graph.
    """
    from .resources_factory import create_resources_instance
    from .use_index_resources import UseIndexResources

    use_graph_index = config.get("use_graph_index", USE_GRAPH_INDEX)
    use_monotype_operators = config.get("compiler.use_monotype_operators", False)

    # Create resources instance using factory
    resources = create_resources_instance(
        config=config,
        profile=profile,
        connection=connection,
        generation=Generation.V0,
        dry_run=False, # Resources instance dry_run is separate from client dry_run
        language="rel",
    )

    # Determine client class based on resources type and config
    # SnowflakeClient is used for resources that support use_index functionality
    if use_graph_index or isinstance(resources, UseIndexResources):
        client_class = SnowflakeClient
    else:
        client_class = Client

    client = client_class(
        resources,
        rel.Compiler(config),
        name,
        config,
        dry_run=dry_run,
        isolated=isolated,
        keep_model=keep_model,
        nowait_durable=nowait_durable
    )
    # Rel prelude installed into every model: hashing, defaults, date/regex
    # helpers, and the __resource/__compiled_patterns declarations.
    base_rel = """
    @inline
    def make_identity(x..., z):
        rel_primitive_hash_tuple_uint128(x..., z)

    @inline
    def pyrel_default({F}, c, k..., v):
        F(k..., v) or (not F(k..., _) and v = c)

    @inline
    def pyrel_unwrap(x in UInt128, y): y = x

    @inline
    def pyrel_dates_period_days(x in Date, y in Date, z in Int):
        exists((u) | dates_period_days(x, y , u) and u = ::std::common::^Day[z])

    @inline
    def pyrel_datetimes_period_milliseconds(x in DateTime, y in DateTime, z in Int):
        exists((u) | datetimes_period_milliseconds(x, y , u) and u = ^Millisecond[z])

    @inline
    def pyrel_bool_filter(a, b, {F}, z): { z = if_then_else[F(a, b), boolean_true, boolean_false] }

    @inline
    def pyrel_strftime(v, fmt, tz in String, s in String):
        (Date(v) and s = format_date[v, fmt])
        or (DateTime(v) and s = format_datetime[v, fmt, tz])

    @inline
    def pyrel_regex_match_all(pattern, string in String, pos in Int, offset in Int, match in String):
        regex_match_all(pattern, string, offset, match) and offset >= pos

    @inline
    def pyrel_regex_match(pattern, string in String, pos in Int, offset in Int, match in String):
        pyrel_regex_match_all(pattern, string, pos, offset, match) and offset = pos

    @inline
    def pyrel_regex_search(pattern, string in String, pos in Int, offset in Int, match in String):
        enumerate(pyrel_regex_match_all[pattern, string, pos], 1, offset, match)

    @inline
    def pyrel_regex_sub(pattern, repl in String, string in String, result in String):
        string_replace_multiple(string, {(last[regex_match_all[pattern, string]], repl)}, result)

    @inline
    def pyrel_capture_group(regex in Pattern, string in String, pos in Int, index, match in String):
        (Integer(index) and capture_group_by_index(regex, string, pos, index, match)) or
        (String(index) and capture_group_by_name(regex, string, pos, index, match))

    declare __resource
    declare __compiled_patterns
    """
    if use_monotype_operators:
        base_rel += """

    // use monotyped operators
    from ::std::monotype import +, -, *, /, <, <=, >, >=
    """
    pyrel_base = dsl.build.raw_task(base_rel)
    debugging.set_source(pyrel_base)
    client.install("pyrel_base", pyrel_base)
    return dsl.Graph(client, name, format=format)