relationalai 0.13.0__py3-none-any.whl → 0.13.0.dev0__py3-none-any.whl
This diff shows the content changes between publicly available package versions released to one of the supported registries, as they appear in their respective public registries. It is provided for informational purposes only.
- relationalai/__init__.py +1 -256
- relationalai/config/__init__.py +56 -0
- relationalai/config/config.py +289 -0
- relationalai/config/config_fields.py +86 -0
- relationalai/config/connections/__init__.py +46 -0
- relationalai/config/connections/base.py +23 -0
- relationalai/config/connections/duckdb.py +29 -0
- relationalai/config/connections/snowflake.py +243 -0
- relationalai/config/external/__init__.py +17 -0
- relationalai/config/external/dbt_converter.py +101 -0
- relationalai/config/external/dbt_models.py +93 -0
- relationalai/config/external/snowflake_converter.py +41 -0
- relationalai/config/external/snowflake_models.py +85 -0
- relationalai/config/external/utils.py +19 -0
- relationalai/semantics/__init__.py +146 -22
- relationalai/semantics/backends/lqp/annotations.py +11 -0
- relationalai/semantics/backends/sql/sql_compiler.py +327 -0
- relationalai/semantics/frontend/base.py +1707 -0
- relationalai/semantics/frontend/core.py +179 -0
- relationalai/semantics/frontend/front_compiler.py +1313 -0
- relationalai/semantics/frontend/pprint.py +408 -0
- relationalai/semantics/metamodel/__init__.py +6 -40
- relationalai/semantics/metamodel/builtins.py +205 -771
- relationalai/semantics/metamodel/metamodel.py +437 -0
- relationalai/semantics/metamodel/metamodel_analyzer.py +519 -0
- relationalai/semantics/metamodel/pprint.py +412 -0
- relationalai/semantics/metamodel/rewriter.py +266 -0
- relationalai/semantics/metamodel/typer.py +1378 -0
- relationalai/semantics/std/__init__.py +60 -40
- relationalai/semantics/std/aggregates.py +149 -0
- relationalai/semantics/std/common.py +44 -0
- relationalai/semantics/std/constraints.py +37 -43
- relationalai/semantics/std/datetime.py +246 -135
- relationalai/semantics/std/decimals.py +45 -52
- relationalai/semantics/std/floats.py +13 -5
- relationalai/semantics/std/integers.py +26 -11
- relationalai/semantics/std/math.py +183 -112
- relationalai/semantics/std/numbers.py +86 -0
- relationalai/semantics/std/re.py +80 -62
- relationalai/semantics/std/strings.py +117 -60
- relationalai/shims/executor.py +147 -0
- relationalai/shims/helpers.py +126 -0
- relationalai/shims/hoister.py +221 -0
- relationalai/shims/mm2v0.py +1290 -0
- relationalai/tools/cli/__init__.py +6 -0
- relationalai/tools/cli/cli.py +90 -0
- relationalai/tools/cli/components/__init__.py +5 -0
- relationalai/tools/cli/components/progress_reader.py +1524 -0
- relationalai/tools/cli/components/utils.py +58 -0
- relationalai/tools/cli/config_template.py +45 -0
- relationalai/tools/cli/dev.py +19 -0
- relationalai/tools/debugger.py +289 -183
- relationalai/tools/typer_debugger.py +93 -0
- relationalai/util/dataclasses.py +43 -0
- relationalai/util/docutils.py +40 -0
- relationalai/util/error.py +199 -0
- relationalai/util/format.py +48 -106
- relationalai/util/naming.py +145 -0
- relationalai/util/python.py +35 -0
- relationalai/util/runtime.py +156 -0
- relationalai/util/schema.py +197 -0
- relationalai/util/source.py +185 -0
- relationalai/util/structures.py +163 -0
- relationalai/util/tracing.py +261 -0
- relationalai-0.13.0.dev0.dist-info/METADATA +46 -0
- relationalai-0.13.0.dev0.dist-info/RECORD +488 -0
- relationalai-0.13.0.dev0.dist-info/WHEEL +5 -0
- relationalai-0.13.0.dev0.dist-info/entry_points.txt +3 -0
- relationalai-0.13.0.dev0.dist-info/top_level.txt +2 -0
- v0/relationalai/__init__.py +216 -0
- v0/relationalai/clients/__init__.py +5 -0
- v0/relationalai/clients/azure.py +477 -0
- v0/relationalai/clients/client.py +912 -0
- v0/relationalai/clients/config.py +673 -0
- v0/relationalai/clients/direct_access_client.py +118 -0
- v0/relationalai/clients/hash_util.py +31 -0
- v0/relationalai/clients/local.py +571 -0
- v0/relationalai/clients/profile_polling.py +73 -0
- v0/relationalai/clients/result_helpers.py +420 -0
- v0/relationalai/clients/snowflake.py +3869 -0
- v0/relationalai/clients/types.py +113 -0
- v0/relationalai/clients/use_index_poller.py +980 -0
- v0/relationalai/clients/util.py +356 -0
- v0/relationalai/debugging.py +389 -0
- v0/relationalai/dsl.py +1749 -0
- v0/relationalai/early_access/builder/__init__.py +30 -0
- v0/relationalai/early_access/builder/builder/__init__.py +35 -0
- v0/relationalai/early_access/builder/snowflake/__init__.py +12 -0
- v0/relationalai/early_access/builder/std/__init__.py +25 -0
- v0/relationalai/early_access/builder/std/decimals/__init__.py +12 -0
- v0/relationalai/early_access/builder/std/integers/__init__.py +12 -0
- v0/relationalai/early_access/builder/std/math/__init__.py +12 -0
- v0/relationalai/early_access/builder/std/strings/__init__.py +14 -0
- v0/relationalai/early_access/devtools/__init__.py +12 -0
- v0/relationalai/early_access/devtools/benchmark_lqp/__init__.py +12 -0
- v0/relationalai/early_access/devtools/extract_lqp/__init__.py +12 -0
- v0/relationalai/early_access/dsl/adapters/orm/adapter_qb.py +427 -0
- v0/relationalai/early_access/dsl/adapters/orm/parser.py +636 -0
- v0/relationalai/early_access/dsl/adapters/owl/adapter.py +176 -0
- v0/relationalai/early_access/dsl/adapters/owl/parser.py +160 -0
- v0/relationalai/early_access/dsl/bindings/common.py +402 -0
- v0/relationalai/early_access/dsl/bindings/csv.py +170 -0
- v0/relationalai/early_access/dsl/bindings/legacy/binding_models.py +143 -0
- v0/relationalai/early_access/dsl/bindings/snowflake.py +64 -0
- v0/relationalai/early_access/dsl/codegen/binder.py +411 -0
- v0/relationalai/early_access/dsl/codegen/common.py +79 -0
- v0/relationalai/early_access/dsl/codegen/helpers.py +23 -0
- v0/relationalai/early_access/dsl/codegen/relations.py +700 -0
- v0/relationalai/early_access/dsl/codegen/weaver.py +417 -0
- v0/relationalai/early_access/dsl/core/builders/__init__.py +47 -0
- v0/relationalai/early_access/dsl/core/builders/logic.py +19 -0
- v0/relationalai/early_access/dsl/core/builders/scalar_constraint.py +11 -0
- v0/relationalai/early_access/dsl/core/constraints/predicate/atomic.py +455 -0
- v0/relationalai/early_access/dsl/core/constraints/predicate/universal.py +73 -0
- v0/relationalai/early_access/dsl/core/constraints/scalar.py +310 -0
- v0/relationalai/early_access/dsl/core/context.py +13 -0
- v0/relationalai/early_access/dsl/core/cset.py +132 -0
- v0/relationalai/early_access/dsl/core/exprs/__init__.py +116 -0
- v0/relationalai/early_access/dsl/core/exprs/relational.py +18 -0
- v0/relationalai/early_access/dsl/core/exprs/scalar.py +412 -0
- v0/relationalai/early_access/dsl/core/instances.py +44 -0
- v0/relationalai/early_access/dsl/core/logic/__init__.py +193 -0
- v0/relationalai/early_access/dsl/core/logic/aggregation.py +98 -0
- v0/relationalai/early_access/dsl/core/logic/exists.py +223 -0
- v0/relationalai/early_access/dsl/core/logic/helper.py +163 -0
- v0/relationalai/early_access/dsl/core/namespaces.py +32 -0
- v0/relationalai/early_access/dsl/core/relations.py +276 -0
- v0/relationalai/early_access/dsl/core/rules.py +112 -0
- v0/relationalai/early_access/dsl/core/std/__init__.py +45 -0
- v0/relationalai/early_access/dsl/core/temporal/recall.py +6 -0
- v0/relationalai/early_access/dsl/core/types/__init__.py +270 -0
- v0/relationalai/early_access/dsl/core/types/concepts.py +128 -0
- v0/relationalai/early_access/dsl/core/types/constrained/__init__.py +267 -0
- v0/relationalai/early_access/dsl/core/types/constrained/nominal.py +143 -0
- v0/relationalai/early_access/dsl/core/types/constrained/subtype.py +124 -0
- v0/relationalai/early_access/dsl/core/types/standard.py +92 -0
- v0/relationalai/early_access/dsl/core/types/unconstrained.py +50 -0
- v0/relationalai/early_access/dsl/core/types/variables.py +203 -0
- v0/relationalai/early_access/dsl/ir/compiler.py +318 -0
- v0/relationalai/early_access/dsl/ir/executor.py +260 -0
- v0/relationalai/early_access/dsl/ontologies/constraints.py +88 -0
- v0/relationalai/early_access/dsl/ontologies/export.py +30 -0
- v0/relationalai/early_access/dsl/ontologies/models.py +453 -0
- v0/relationalai/early_access/dsl/ontologies/python_printer.py +303 -0
- v0/relationalai/early_access/dsl/ontologies/readings.py +60 -0
- v0/relationalai/early_access/dsl/ontologies/relationships.py +322 -0
- v0/relationalai/early_access/dsl/ontologies/roles.py +87 -0
- v0/relationalai/early_access/dsl/ontologies/subtyping.py +55 -0
- v0/relationalai/early_access/dsl/orm/constraints.py +438 -0
- v0/relationalai/early_access/dsl/orm/measures/dimensions.py +200 -0
- v0/relationalai/early_access/dsl/orm/measures/initializer.py +16 -0
- v0/relationalai/early_access/dsl/orm/measures/measure_rules.py +275 -0
- v0/relationalai/early_access/dsl/orm/measures/measures.py +299 -0
- v0/relationalai/early_access/dsl/orm/measures/role_exprs.py +268 -0
- v0/relationalai/early_access/dsl/orm/models.py +256 -0
- v0/relationalai/early_access/dsl/orm/object_oriented_printer.py +344 -0
- v0/relationalai/early_access/dsl/orm/printer.py +469 -0
- v0/relationalai/early_access/dsl/orm/reasoners.py +480 -0
- v0/relationalai/early_access/dsl/orm/relations.py +19 -0
- v0/relationalai/early_access/dsl/orm/relationships.py +251 -0
- v0/relationalai/early_access/dsl/orm/types.py +42 -0
- v0/relationalai/early_access/dsl/orm/utils.py +79 -0
- v0/relationalai/early_access/dsl/orm/verb.py +204 -0
- v0/relationalai/early_access/dsl/physical_metadata/tables.py +133 -0
- v0/relationalai/early_access/dsl/relations.py +170 -0
- v0/relationalai/early_access/dsl/rulesets.py +69 -0
- v0/relationalai/early_access/dsl/schemas/__init__.py +450 -0
- v0/relationalai/early_access/dsl/schemas/builder.py +48 -0
- v0/relationalai/early_access/dsl/schemas/comp_names.py +51 -0
- v0/relationalai/early_access/dsl/schemas/components.py +203 -0
- v0/relationalai/early_access/dsl/schemas/contexts.py +156 -0
- v0/relationalai/early_access/dsl/schemas/exprs.py +89 -0
- v0/relationalai/early_access/dsl/schemas/fragments.py +464 -0
- v0/relationalai/early_access/dsl/serialization.py +79 -0
- v0/relationalai/early_access/dsl/serialize/exporter.py +163 -0
- v0/relationalai/early_access/dsl/snow/api.py +104 -0
- v0/relationalai/early_access/dsl/snow/common.py +76 -0
- v0/relationalai/early_access/dsl/state_mgmt/__init__.py +129 -0
- v0/relationalai/early_access/dsl/state_mgmt/state_charts.py +125 -0
- v0/relationalai/early_access/dsl/state_mgmt/transitions.py +130 -0
- v0/relationalai/early_access/dsl/types/__init__.py +40 -0
- v0/relationalai/early_access/dsl/types/concepts.py +12 -0
- v0/relationalai/early_access/dsl/types/entities.py +135 -0
- v0/relationalai/early_access/dsl/types/values.py +17 -0
- v0/relationalai/early_access/dsl/utils.py +102 -0
- v0/relationalai/early_access/graphs/__init__.py +13 -0
- v0/relationalai/early_access/lqp/__init__.py +12 -0
- v0/relationalai/early_access/lqp/compiler/__init__.py +12 -0
- v0/relationalai/early_access/lqp/constructors/__init__.py +18 -0
- v0/relationalai/early_access/lqp/executor/__init__.py +12 -0
- v0/relationalai/early_access/lqp/ir/__init__.py +12 -0
- v0/relationalai/early_access/lqp/passes/__init__.py +12 -0
- v0/relationalai/early_access/lqp/pragmas/__init__.py +12 -0
- v0/relationalai/early_access/lqp/primitives/__init__.py +12 -0
- v0/relationalai/early_access/lqp/types/__init__.py +12 -0
- v0/relationalai/early_access/lqp/utils/__init__.py +12 -0
- v0/relationalai/early_access/lqp/validators/__init__.py +12 -0
- v0/relationalai/early_access/metamodel/__init__.py +58 -0
- v0/relationalai/early_access/metamodel/builtins/__init__.py +12 -0
- v0/relationalai/early_access/metamodel/compiler/__init__.py +12 -0
- v0/relationalai/early_access/metamodel/dependency/__init__.py +12 -0
- v0/relationalai/early_access/metamodel/factory/__init__.py +17 -0
- v0/relationalai/early_access/metamodel/helpers/__init__.py +12 -0
- v0/relationalai/early_access/metamodel/ir/__init__.py +14 -0
- v0/relationalai/early_access/metamodel/rewrite/__init__.py +7 -0
- v0/relationalai/early_access/metamodel/typer/__init__.py +3 -0
- v0/relationalai/early_access/metamodel/typer/typer/__init__.py +12 -0
- v0/relationalai/early_access/metamodel/types/__init__.py +15 -0
- v0/relationalai/early_access/metamodel/util/__init__.py +15 -0
- v0/relationalai/early_access/metamodel/visitor/__init__.py +12 -0
- v0/relationalai/early_access/rel/__init__.py +12 -0
- v0/relationalai/early_access/rel/executor/__init__.py +12 -0
- v0/relationalai/early_access/rel/rel_utils/__init__.py +12 -0
- v0/relationalai/early_access/rel/rewrite/__init__.py +7 -0
- v0/relationalai/early_access/solvers/__init__.py +19 -0
- v0/relationalai/early_access/sql/__init__.py +11 -0
- v0/relationalai/early_access/sql/executor/__init__.py +3 -0
- v0/relationalai/early_access/sql/rewrite/__init__.py +3 -0
- v0/relationalai/early_access/tests/logging/__init__.py +12 -0
- v0/relationalai/early_access/tests/test_snapshot_base/__init__.py +12 -0
- v0/relationalai/early_access/tests/utils/__init__.py +12 -0
- v0/relationalai/environments/__init__.py +35 -0
- v0/relationalai/environments/base.py +381 -0
- v0/relationalai/environments/colab.py +14 -0
- v0/relationalai/environments/generic.py +71 -0
- v0/relationalai/environments/ipython.py +68 -0
- v0/relationalai/environments/jupyter.py +9 -0
- v0/relationalai/environments/snowbook.py +169 -0
- v0/relationalai/errors.py +2455 -0
- v0/relationalai/experimental/SF.py +38 -0
- v0/relationalai/experimental/inspect.py +47 -0
- v0/relationalai/experimental/pathfinder/__init__.py +158 -0
- v0/relationalai/experimental/pathfinder/api.py +160 -0
- v0/relationalai/experimental/pathfinder/automaton.py +584 -0
- v0/relationalai/experimental/pathfinder/bridge.py +226 -0
- v0/relationalai/experimental/pathfinder/compiler.py +416 -0
- v0/relationalai/experimental/pathfinder/datalog.py +214 -0
- v0/relationalai/experimental/pathfinder/diagnostics.py +56 -0
- v0/relationalai/experimental/pathfinder/filter.py +236 -0
- v0/relationalai/experimental/pathfinder/glushkov.py +439 -0
- v0/relationalai/experimental/pathfinder/options.py +265 -0
- v0/relationalai/experimental/pathfinder/rpq.py +344 -0
- v0/relationalai/experimental/pathfinder/transition.py +200 -0
- v0/relationalai/experimental/pathfinder/utils.py +26 -0
- v0/relationalai/experimental/paths/api.py +143 -0
- v0/relationalai/experimental/paths/benchmarks/grid_graph.py +37 -0
- v0/relationalai/experimental/paths/examples/basic_example.py +40 -0
- v0/relationalai/experimental/paths/examples/minimal_engine_warmup.py +3 -0
- v0/relationalai/experimental/paths/examples/movie_example.py +77 -0
- v0/relationalai/experimental/paths/examples/paths_benchmark.py +115 -0
- v0/relationalai/experimental/paths/examples/paths_example.py +116 -0
- v0/relationalai/experimental/paths/examples/pattern_to_automaton.py +28 -0
- v0/relationalai/experimental/paths/find_paths_via_automaton.py +85 -0
- v0/relationalai/experimental/paths/graph.py +185 -0
- v0/relationalai/experimental/paths/path_algorithms/find_paths.py +280 -0
- v0/relationalai/experimental/paths/path_algorithms/one_sided_ball_repetition.py +26 -0
- v0/relationalai/experimental/paths/path_algorithms/one_sided_ball_upto.py +111 -0
- v0/relationalai/experimental/paths/path_algorithms/single.py +59 -0
- v0/relationalai/experimental/paths/path_algorithms/two_sided_balls_repetition.py +39 -0
- v0/relationalai/experimental/paths/path_algorithms/two_sided_balls_upto.py +103 -0
- v0/relationalai/experimental/paths/path_algorithms/usp-old.py +130 -0
- v0/relationalai/experimental/paths/path_algorithms/usp-tuple.py +183 -0
- v0/relationalai/experimental/paths/path_algorithms/usp.py +150 -0
- v0/relationalai/experimental/paths/product_graph.py +93 -0
- v0/relationalai/experimental/paths/rpq/automaton.py +584 -0
- v0/relationalai/experimental/paths/rpq/diagnostics.py +56 -0
- v0/relationalai/experimental/paths/rpq/rpq.py +378 -0
- v0/relationalai/experimental/paths/tests/tests_limit_sp_max_length.py +90 -0
- v0/relationalai/experimental/paths/tests/tests_limit_sp_multiple.py +119 -0
- v0/relationalai/experimental/paths/tests/tests_limit_sp_single.py +104 -0
- v0/relationalai/experimental/paths/tests/tests_limit_walks_multiple.py +113 -0
- v0/relationalai/experimental/paths/tests/tests_limit_walks_single.py +149 -0
- v0/relationalai/experimental/paths/tests/tests_one_sided_ball_repetition_multiple.py +70 -0
- v0/relationalai/experimental/paths/tests/tests_one_sided_ball_repetition_single.py +64 -0
- v0/relationalai/experimental/paths/tests/tests_one_sided_ball_upto_multiple.py +115 -0
- v0/relationalai/experimental/paths/tests/tests_one_sided_ball_upto_single.py +75 -0
- v0/relationalai/experimental/paths/tests/tests_single_paths.py +152 -0
- v0/relationalai/experimental/paths/tests/tests_single_walks.py +208 -0
- v0/relationalai/experimental/paths/tests/tests_single_walks_undirected.py +297 -0
- v0/relationalai/experimental/paths/tests/tests_two_sided_balls_repetition_multiple.py +107 -0
- v0/relationalai/experimental/paths/tests/tests_two_sided_balls_repetition_single.py +76 -0
- v0/relationalai/experimental/paths/tests/tests_two_sided_balls_upto_multiple.py +76 -0
- v0/relationalai/experimental/paths/tests/tests_two_sided_balls_upto_single.py +110 -0
- v0/relationalai/experimental/paths/tests/tests_usp_nsp_multiple.py +229 -0
- v0/relationalai/experimental/paths/tests/tests_usp_nsp_single.py +108 -0
- v0/relationalai/experimental/paths/tree_agg.py +168 -0
- v0/relationalai/experimental/paths/utilities/iterators.py +27 -0
- v0/relationalai/experimental/paths/utilities/prefix_sum.py +91 -0
- v0/relationalai/experimental/solvers.py +1087 -0
- v0/relationalai/loaders/__init__.py +0 -0
- v0/relationalai/loaders/csv.py +195 -0
- v0/relationalai/loaders/loader.py +177 -0
- v0/relationalai/loaders/types.py +23 -0
- v0/relationalai/rel_emitter.py +373 -0
- v0/relationalai/rel_utils.py +185 -0
- v0/relationalai/semantics/__init__.py +29 -0
- v0/relationalai/semantics/devtools/benchmark_lqp.py +536 -0
- v0/relationalai/semantics/devtools/compilation_manager.py +294 -0
- v0/relationalai/semantics/devtools/extract_lqp.py +110 -0
- v0/relationalai/semantics/internal/internal.py +3785 -0
- v0/relationalai/semantics/internal/snowflake.py +324 -0
- v0/relationalai/semantics/lqp/builtins.py +16 -0
- v0/relationalai/semantics/lqp/compiler.py +22 -0
- v0/relationalai/semantics/lqp/constructors.py +68 -0
- v0/relationalai/semantics/lqp/executor.py +469 -0
- v0/relationalai/semantics/lqp/intrinsics.py +24 -0
- v0/relationalai/semantics/lqp/model2lqp.py +839 -0
- v0/relationalai/semantics/lqp/passes.py +680 -0
- v0/relationalai/semantics/lqp/primitives.py +252 -0
- v0/relationalai/semantics/lqp/result_helpers.py +202 -0
- v0/relationalai/semantics/lqp/rewrite/annotate_constraints.py +57 -0
- v0/relationalai/semantics/lqp/rewrite/cdc.py +216 -0
- v0/relationalai/semantics/lqp/rewrite/extract_common.py +338 -0
- v0/relationalai/semantics/lqp/rewrite/extract_keys.py +449 -0
- v0/relationalai/semantics/lqp/rewrite/function_annotations.py +114 -0
- v0/relationalai/semantics/lqp/rewrite/functional_dependencies.py +314 -0
- v0/relationalai/semantics/lqp/rewrite/quantify_vars.py +296 -0
- v0/relationalai/semantics/lqp/rewrite/splinter.py +76 -0
- v0/relationalai/semantics/lqp/types.py +101 -0
- v0/relationalai/semantics/lqp/utils.py +160 -0
- v0/relationalai/semantics/lqp/validators.py +57 -0
- v0/relationalai/semantics/metamodel/__init__.py +40 -0
- v0/relationalai/semantics/metamodel/builtins.py +774 -0
- v0/relationalai/semantics/metamodel/compiler.py +133 -0
- v0/relationalai/semantics/metamodel/dependency.py +862 -0
- v0/relationalai/semantics/metamodel/executor.py +61 -0
- v0/relationalai/semantics/metamodel/factory.py +287 -0
- v0/relationalai/semantics/metamodel/helpers.py +361 -0
- v0/relationalai/semantics/metamodel/rewrite/discharge_constraints.py +39 -0
- v0/relationalai/semantics/metamodel/rewrite/dnf_union_splitter.py +210 -0
- v0/relationalai/semantics/metamodel/rewrite/extract_nested_logicals.py +78 -0
- v0/relationalai/semantics/metamodel/rewrite/flatten.py +549 -0
- v0/relationalai/semantics/metamodel/rewrite/format_outputs.py +165 -0
- v0/relationalai/semantics/metamodel/typer/checker.py +353 -0
- v0/relationalai/semantics/metamodel/typer/typer.py +1395 -0
- v0/relationalai/semantics/metamodel/util.py +505 -0
- v0/relationalai/semantics/reasoners/__init__.py +10 -0
- v0/relationalai/semantics/reasoners/graph/__init__.py +37 -0
- v0/relationalai/semantics/reasoners/graph/core.py +9020 -0
- v0/relationalai/semantics/reasoners/optimization/__init__.py +68 -0
- v0/relationalai/semantics/reasoners/optimization/common.py +88 -0
- v0/relationalai/semantics/reasoners/optimization/solvers_dev.py +568 -0
- v0/relationalai/semantics/reasoners/optimization/solvers_pb.py +1163 -0
- v0/relationalai/semantics/rel/builtins.py +40 -0
- v0/relationalai/semantics/rel/compiler.py +989 -0
- v0/relationalai/semantics/rel/executor.py +359 -0
- v0/relationalai/semantics/rel/rel.py +482 -0
- v0/relationalai/semantics/rel/rel_utils.py +276 -0
- v0/relationalai/semantics/snowflake/__init__.py +3 -0
- v0/relationalai/semantics/sql/compiler.py +2503 -0
- v0/relationalai/semantics/sql/executor/duck_db.py +52 -0
- v0/relationalai/semantics/sql/executor/result_helpers.py +64 -0
- v0/relationalai/semantics/sql/executor/snowflake.py +145 -0
- v0/relationalai/semantics/sql/rewrite/denormalize.py +222 -0
- v0/relationalai/semantics/sql/rewrite/double_negation.py +49 -0
- v0/relationalai/semantics/sql/rewrite/recursive_union.py +127 -0
- v0/relationalai/semantics/sql/rewrite/sort_output_query.py +246 -0
- v0/relationalai/semantics/sql/sql.py +504 -0
- v0/relationalai/semantics/std/__init__.py +54 -0
- v0/relationalai/semantics/std/constraints.py +43 -0
- v0/relationalai/semantics/std/datetime.py +363 -0
- v0/relationalai/semantics/std/decimals.py +62 -0
- v0/relationalai/semantics/std/floats.py +7 -0
- v0/relationalai/semantics/std/integers.py +22 -0
- v0/relationalai/semantics/std/math.py +141 -0
- v0/relationalai/semantics/std/pragmas.py +11 -0
- v0/relationalai/semantics/std/re.py +83 -0
- v0/relationalai/semantics/std/std.py +14 -0
- v0/relationalai/semantics/std/strings.py +63 -0
- v0/relationalai/semantics/tests/__init__.py +0 -0
- v0/relationalai/semantics/tests/test_snapshot_abstract.py +143 -0
- v0/relationalai/semantics/tests/test_snapshot_base.py +9 -0
- v0/relationalai/semantics/tests/utils.py +46 -0
- v0/relationalai/std/__init__.py +70 -0
- v0/relationalai/tools/__init__.py +0 -0
- v0/relationalai/tools/cli.py +1940 -0
- v0/relationalai/tools/cli_controls.py +1826 -0
- v0/relationalai/tools/cli_helpers.py +390 -0
- v0/relationalai/tools/debugger.py +183 -0
- v0/relationalai/tools/debugger_client.py +109 -0
- v0/relationalai/tools/debugger_server.py +302 -0
- v0/relationalai/tools/dev.py +685 -0
- v0/relationalai/tools/qb_debugger.py +425 -0
- v0/relationalai/util/clean_up_databases.py +95 -0
- v0/relationalai/util/format.py +123 -0
- v0/relationalai/util/list_databases.py +9 -0
- v0/relationalai/util/otel_configuration.py +25 -0
- v0/relationalai/util/otel_handler.py +484 -0
- v0/relationalai/util/snowflake_handler.py +88 -0
- v0/relationalai/util/span_format_test.py +43 -0
- v0/relationalai/util/span_tracker.py +207 -0
- v0/relationalai/util/spans_file_handler.py +72 -0
- v0/relationalai/util/tracing_handler.py +34 -0
- frontend/debugger/dist/.gitignore +0 -2
- frontend/debugger/dist/assets/favicon-Dy0ZgA6N.png +0 -0
- frontend/debugger/dist/assets/index-Cssla-O7.js +0 -208
- frontend/debugger/dist/assets/index-DlHsYx1V.css +0 -9
- frontend/debugger/dist/index.html +0 -17
- relationalai/clients/__init__.py +0 -18
- relationalai/clients/client.py +0 -912
- relationalai/clients/config.py +0 -673
- relationalai/clients/direct_access_client.py +0 -118
- relationalai/clients/hash_util.py +0 -31
- relationalai/clients/local.py +0 -571
- relationalai/clients/profile_polling.py +0 -73
- relationalai/clients/resources/__init__.py +0 -8
- relationalai/clients/resources/azure/azure.py +0 -477
- relationalai/clients/resources/snowflake/__init__.py +0 -20
- relationalai/clients/resources/snowflake/cli_resources.py +0 -87
- relationalai/clients/resources/snowflake/direct_access_resources.py +0 -711
- relationalai/clients/resources/snowflake/engine_state_handlers.py +0 -309
- relationalai/clients/resources/snowflake/error_handlers.py +0 -199
- relationalai/clients/resources/snowflake/export_procedure.py.jinja +0 -249
- relationalai/clients/resources/snowflake/resources_factory.py +0 -99
- relationalai/clients/resources/snowflake/snowflake.py +0 -3083
- relationalai/clients/resources/snowflake/use_index_poller.py +0 -1011
- relationalai/clients/resources/snowflake/use_index_resources.py +0 -188
- relationalai/clients/resources/snowflake/util.py +0 -387
- relationalai/clients/result_helpers.py +0 -420
- relationalai/clients/types.py +0 -113
- relationalai/clients/util.py +0 -356
- relationalai/debugging.py +0 -389
- relationalai/dsl.py +0 -1749
- relationalai/early_access/builder/__init__.py +0 -30
- relationalai/early_access/builder/builder/__init__.py +0 -35
- relationalai/early_access/builder/snowflake/__init__.py +0 -12
- relationalai/early_access/builder/std/__init__.py +0 -25
- relationalai/early_access/builder/std/decimals/__init__.py +0 -12
- relationalai/early_access/builder/std/integers/__init__.py +0 -12
- relationalai/early_access/builder/std/math/__init__.py +0 -12
- relationalai/early_access/builder/std/strings/__init__.py +0 -14
- relationalai/early_access/devtools/__init__.py +0 -12
- relationalai/early_access/devtools/benchmark_lqp/__init__.py +0 -12
- relationalai/early_access/devtools/extract_lqp/__init__.py +0 -12
- relationalai/early_access/dsl/adapters/orm/adapter_qb.py +0 -427
- relationalai/early_access/dsl/adapters/orm/parser.py +0 -636
- relationalai/early_access/dsl/adapters/owl/adapter.py +0 -176
- relationalai/early_access/dsl/adapters/owl/parser.py +0 -160
- relationalai/early_access/dsl/bindings/common.py +0 -402
- relationalai/early_access/dsl/bindings/csv.py +0 -170
- relationalai/early_access/dsl/bindings/legacy/binding_models.py +0 -143
- relationalai/early_access/dsl/bindings/snowflake.py +0 -64
- relationalai/early_access/dsl/codegen/binder.py +0 -411
- relationalai/early_access/dsl/codegen/common.py +0 -79
- relationalai/early_access/dsl/codegen/helpers.py +0 -23
- relationalai/early_access/dsl/codegen/relations.py +0 -700
- relationalai/early_access/dsl/codegen/weaver.py +0 -417
- relationalai/early_access/dsl/core/builders/__init__.py +0 -47
- relationalai/early_access/dsl/core/builders/logic.py +0 -19
- relationalai/early_access/dsl/core/builders/scalar_constraint.py +0 -11
- relationalai/early_access/dsl/core/constraints/predicate/atomic.py +0 -455
- relationalai/early_access/dsl/core/constraints/predicate/universal.py +0 -73
- relationalai/early_access/dsl/core/constraints/scalar.py +0 -310
- relationalai/early_access/dsl/core/context.py +0 -13
- relationalai/early_access/dsl/core/cset.py +0 -132
- relationalai/early_access/dsl/core/exprs/__init__.py +0 -116
- relationalai/early_access/dsl/core/exprs/relational.py +0 -18
- relationalai/early_access/dsl/core/exprs/scalar.py +0 -412
- relationalai/early_access/dsl/core/instances.py +0 -44
- relationalai/early_access/dsl/core/logic/__init__.py +0 -193
- relationalai/early_access/dsl/core/logic/aggregation.py +0 -98
- relationalai/early_access/dsl/core/logic/exists.py +0 -223
- relationalai/early_access/dsl/core/logic/helper.py +0 -163
- relationalai/early_access/dsl/core/namespaces.py +0 -32
- relationalai/early_access/dsl/core/relations.py +0 -276
- relationalai/early_access/dsl/core/rules.py +0 -112
- relationalai/early_access/dsl/core/std/__init__.py +0 -45
- relationalai/early_access/dsl/core/temporal/recall.py +0 -6
- relationalai/early_access/dsl/core/types/__init__.py +0 -270
- relationalai/early_access/dsl/core/types/concepts.py +0 -128
- relationalai/early_access/dsl/core/types/constrained/__init__.py +0 -267
- relationalai/early_access/dsl/core/types/constrained/nominal.py +0 -143
- relationalai/early_access/dsl/core/types/constrained/subtype.py +0 -124
- relationalai/early_access/dsl/core/types/standard.py +0 -92
- relationalai/early_access/dsl/core/types/unconstrained.py +0 -50
- relationalai/early_access/dsl/core/types/variables.py +0 -203
- relationalai/early_access/dsl/ir/compiler.py +0 -318
- relationalai/early_access/dsl/ir/executor.py +0 -260
- relationalai/early_access/dsl/ontologies/constraints.py +0 -88
- relationalai/early_access/dsl/ontologies/export.py +0 -30
- relationalai/early_access/dsl/ontologies/models.py +0 -453
- relationalai/early_access/dsl/ontologies/python_printer.py +0 -303
- relationalai/early_access/dsl/ontologies/readings.py +0 -60
- relationalai/early_access/dsl/ontologies/relationships.py +0 -322
- relationalai/early_access/dsl/ontologies/roles.py +0 -87
- relationalai/early_access/dsl/ontologies/subtyping.py +0 -55
- relationalai/early_access/dsl/orm/constraints.py +0 -438
- relationalai/early_access/dsl/orm/measures/dimensions.py +0 -200
- relationalai/early_access/dsl/orm/measures/initializer.py +0 -16
- relationalai/early_access/dsl/orm/measures/measure_rules.py +0 -275
- relationalai/early_access/dsl/orm/measures/measures.py +0 -299
- relationalai/early_access/dsl/orm/measures/role_exprs.py +0 -268
- relationalai/early_access/dsl/orm/models.py +0 -256
- relationalai/early_access/dsl/orm/object_oriented_printer.py +0 -344
- relationalai/early_access/dsl/orm/printer.py +0 -469
- relationalai/early_access/dsl/orm/reasoners.py +0 -480
- relationalai/early_access/dsl/orm/relations.py +0 -19
- relationalai/early_access/dsl/orm/relationships.py +0 -251
- relationalai/early_access/dsl/orm/types.py +0 -42
- relationalai/early_access/dsl/orm/utils.py +0 -79
- relationalai/early_access/dsl/orm/verb.py +0 -204
- relationalai/early_access/dsl/physical_metadata/tables.py +0 -133
- relationalai/early_access/dsl/relations.py +0 -170
- relationalai/early_access/dsl/rulesets.py +0 -69
- relationalai/early_access/dsl/schemas/__init__.py +0 -450
- relationalai/early_access/dsl/schemas/builder.py +0 -48
- relationalai/early_access/dsl/schemas/comp_names.py +0 -51
- relationalai/early_access/dsl/schemas/components.py +0 -203
- relationalai/early_access/dsl/schemas/contexts.py +0 -156
- relationalai/early_access/dsl/schemas/exprs.py +0 -89
- relationalai/early_access/dsl/schemas/fragments.py +0 -464
- relationalai/early_access/dsl/serialization.py +0 -79
- relationalai/early_access/dsl/serialize/exporter.py +0 -163
- relationalai/early_access/dsl/snow/api.py +0 -105
- relationalai/early_access/dsl/snow/common.py +0 -76
- relationalai/early_access/dsl/state_mgmt/__init__.py +0 -129
- relationalai/early_access/dsl/state_mgmt/state_charts.py +0 -125
- relationalai/early_access/dsl/state_mgmt/transitions.py +0 -130
- relationalai/early_access/dsl/types/__init__.py +0 -40
- relationalai/early_access/dsl/types/concepts.py +0 -12
- relationalai/early_access/dsl/types/entities.py +0 -135
- relationalai/early_access/dsl/types/values.py +0 -17
- relationalai/early_access/dsl/utils.py +0 -102
- relationalai/early_access/graphs/__init__.py +0 -13
- relationalai/early_access/lqp/__init__.py +0 -12
- relationalai/early_access/lqp/compiler/__init__.py +0 -12
- relationalai/early_access/lqp/constructors/__init__.py +0 -18
- relationalai/early_access/lqp/executor/__init__.py +0 -12
- relationalai/early_access/lqp/ir/__init__.py +0 -12
- relationalai/early_access/lqp/passes/__init__.py +0 -12
- relationalai/early_access/lqp/pragmas/__init__.py +0 -12
- relationalai/early_access/lqp/primitives/__init__.py +0 -12
- relationalai/early_access/lqp/types/__init__.py +0 -12
- relationalai/early_access/lqp/utils/__init__.py +0 -12
- relationalai/early_access/lqp/validators/__init__.py +0 -12
- relationalai/early_access/metamodel/__init__.py +0 -58
- relationalai/early_access/metamodel/builtins/__init__.py +0 -12
- relationalai/early_access/metamodel/compiler/__init__.py +0 -12
- relationalai/early_access/metamodel/dependency/__init__.py +0 -12
- relationalai/early_access/metamodel/factory/__init__.py +0 -17
- relationalai/early_access/metamodel/helpers/__init__.py +0 -12
- relationalai/early_access/metamodel/ir/__init__.py +0 -14
- relationalai/early_access/metamodel/rewrite/__init__.py +0 -7
- relationalai/early_access/metamodel/typer/__init__.py +0 -3
- relationalai/early_access/metamodel/typer/typer/__init__.py +0 -12
- relationalai/early_access/metamodel/types/__init__.py +0 -15
- relationalai/early_access/metamodel/util/__init__.py +0 -15
- relationalai/early_access/metamodel/visitor/__init__.py +0 -12
- relationalai/early_access/rel/__init__.py +0 -12
- relationalai/early_access/rel/executor/__init__.py +0 -12
- relationalai/early_access/rel/rel_utils/__init__.py +0 -12
- relationalai/early_access/rel/rewrite/__init__.py +0 -7
- relationalai/early_access/solvers/__init__.py +0 -19
- relationalai/early_access/sql/__init__.py +0 -11
- relationalai/early_access/sql/executor/__init__.py +0 -3
- relationalai/early_access/sql/rewrite/__init__.py +0 -3
- relationalai/early_access/tests/logging/__init__.py +0 -12
- relationalai/early_access/tests/test_snapshot_base/__init__.py +0 -12
- relationalai/early_access/tests/utils/__init__.py +0 -12
- relationalai/environments/__init__.py +0 -35
- relationalai/environments/base.py +0 -381
- relationalai/environments/colab.py +0 -14
- relationalai/environments/generic.py +0 -71
- relationalai/environments/ipython.py +0 -68
- relationalai/environments/jupyter.py +0 -9
- relationalai/environments/snowbook.py +0 -169
- relationalai/errors.py +0 -2478
- relationalai/experimental/SF.py +0 -38
- relationalai/experimental/inspect.py +0 -47
- relationalai/experimental/pathfinder/__init__.py +0 -158
- relationalai/experimental/pathfinder/api.py +0 -160
- relationalai/experimental/pathfinder/automaton.py +0 -584
- relationalai/experimental/pathfinder/bridge.py +0 -226
- relationalai/experimental/pathfinder/compiler.py +0 -416
- relationalai/experimental/pathfinder/datalog.py +0 -214
- relationalai/experimental/pathfinder/diagnostics.py +0 -56
- relationalai/experimental/pathfinder/filter.py +0 -236
- relationalai/experimental/pathfinder/glushkov.py +0 -439
- relationalai/experimental/pathfinder/options.py +0 -265
- relationalai/experimental/pathfinder/pathfinder-v0.7.0.rel +0 -1951
- relationalai/experimental/pathfinder/rpq.py +0 -344
- relationalai/experimental/pathfinder/transition.py +0 -200
- relationalai/experimental/pathfinder/utils.py +0 -26
- relationalai/experimental/paths/README.md +0 -107
- relationalai/experimental/paths/api.py +0 -143
- relationalai/experimental/paths/benchmarks/grid_graph.py +0 -37
- relationalai/experimental/paths/code_organization.md +0 -2
- relationalai/experimental/paths/examples/Movies.ipynb +0 -16328
- relationalai/experimental/paths/examples/basic_example.py +0 -40
- relationalai/experimental/paths/examples/minimal_engine_warmup.py +0 -3
- relationalai/experimental/paths/examples/movie_example.py +0 -77
- relationalai/experimental/paths/examples/movies_data/actedin.csv +0 -193
- relationalai/experimental/paths/examples/movies_data/directed.csv +0 -45
- relationalai/experimental/paths/examples/movies_data/follows.csv +0 -7
- relationalai/experimental/paths/examples/movies_data/movies.csv +0 -39
- relationalai/experimental/paths/examples/movies_data/person.csv +0 -134
- relationalai/experimental/paths/examples/movies_data/produced.csv +0 -16
- relationalai/experimental/paths/examples/movies_data/ratings.csv +0 -10
- relationalai/experimental/paths/examples/movies_data/wrote.csv +0 -11
- relationalai/experimental/paths/examples/paths_benchmark.py +0 -115
- relationalai/experimental/paths/examples/paths_example.py +0 -116
- relationalai/experimental/paths/examples/pattern_to_automaton.py +0 -28
- relationalai/experimental/paths/find_paths_via_automaton.py +0 -85
- relationalai/experimental/paths/graph.py +0 -185
- relationalai/experimental/paths/path_algorithms/find_paths.py +0 -280
- relationalai/experimental/paths/path_algorithms/one_sided_ball_repetition.py +0 -26
- relationalai/experimental/paths/path_algorithms/one_sided_ball_upto.py +0 -111
- relationalai/experimental/paths/path_algorithms/single.py +0 -59
- relationalai/experimental/paths/path_algorithms/two_sided_balls_repetition.py +0 -39
- relationalai/experimental/paths/path_algorithms/two_sided_balls_upto.py +0 -103
- relationalai/experimental/paths/path_algorithms/usp-old.py +0 -130
- relationalai/experimental/paths/path_algorithms/usp-tuple.py +0 -183
- relationalai/experimental/paths/path_algorithms/usp.py +0 -150
- relationalai/experimental/paths/product_graph.py +0 -93
- relationalai/experimental/paths/rpq/automaton.py +0 -584
- relationalai/experimental/paths/rpq/diagnostics.py +0 -56
- relationalai/experimental/paths/rpq/rpq.py +0 -378
- relationalai/experimental/paths/tests/tests_limit_sp_max_length.py +0 -90
- relationalai/experimental/paths/tests/tests_limit_sp_multiple.py +0 -119
- relationalai/experimental/paths/tests/tests_limit_sp_single.py +0 -104
- relationalai/experimental/paths/tests/tests_limit_walks_multiple.py +0 -113
- relationalai/experimental/paths/tests/tests_limit_walks_single.py +0 -149
- relationalai/experimental/paths/tests/tests_one_sided_ball_repetition_multiple.py +0 -70
- relationalai/experimental/paths/tests/tests_one_sided_ball_repetition_single.py +0 -64
- relationalai/experimental/paths/tests/tests_one_sided_ball_upto_multiple.py +0 -115
- relationalai/experimental/paths/tests/tests_one_sided_ball_upto_single.py +0 -75
- relationalai/experimental/paths/tests/tests_single_paths.py +0 -152
- relationalai/experimental/paths/tests/tests_single_walks.py +0 -208
- relationalai/experimental/paths/tests/tests_single_walks_undirected.py +0 -297
- relationalai/experimental/paths/tests/tests_two_sided_balls_repetition_multiple.py +0 -107
- relationalai/experimental/paths/tests/tests_two_sided_balls_repetition_single.py +0 -76
- relationalai/experimental/paths/tests/tests_two_sided_balls_upto_multiple.py +0 -76
- relationalai/experimental/paths/tests/tests_two_sided_balls_upto_single.py +0 -110
- relationalai/experimental/paths/tests/tests_usp_nsp_multiple.py +0 -229
- relationalai/experimental/paths/tests/tests_usp_nsp_single.py +0 -108
- relationalai/experimental/paths/tree_agg.py +0 -168
- relationalai/experimental/paths/utilities/iterators.py +0 -27
- relationalai/experimental/paths/utilities/prefix_sum.py +0 -91
- relationalai/experimental/solvers.py +0 -1087
- relationalai/loaders/csv.py +0 -195
- relationalai/loaders/loader.py +0 -177
- relationalai/loaders/types.py +0 -23
- relationalai/rel_emitter.py +0 -373
- relationalai/rel_utils.py +0 -185
- relationalai/semantics/designs/query_builder/identify_by.md +0 -106
- relationalai/semantics/devtools/benchmark_lqp.py +0 -535
- relationalai/semantics/devtools/compilation_manager.py +0 -294
- relationalai/semantics/devtools/extract_lqp.py +0 -110
- relationalai/semantics/internal/internal.py +0 -3785
- relationalai/semantics/internal/snowflake.py +0 -325
- relationalai/semantics/lqp/README.md +0 -34
- relationalai/semantics/lqp/builtins.py +0 -16
- relationalai/semantics/lqp/compiler.py +0 -22
- relationalai/semantics/lqp/constructors.py +0 -68
- relationalai/semantics/lqp/executor.py +0 -469
- relationalai/semantics/lqp/intrinsics.py +0 -24
- relationalai/semantics/lqp/model2lqp.py +0 -877
- relationalai/semantics/lqp/passes.py +0 -680
- relationalai/semantics/lqp/primitives.py +0 -252
- relationalai/semantics/lqp/result_helpers.py +0 -202
- relationalai/semantics/lqp/rewrite/annotate_constraints.py +0 -57
- relationalai/semantics/lqp/rewrite/cdc.py +0 -216
- relationalai/semantics/lqp/rewrite/extract_common.py +0 -338
- relationalai/semantics/lqp/rewrite/extract_keys.py +0 -506
- relationalai/semantics/lqp/rewrite/function_annotations.py +0 -114
- relationalai/semantics/lqp/rewrite/functional_dependencies.py +0 -314
- relationalai/semantics/lqp/rewrite/quantify_vars.py +0 -296
- relationalai/semantics/lqp/rewrite/splinter.py +0 -76
- relationalai/semantics/lqp/types.py +0 -101
- relationalai/semantics/lqp/utils.py +0 -160
- relationalai/semantics/lqp/validators.py +0 -57
- relationalai/semantics/metamodel/compiler.py +0 -133
- relationalai/semantics/metamodel/dependency.py +0 -862
- relationalai/semantics/metamodel/executor.py +0 -61
- relationalai/semantics/metamodel/factory.py +0 -287
- relationalai/semantics/metamodel/helpers.py +0 -361
- relationalai/semantics/metamodel/rewrite/discharge_constraints.py +0 -39
- relationalai/semantics/metamodel/rewrite/dnf_union_splitter.py +0 -210
- relationalai/semantics/metamodel/rewrite/extract_nested_logicals.py +0 -78
- relationalai/semantics/metamodel/rewrite/flatten.py +0 -554
- relationalai/semantics/metamodel/rewrite/format_outputs.py +0 -165
- relationalai/semantics/metamodel/typer/checker.py +0 -353
- relationalai/semantics/metamodel/typer/typer.py +0 -1395
- relationalai/semantics/metamodel/util.py +0 -506
- relationalai/semantics/reasoners/__init__.py +0 -10
- relationalai/semantics/reasoners/graph/README.md +0 -620
- relationalai/semantics/reasoners/graph/__init__.py +0 -37
- relationalai/semantics/reasoners/graph/core.py +0 -9019
- relationalai/semantics/reasoners/graph/design/beyond_demand_transform.md +0 -797
- relationalai/semantics/reasoners/graph/tests/README.md +0 -21
- relationalai/semantics/reasoners/optimization/__init__.py +0 -68
- relationalai/semantics/reasoners/optimization/common.py +0 -88
- relationalai/semantics/reasoners/optimization/solvers_dev.py +0 -568
- relationalai/semantics/reasoners/optimization/solvers_pb.py +0 -1163
- relationalai/semantics/rel/builtins.py +0 -40
- relationalai/semantics/rel/compiler.py +0 -989
- relationalai/semantics/rel/executor.py +0 -362
- relationalai/semantics/rel/rel.py +0 -482
- relationalai/semantics/rel/rel_utils.py +0 -276
- relationalai/semantics/snowflake/__init__.py +0 -3
- relationalai/semantics/sql/compiler.py +0 -2503
- relationalai/semantics/sql/executor/duck_db.py +0 -52
- relationalai/semantics/sql/executor/result_helpers.py +0 -64
- relationalai/semantics/sql/executor/snowflake.py +0 -149
- relationalai/semantics/sql/rewrite/denormalize.py +0 -222
- relationalai/semantics/sql/rewrite/double_negation.py +0 -49
- relationalai/semantics/sql/rewrite/recursive_union.py +0 -127
- relationalai/semantics/sql/rewrite/sort_output_query.py +0 -246
- relationalai/semantics/sql/sql.py +0 -504
- relationalai/semantics/std/pragmas.py +0 -11
- relationalai/semantics/std/std.py +0 -14
- relationalai/semantics/tests/test_snapshot_abstract.py +0 -143
- relationalai/semantics/tests/test_snapshot_base.py +0 -9
- relationalai/semantics/tests/utils.py +0 -46
- relationalai/std/__init__.py +0 -70
- relationalai/tools/cli.py +0 -1936
- relationalai/tools/cli_controls.py +0 -1826
- relationalai/tools/cli_helpers.py +0 -398
- relationalai/tools/debugger_client.py +0 -109
- relationalai/tools/debugger_server.py +0 -302
- relationalai/tools/dev.py +0 -685
- relationalai/tools/notes +0 -7
- relationalai/tools/qb_debugger.py +0 -425
- relationalai/util/clean_up_databases.py +0 -95
- relationalai/util/list_databases.py +0 -9
- relationalai/util/otel_configuration.py +0 -26
- relationalai/util/otel_handler.py +0 -484
- relationalai/util/snowflake_handler.py +0 -88
- relationalai/util/span_format_test.py +0 -43
- relationalai/util/span_tracker.py +0 -207
- relationalai/util/spans_file_handler.py +0 -72
- relationalai/util/tracing_handler.py +0 -34
- relationalai-0.13.0.dist-info/METADATA +0 -74
- relationalai-0.13.0.dist-info/RECORD +0 -458
- relationalai-0.13.0.dist-info/WHEEL +0 -4
- relationalai-0.13.0.dist-info/entry_points.txt +0 -3
- relationalai-0.13.0.dist-info/licenses/LICENSE +0 -202
- relationalai_test_util/__init__.py +0 -4
- relationalai_test_util/fixtures.py +0 -229
- relationalai_test_util/snapshot.py +0 -252
- relationalai_test_util/traceback.py +0 -118
- /relationalai/{analysis → semantics/frontend}/__init__.py +0 -0
- /relationalai/{auth/__init__.py → semantics/metamodel/metamodel_compiler.py} +0 -0
- /relationalai/{early_access → shims}/__init__.py +0 -0
- {relationalai/early_access/dsl/adapters → v0/relationalai/analysis}/__init__.py +0 -0
- {relationalai → v0/relationalai}/analysis/mechanistic.py +0 -0
- {relationalai → v0/relationalai}/analysis/whynot.py +0 -0
- {relationalai/early_access/dsl/adapters/orm → v0/relationalai/auth}/__init__.py +0 -0
- {relationalai → v0/relationalai}/auth/jwt_generator.py +0 -0
- {relationalai → v0/relationalai}/auth/oauth_callback_server.py +0 -0
- {relationalai → v0/relationalai}/auth/token_handler.py +0 -0
- {relationalai → v0/relationalai}/auth/util.py +0 -0
- {relationalai/clients/resources/snowflake → v0/relationalai/clients}/cache_store.py +0 -0
- {relationalai → v0/relationalai}/compiler.py +0 -0
- {relationalai → v0/relationalai}/dependencies.py +0 -0
- {relationalai → v0/relationalai}/docutils.py +0 -0
- {relationalai/early_access/dsl/adapters/owl → v0/relationalai/early_access}/__init__.py +0 -0
- {relationalai → v0/relationalai}/early_access/dsl/__init__.py +0 -0
- {relationalai/early_access/dsl/bindings → v0/relationalai/early_access/dsl/adapters}/__init__.py +0 -0
- {relationalai/early_access/dsl/bindings/legacy → v0/relationalai/early_access/dsl/adapters/orm}/__init__.py +0 -0
- {relationalai → v0/relationalai}/early_access/dsl/adapters/orm/model.py +0 -0
- {relationalai/early_access/dsl/codegen → v0/relationalai/early_access/dsl/adapters/owl}/__init__.py +0 -0
- {relationalai → v0/relationalai}/early_access/dsl/adapters/owl/model.py +0 -0
- {relationalai/early_access/dsl/core/temporal → v0/relationalai/early_access/dsl/bindings}/__init__.py +0 -0
- {relationalai/early_access/dsl/ir → v0/relationalai/early_access/dsl/bindings/legacy}/__init__.py +0 -0
- {relationalai/early_access/dsl/ontologies → v0/relationalai/early_access/dsl/codegen}/__init__.py +0 -0
- {relationalai → v0/relationalai}/early_access/dsl/constants.py +0 -0
- {relationalai → v0/relationalai}/early_access/dsl/core/__init__.py +0 -0
- {relationalai → v0/relationalai}/early_access/dsl/core/constraints/__init__.py +0 -0
- {relationalai → v0/relationalai}/early_access/dsl/core/constraints/predicate/__init__.py +0 -0
- {relationalai → v0/relationalai}/early_access/dsl/core/stack.py +0 -0
- {relationalai/early_access/dsl/orm → v0/relationalai/early_access/dsl/core/temporal}/__init__.py +0 -0
- {relationalai → v0/relationalai}/early_access/dsl/core/utils.py +0 -0
- {relationalai/early_access/dsl/orm/measures → v0/relationalai/early_access/dsl/ir}/__init__.py +0 -0
- {relationalai/early_access/dsl/physical_metadata → v0/relationalai/early_access/dsl/ontologies}/__init__.py +0 -0
- {relationalai → v0/relationalai}/early_access/dsl/ontologies/raw_source.py +0 -0
- {relationalai/early_access/dsl/serialize → v0/relationalai/early_access/dsl/orm}/__init__.py +0 -0
- {relationalai/early_access/dsl/snow → v0/relationalai/early_access/dsl/orm/measures}/__init__.py +0 -0
- {relationalai → v0/relationalai}/early_access/dsl/orm/reasoner_errors.py +0 -0
- {relationalai/loaders → v0/relationalai/early_access/dsl/physical_metadata}/__init__.py +0 -0
- {relationalai/semantics/tests → v0/relationalai/early_access/dsl/serialize}/__init__.py +0 -0
- {relationalai → v0/relationalai}/early_access/dsl/serialize/binding_model.py +0 -0
- {relationalai → v0/relationalai}/early_access/dsl/serialize/model.py +0 -0
- {relationalai/tools → v0/relationalai/early_access/dsl/snow}/__init__.py +0 -0
- {relationalai → v0/relationalai}/early_access/tests/__init__.py +0 -0
- {relationalai → v0/relationalai}/environments/ci.py +0 -0
- {relationalai → v0/relationalai}/environments/hex.py +0 -0
- {relationalai → v0/relationalai}/environments/terminal.py +0 -0
- {relationalai → v0/relationalai}/experimental/__init__.py +0 -0
- {relationalai → v0/relationalai}/experimental/graphs.py +0 -0
- {relationalai → v0/relationalai}/experimental/paths/__init__.py +0 -0
- {relationalai → v0/relationalai}/experimental/paths/benchmarks/__init__.py +0 -0
- {relationalai → v0/relationalai}/experimental/paths/path_algorithms/__init__.py +0 -0
- {relationalai → v0/relationalai}/experimental/paths/rpq/__init__.py +0 -0
- {relationalai → v0/relationalai}/experimental/paths/rpq/filter.py +0 -0
- {relationalai → v0/relationalai}/experimental/paths/rpq/glushkov.py +0 -0
- {relationalai → v0/relationalai}/experimental/paths/rpq/transition.py +0 -0
- {relationalai → v0/relationalai}/experimental/paths/utilities/__init__.py +0 -0
- {relationalai → v0/relationalai}/experimental/paths/utilities/utilities.py +0 -0
- {relationalai → v0/relationalai}/metagen.py +0 -0
- {relationalai → v0/relationalai}/metamodel.py +0 -0
- {relationalai → v0/relationalai}/rel.py +0 -0
- {relationalai → v0/relationalai}/semantics/devtools/__init__.py +0 -0
- {relationalai → v0/relationalai}/semantics/internal/__init__.py +0 -0
- {relationalai → v0/relationalai}/semantics/internal/annotations.py +0 -0
- {relationalai → v0/relationalai}/semantics/lqp/__init__.py +0 -0
- {relationalai → v0/relationalai}/semantics/lqp/ir.py +0 -0
- {relationalai → v0/relationalai}/semantics/lqp/pragmas.py +0 -0
- {relationalai → v0/relationalai}/semantics/lqp/rewrite/__init__.py +0 -0
- {relationalai → v0/relationalai}/semantics/metamodel/dataflow.py +0 -0
- {relationalai → v0/relationalai}/semantics/metamodel/ir.py +0 -0
- {relationalai → v0/relationalai}/semantics/metamodel/rewrite/__init__.py +0 -0
- {relationalai → v0/relationalai}/semantics/metamodel/typer/__init__.py +0 -0
- {relationalai → v0/relationalai}/semantics/metamodel/types.py +0 -0
- {relationalai → v0/relationalai}/semantics/metamodel/visitor.py +0 -0
- {relationalai → v0/relationalai}/semantics/reasoners/experimental/__init__.py +0 -0
- {relationalai → v0/relationalai}/semantics/rel/__init__.py +0 -0
- {relationalai → v0/relationalai}/semantics/sql/__init__.py +0 -0
- {relationalai → v0/relationalai}/semantics/sql/executor/__init__.py +0 -0
- {relationalai → v0/relationalai}/semantics/sql/rewrite/__init__.py +0 -0
- {relationalai → v0/relationalai}/semantics/tests/logging.py +0 -0
- {relationalai → v0/relationalai}/std/aggregates.py +0 -0
- {relationalai → v0/relationalai}/std/dates.py +0 -0
- {relationalai → v0/relationalai}/std/graphs.py +0 -0
- {relationalai → v0/relationalai}/std/inspect.py +0 -0
- {relationalai → v0/relationalai}/std/math.py +0 -0
- {relationalai → v0/relationalai}/std/re.py +0 -0
- {relationalai → v0/relationalai}/std/strings.py +0 -0
- {relationalai → v0/relationalai}/tools/cleanup_snapshots.py +0 -0
- {relationalai → v0/relationalai}/tools/constants.py +0 -0
- {relationalai → v0/relationalai}/tools/query_utils.py +0 -0
- {relationalai → v0/relationalai}/tools/snapshot_viewer.py +0 -0
- {relationalai → v0/relationalai}/util/__init__.py +0 -0
- {relationalai → v0/relationalai}/util/constants.py +0 -0
- {relationalai → v0/relationalai}/util/graph.py +0 -0
- {relationalai → v0/relationalai}/util/timeout.py +0 -0
|
@@ -1,3083 +0,0 @@
|
|
|
1
|
-
# pyright: reportUnusedExpression=false
|
|
2
|
-
from __future__ import annotations
|
|
3
|
-
import base64
|
|
4
|
-
import importlib.resources
|
|
5
|
-
import io
|
|
6
|
-
import re
|
|
7
|
-
import json
|
|
8
|
-
import time
|
|
9
|
-
import textwrap
|
|
10
|
-
import ast
|
|
11
|
-
import uuid
|
|
12
|
-
import warnings
|
|
13
|
-
import atexit
|
|
14
|
-
import hashlib
|
|
15
|
-
from dataclasses import dataclass
|
|
16
|
-
|
|
17
|
-
from ....auth.token_handler import TokenHandler
|
|
18
|
-
import snowflake.snowpark
|
|
19
|
-
|
|
20
|
-
from ....rel_utils import sanitize_identifier, to_fqn_relation_name
|
|
21
|
-
from ....tools.constants import FIELD_PLACEHOLDER, SNOWFLAKE_AUTHS, USE_GRAPH_INDEX, DEFAULT_QUERY_TIMEOUT_MINS, WAIT_FOR_STREAM_SYNC, Generation
|
|
22
|
-
from .... import std
|
|
23
|
-
from collections import defaultdict
|
|
24
|
-
import requests
|
|
25
|
-
import snowflake.connector
|
|
26
|
-
import pyarrow as pa
|
|
27
|
-
|
|
28
|
-
from snowflake.snowpark import Session
|
|
29
|
-
from snowflake.snowpark.context import get_active_session
|
|
30
|
-
from ... import result_helpers
|
|
31
|
-
from .... import debugging
|
|
32
|
-
from typing import Any, Dict, Iterable, Tuple, List, Literal, cast
|
|
33
|
-
|
|
34
|
-
from pandas import DataFrame
|
|
35
|
-
|
|
36
|
-
from ....tools.cli_controls import Spinner
|
|
37
|
-
from ...types import AvailableModel, EngineState, Import, ImportSource, ImportSourceTable, ImportsStatus, SourceInfo, TransactionAsyncResponse
|
|
38
|
-
from ...config import Config
|
|
39
|
-
from ...client import Client, ExportParams, ProviderBase, ResourcesBase
|
|
40
|
-
from ...util import IdentityParser, escape_for_f_string, get_pyrel_version, get_with_retries, poll_with_specified_overhead, safe_json_loads, sanitize_module_name, scrub_exception, wrap_with_request_id, normalize_datetime
|
|
41
|
-
from .util import (
|
|
42
|
-
    collect_error_messages,
    process_jinja_template,
    type_to_sql,
    type_to_snowpark,
    sanitize_user_name as _sanitize_user_name,
    normalize_params,
    format_sproc_name,
    is_azure_url,
    is_container_runtime,
    imports_to_dicts,
    txn_list_to_dicts,
    decrypt_artifact,
)
from ....environments import runtime_env, HexEnvironment, SnowbookEnvironment
from .... import dsl, rel, metamodel as m
from ....errors import (
    EngineProvisioningFailed,
    EngineNameValidationException,
    Errors,
    InvalidAliasError,
    InvalidEngineSizeError,
    InvalidSourceTypeWarning,
    RAIException,
    HexSessionException,
    SnowflakeChangeTrackingNotEnabledException,
    SnowflakeDatabaseException,
    SnowflakeImportMissingException,
    SnowflakeInvalidSource,
    SnowflakeMissingConfigValuesException,
    SnowflakeProxyAPIDeprecationWarning,
    SnowflakeProxySourceError,
    ModelNotFoundException,
    UnknownSourceWarning,
    RowsDroppedFromTargetTableWarning,
    QueryTimeoutExceededException,
)
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime, timedelta
from snowflake.snowpark.types import StringType, StructField, StructType

# Import error handlers and constants
from .error_handlers import (
    ErrorHandler,
    DuoSecurityErrorHandler,
    AppMissingErrorHandler,
    DatabaseErrorsHandler,
    EngineErrorsHandler,
    ServiceNotStartedErrorHandler,
    TransactionAbortedErrorHandler,
)

# Import engine state handlers
from .engine_state_handlers import (
    EngineStateHandler,
    EngineContext,
    SyncPendingStateHandler,
    SyncSuspendedStateHandler,
    SyncReadyStateHandler,
    SyncGoneStateHandler,
    SyncMissingEngineHandler,
    AsyncPendingStateHandler,
    AsyncSuspendedStateHandler,
    AsyncReadyStateHandler,
    AsyncGoneStateHandler,
    AsyncMissingEngineHandler,
)


#--------------------------------------------------
# Constants
#--------------------------------------------------

VALID_POOL_STATUS = ["ACTIVE", "IDLE", "SUSPENDED"]
# transaction list and get return different fields (duration vs timings)
LIST_TXN_SQL_FIELDS = ["id", "database_name", "engine_name", "state", "abort_reason", "read_only", "created_by", "created_on", "finished_at", "duration"]
GET_TXN_SQL_FIELDS = ["id", "database", "engine", "state", "abort_reason", "read_only", "created_by", "created_on", "finished_at", "timings"]
VALID_ENGINE_STATES = ["READY", "PENDING"]

# Cloud-specific engine sizes
INTERNAL_ENGINE_SIZES = ["XS", "S", "M", "L"]
ENGINE_SIZES_AWS = ["HIGHMEM_X64_S", "HIGHMEM_X64_M", "HIGHMEM_X64_L"]
ENGINE_SIZES_AZURE = ["HIGHMEM_X64_S", "HIGHMEM_X64_M", "HIGHMEM_X64_SL"]
# Note: ENGINE_ERRORS, ENGINE_NOT_READY_MSGS, DATABASE_ERRORS moved to util.py
PYREL_ROOT_DB = 'pyrel_root_db'

TERMINAL_TXN_STATES = ["COMPLETED", "ABORTED"]

TXN_ABORT_REASON_TIMEOUT = "transaction timeout"

#--------------------------------------------------
# Resources
#--------------------------------------------------

APP_NAME = "___RAI_APP___"

@dataclass
class ExecContext:
    """Execution context for SQL queries, containing all parameters needed for execution and retry."""
    code: str
    params: List[Any] | None = None
    raw: bool = False
    help: bool = True
    skip_engine_db_error_retry: bool = False

    def re_execute(self, resources: 'Resources') -> Any:
        """Re-execute this context's query using the provided resources instance."""
        return resources._exec(
            code=self.code,
            params=self.params,
            raw=self.raw,
            help=self.help,
            skip_engine_db_error_retry=self.skip_engine_db_error_retry
        )

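As a usage sketch (not from the module itself: `resources` stands in for an already-constructed Resources instance, and the SHOW command is illustrative), replaying a failed query through the context looks like this:

# Hedged sketch: retrying a query via ExecContext.re_execute.
ctx = ExecContext(code="SHOW DATABASES;", help=False)
try:
    rows = resources._exec(ctx.code, ctx.params, raw=ctx.raw, help=ctx.help)
except Exception:
    # help=False made the first attempt raise immediately;
    # replay the identical call once more
    rows = ctx.re_execute(resources)
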
class Resources(ResourcesBase):
    def __init__(
        self,
        profile: str | None = None,
        config: Config | None = None,
        connection: Session | None = None,
        dry_run: bool = False,
        reset_session: bool = False,
        generation: Generation | None = None,
        language: str = "rel",  # Accepted for backward compatibility, but not stored in base class
    ):
        super().__init__(profile, config=config)
        self._token_handler: TokenHandler | None = None
        self._session = connection
        self.generation = generation
        if self._session is None and not dry_run:
            try:
                # The config may still be under construction, so this can fail
                # here; if so, we'll create the session later.
                self._session = self.get_sf_session(reset_session)
            except Exception:
                pass
        self._pending_transactions: list[str] = []
        self._ns_cache = {}
        # self.sources contains fully qualified Snowflake table/view names
        self.sources: set[str] = set()
        self._sproc_models = None
        # Store language for backward compatibility (used by child classes for use_index polling)
        self.language = language
        # Register error and state handlers
        self._register_handlers()
        # Register atexit callback to cancel pending transactions
        atexit.register(self.cancel_pending_transactions)

    #--------------------------------------------------
    # Initialization & Properties
    #--------------------------------------------------

    def _register_handlers(self) -> None:
        """Register error and engine state handlers for processing."""
        # Register base handlers using getter methods that subclasses can override.
        # Use defensive copying to ensure each instance has its own handler lists
        # and prevent cross-instance contamination from subclass mutations.
        self._error_handlers = list(self._get_error_handlers())
        self._sync_engine_state_handlers = list(self._get_engine_state_handlers(is_async=False))
        self._async_engine_state_handlers = list(self._get_engine_state_handlers(is_async=True))

    def _get_error_handlers(self) -> list[ErrorHandler]:
        """Get the list of error handlers. Subclasses can override to add custom handlers.

        Returns:
            List of error handlers for standard error processing using the Strategy Pattern.

        Example:
            def _get_error_handlers(self) -> list[ErrorHandler]:
                # Get base handlers
                handlers = super()._get_error_handlers()
                # Add custom handler
                handlers.append(MyCustomErrorHandler())
                return handlers
        """
        return [
            DuoSecurityErrorHandler(),
            AppMissingErrorHandler(),
            DatabaseErrorsHandler(),
            EngineErrorsHandler(),
            ServiceNotStartedErrorHandler(),
            TransactionAbortedErrorHandler(),
        ]

    def _get_engine_state_handlers(self, is_async: bool = False) -> list[EngineStateHandler]:
        """Get the list of engine state handlers. Subclasses can override.

        Args:
            is_async: If True, returns async handlers; if False, returns sync handlers.

        Returns:
            List of engine state handlers for processing engine states.

        Example:
            def _get_engine_state_handlers(self, is_async: bool = False) -> list[EngineStateHandler]:
                # Get base handlers
                handlers = super()._get_engine_state_handlers(is_async)
                # Add custom handler
                handlers.append(MyCustomStateHandler())
                return handlers
        """
        if is_async:
            return [
                AsyncPendingStateHandler(),
                AsyncSuspendedStateHandler(),
                AsyncReadyStateHandler(),
                AsyncGoneStateHandler(),
                AsyncMissingEngineHandler(),
            ]
        else:
            return [
                SyncPendingStateHandler(),
                SyncSuspendedStateHandler(),
                SyncReadyStateHandler(),
                SyncGoneStateHandler(),
                SyncMissingEngineHandler(),
            ]

    @property
    def token_handler(self) -> TokenHandler:
        if not self._token_handler:
            self._token_handler = TokenHandler.from_config(self.config)
        return self._token_handler

    def reset(self):
        """Reset the session."""
        self._session = None

    #--------------------------------------------------
    # Session Management
    #--------------------------------------------------

    def is_erp_running(self, app_name: str) -> bool:
        """Check if the ERP is running. app.service_status() returns a single row/column containing an array of JSON service status objects."""
        query = f"CALL {app_name}.app.service_status();"
        try:
            result = self._exec(query)
            # The result is a list of dictionaries, each with a "STATUS" key.
            # The column containing the result is "SERVICE_STATUS".
            services_status = json.loads(result[0]["SERVICE_STATUS"])
            # Find the dictionary whose "name" is "main" and check if its "status" is "READY"
            for service in services_status:
                if service.get("name") == "main" and service.get("status") == "READY":
                    return True
            return False
        except Exception:
            return False

    def get_sf_session(self, reset_session: bool = False):
        if self._session:
            return self._session

        if isinstance(runtime_env, HexEnvironment):
            raise HexSessionException()
        if isinstance(runtime_env, SnowbookEnvironment):
            return get_active_session()
        else:
            # If a session has already been created, try using that;
            # if reset_session is true, always create a new session.
            if not reset_session:
                try:
                    return get_active_session()
                except Exception:
                    pass

            # otherwise, create a new session
            missing_keys = []
            connection_parameters = {}

            authenticator = self.config.get('authenticator', None)
            passcode = self.config.get("passcode", "")
            private_key_file = self.config.get("private_key_file", "")

            # If the authenticator is not set, derive it from the provided parameters
            if authenticator is None:
                if private_key_file != "":
                    authenticator = "snowflake_jwt"
                elif passcode != "":
                    authenticator = "username_password_mfa"
                else:
                    authenticator = "snowflake"
                # set the default authenticator in the config so we can skip it when we check for missing keys
                self.config.set("authenticator", authenticator)

            if authenticator in SNOWFLAKE_AUTHS:
                required_keys = {
                    key for key, value in SNOWFLAKE_AUTHS[authenticator].items() if value.get("required", True)
                }
                for key in required_keys:
                    if self.config.get(key, None) is None:
                        default = SNOWFLAKE_AUTHS[authenticator][key].get("value", None)
                        if default is None or default == FIELD_PLACEHOLDER:
                            # No default value and no value in the config; add to missing keys
                            missing_keys.append(key)
                        else:
                            # Set the default value in the config from the auth defaults
                            self.config.set(key, default)
                if missing_keys:
                    profile = getattr(self.config, 'profile', None)
                    config_file_path = getattr(self.config, 'file_path', None)
                    raise SnowflakeMissingConfigValuesException(missing_keys, profile, config_file_path)
                for key in SNOWFLAKE_AUTHS[authenticator]:
                    connection_parameters[key] = self.config.get(key, None)
            else:
                raise ValueError(f'Authenticator "{authenticator}" not supported')

            return self._build_snowflake_session(connection_parameters)

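To make the authenticator precedence above concrete, here is a minimal, self-contained sketch of the same fallback logic (an illustration mirroring the config keys used above, not part of the module):

# Hedged sketch of the fallback in get_sf_session: a key file implies
# key-pair (JWT) auth, a passcode implies MFA, otherwise password auth.
def resolve_authenticator(config: dict) -> str:
    if config.get("authenticator"):
        return config["authenticator"]
    if config.get("private_key_file"):
        return "snowflake_jwt"
    if config.get("passcode"):
        return "username_password_mfa"
    return "snowflake"

assert resolve_authenticator({"private_key_file": "rsa_key.p8"}) == "snowflake_jwt"
assert resolve_authenticator({"passcode": "123456"}) == "username_password_mfa"
assert resolve_authenticator({}) == "snowflake"
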
    def _build_snowflake_session(self, connection_parameters: Dict[str, Any]) -> Session:
        try:
            tmp = {
                "client_session_keep_alive": True,
                "client_session_keep_alive_heartbeat_frequency": 60 * 5,
            }
            tmp.update(connection_parameters)
            connection_parameters = tmp
            # the programmatic access token authenticator needs to be upper-cased to work...
            connection_parameters["authenticator"] = connection_parameters["authenticator"].upper()
            if "authenticator" in connection_parameters and connection_parameters["authenticator"] == "OAUTH_AUTHORIZATION_CODE":
                # we replicate OAUTH_AUTHORIZATION_CODE by first retrieving the token
                # and then authenticating with the token via the OAUTH authenticator
                connection_parameters["token"] = self.token_handler.get_session_login_token()
                connection_parameters["authenticator"] = "OAUTH"
            return Session.builder.configs(connection_parameters).create()
        except snowflake.connector.errors.Error as e:
            raise SnowflakeDatabaseException(e)
        except Exception as e:
            raise e

    #--------------------------------------------------
    # Core Execution Methods
    #--------------------------------------------------

    def _exec_sql(self, code: str, params: List[Any] | None, raw=False):
        """
        Lowest-level SQL execution method.

        Directly executes SQL via the Snowflake session. This is the foundation
        for all other execution methods. It:
        - Replaces the APP_NAME placeholder with the actual app name
        - Executes SQL with optional parameters
        - Returns either raw session results or collected results

        Args:
            code: SQL code to execute (may contain the APP_NAME placeholder)
            params: Optional SQL parameters
            raw: If True, return raw session results; if False, collect results

        Returns:
            Raw session results if raw=True, otherwise collected results
        """
        assert self._session is not None
        sess_results = self._session.sql(
            code.replace(APP_NAME, self.get_app_name()),
            params
        )
        if raw:
            return sess_results
        return sess_results.collect()

    def _exec(
        self,
        code: str,
        params: List[Any] | Any | None = None,
        raw: bool = False,
        help: bool = True,
        skip_engine_db_error_retry: bool = False
    ) -> Any:
        """
        Mid-level SQL execution method with error handling.

        This is the primary method for executing SQL queries. It wraps _exec_sql
        with comprehensive error handling and parameter normalization. Used
        extensively throughout the codebase for direct SQL operations like:
        - SHOW commands (warehouses, databases, etc.)
        - CALL statements to RAI app stored procedures
        - Transaction management queries

        The error handling flow:
        1. Normalizes parameters and creates an execution context
        2. Calls _exec_sql to execute the query
        3. On error, uses standard error handling (Strategy Pattern), which subclasses
           can influence via `_get_error_handlers()` or by overriding `_handle_standard_exec_errors()`

        Args:
            code: SQL code to execute
            params: Optional SQL parameters (normalized to a list if needed)
            raw: If True, return raw session results; if False, collect results
            help: If True, enable error handling; if False, raise errors immediately
            skip_engine_db_error_retry: If True, skip use_index retry logic in error handlers

        Returns:
            Query results (collected or raw depending on the 'raw' parameter)
        """
        # print(f"\n--- sql---\n{code}\n--- end sql---\n")
        # Ensure the session is initialized
        if not self._session:
            self._session = self.get_sf_session()

        # Normalize parameters
        normalized_params = normalize_params(params)

        # Create execution context
        ctx = ExecContext(
            code=code,
            params=normalized_params,
            raw=raw,
            help=help,
            skip_engine_db_error_retry=skip_engine_db_error_retry
        )

        # Execute SQL
        try:
            return self._exec_sql(ctx.code, ctx.params, raw=ctx.raw)
        except Exception as e:
            if not ctx.help:
                raise e

            # Handle standard errors
            result = self._handle_standard_exec_errors(e, ctx)
            if result is not None:
                return result

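As a usage sketch (the table name and binding value are hypothetical; `resources` is an already-constructed instance), `_exec` passes `params` straight through to Snowpark's `Session.sql`, so `?`-style bindings work, and `help=False` bypasses the handler chain:

# Hedged sketch: parameter binding and raw execution through _exec.
rows = resources._exec(
    "SELECT * FROM my_db.my_schema.orders WHERE status = ?;",
    params=["OPEN"],
)
# With help=False, failures propagate immediately instead of going
# through the ErrorHandler chain; raw=True skips .collect().
raw_df = resources._exec("SHOW WAREHOUSES;", raw=True, help=False)
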
    #--------------------------------------------------
    # Error Handling
    #--------------------------------------------------

    def _handle_standard_exec_errors(self, e: Exception, ctx: ExecContext) -> Any | None:
        """
        Handle standard Snowflake/RAI errors using Strategy Pattern.

        Each error type has a dedicated handler class that encapsulates
        the detection logic and exception creation. Handlers are processed
        in order until one matches and handles the error.
        """
        message = str(e).lower()

        # Try each handler in order until one matches
        for handler in self._error_handlers:
            if handler.matches(e, message, ctx, self):
                result = handler.handle(e, ctx, self)
                if result is not None:
                    return result
                return  # Handler raised exception, we're done

        # Fallback: transform to RAIException
        raise RAIException(str(e))

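The handler interface implied by the calls above is `matches(error, message, ctx, resources)` and `handle(error, ctx, resources)` (the ErrorHandler base class lives in error_handlers.py, which this diff does not show). A minimal custom handler, sketched under that assumption with an invented warehouse-suspended condition:

# Hedged sketch of a custom handler matching the dispatch above.
class WarehouseSuspendedHandler(ErrorHandler):
    def matches(self, error, message, ctx, resources) -> bool:
        # `message` is the lowercased str(error) computed by the caller
        return "warehouse" in message and "suspended" in message

    def handle(self, error, ctx, resources):
        # A non-None return value is handed back to _exec as the result;
        # returning None signals the handler raised (or consumed) the error.
        resources._exec("ALTER WAREHOUSE my_wh RESUME;", help=False)
        return ctx.re_execute(resources)
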
    #--------------------------------------------------
    # Feature Detection & Configuration
    #--------------------------------------------------

    def is_direct_access_enabled(self) -> bool:
        try:
            feature_enabled = self._exec(
                f"call {APP_NAME}.APP.DIRECT_INGRESS_ENABLED();"
            )
            if not feature_enabled:
                return False

            # Even if the feature is enabled, customers still need to reactivate the ERP to ensure the endpoint is available.
            endpoint = self._exec(
                f"call {APP_NAME}.APP.SERVICE_ENDPOINT(true);"
            )
            if not endpoint or endpoint[0][0] is None:
                return False

            return feature_enabled[0][0]
        except Exception as e:
            raise Exception(f"Unable to determine if direct access is enabled. Detailed error: {e}") from e

    def is_account_flag_set(self, flag: str) -> bool:
        results = self._exec(
            f"SHOW PARAMETERS LIKE '%{flag}%' IN ACCOUNT;"
        )
        if not results:
            return False
        return results[0]["value"] == "true"

    #--------------------------------------------------
    # Databases
    #--------------------------------------------------

    def get_database(self, database: str):
        try:
            results = self._exec(
                f"call {APP_NAME}.api.get_database('{database}');"
            )
        except Exception as e:
            messages = collect_error_messages(e)
            if any("database does not exist" in msg for msg in messages):
                return None
            raise e

        if not results:
            return None
        db = results[0]
        if not db:
            return None
        return {
            "id": db["ID"],
            "name": db["NAME"],
            "created_by": db["CREATED_BY"],
            "created_on": db["CREATED_ON"],
            "deleted_by": db["DELETED_BY"],
            "deleted_on": db["DELETED_ON"],
            "state": db["STATE"],
        }

    def get_installed_packages(self, database: str) -> Dict | None:
        query = f"call {APP_NAME}.api.get_installed_package_versions('{database}');"
        try:
            results = self._exec(query)
        except Exception as e:
            messages = collect_error_messages(e)
            if any("database does not exist" in msg for msg in messages):
                return None
            # fall back to None for old sql-lib versions
            if any("unknown user-defined function" in msg for msg in messages):
                return None
            raise e

        if not results:
            return None

        row = results[0]
        if not row:
            return None

        return safe_json_loads(row["PACKAGE_VERSIONS"])

    #--------------------------------------------------
    # Engines
    #--------------------------------------------------

    def _prepare_engine_params(
        self,
        name: str | None,
        size: str | None,
        use_default_size: bool = False
    ) -> tuple[str, str | None]:
        """
        Prepare engine parameters by resolving and validating name and size.

        Args:
            name: Engine name (None to use the default)
            size: Engine size (None to use the config value or default)
            use_default_size: If True and size is None, use get_default_engine_size()

        Returns:
            Tuple of (engine_name, engine_size)

        Raises:
            EngineNameValidationException: If the engine name is invalid
            Exception: If the engine size is invalid
        """
        from relationalai.tools.cli_helpers import validate_engine_name

        engine_name = name or self.get_default_engine_name()

        # Resolve engine size
        if size:
            engine_size = size
        else:
            if use_default_size:
                engine_size = self.config.get_default_engine_size()
            else:
                engine_size = self.config.get("engine_size", None)

        # Validate engine size
        if engine_size:
            is_size_valid, sizes = self.validate_engine_size(engine_size)
            if not is_size_valid:
                error_msg = f"Invalid engine size '{engine_size}'. Valid sizes are: {', '.join(sizes)}"
                if use_default_size:
                    error_msg = f"Invalid engine size in config: '{engine_size}'. Valid sizes are: {', '.join(sizes)}"
                raise Exception(error_msg)

        # Validate engine name
        is_name_valid, _ = validate_engine_name(engine_name)
        if not is_name_valid:
            raise EngineNameValidationException(engine_name)

        return engine_name, engine_size

    def _get_state_handler(self, state: str | None, handlers: list[EngineStateHandler]) -> EngineStateHandler:
        """Find the appropriate state handler for the given state."""
        for handler in handlers:
            if handler.handles_state(state):
                return handler
        # Fall back to the missing-engine handler if no match
        return handlers[-1]  # Last handler should be MissingEngineHandler

    def _process_engine_state(
        self,
        engine: EngineState | Dict[str, Any] | None,
        context: EngineContext,
        handlers: list[EngineStateHandler],
        set_active_on_success: bool = False
    ) -> EngineState | Dict[str, Any] | None:
        """
        Process engine state using the appropriate state handler.

        Args:
            engine: Current engine state (or None if missing)
            context: Engine context for state handling
            handlers: List of state handlers to use (sync or async)
            set_active_on_success: If True, set the engine as active when the handler returns an engine

        Returns:
            Engine state after processing, or None if the engine needs to be created
        """
        # Find and execute the appropriate state handler
        state = engine["state"] if engine else None
        handler = self._get_state_handler(state, handlers)
        engine = handler.handle(engine, context, self)

        # If the handler returned None and we didn't start with None state, the engine needs to be
        # created (e.g., the GONE state deleted the engine, so we need to create a new one)
        if not engine and state is not None:
            handler = self._get_state_handler(None, handlers)
            handler.handle(None, context, self)
        elif set_active_on_success:
            # Cast to EngineState for type safety (handlers return EngineDict, which is compatible)
            self._set_active_engine(cast(EngineState, engine))

        return engine

    def _handle_engine_creation_errors(self, error: Exception, engine_name: str, preserve_rai_exception: bool = False) -> None:
        """
        Handle errors during engine creation using the error handlers.

        Args:
            error: The exception that occurred
            engine_name: Name of the engine being created
            preserve_rai_exception: If True, re-raise RAIException without wrapping

        Raises:
            RAIException: If preserve_rai_exception is True and the error is a RAIException
            EngineProvisioningFailed: If the error is not handled by the error handlers
        """
        # Preserve RAIException passthrough if requested (for async mode)
        if preserve_rai_exception and isinstance(error, RAIException):
            raise error

        # Check if this is a known error type that should be handled by the error handlers
        message = str(error).lower()
        handled = False
        # Engine creation isn't tied to a specific SQL ExecContext; pass a context that
        # disables use_index retry behavior (and any future ctx-dependent handlers).
        ctx = ExecContext(code="", help=True, skip_engine_db_error_retry=True)
        for handler in self._error_handlers:
            if handler.matches(error, message, ctx, self):
                handler.handle(error, ctx, self)
                handled = True
                break  # Handler raised an exception; we're done

        # If not handled by the error handlers, wrap in EngineProvisioningFailed
        if not handled:
            raise EngineProvisioningFailed(engine_name, error) from error

    def validate_engine_size(self, size: str) -> Tuple[bool, List[str]]:
        if size is not None:
            sizes = self.get_engine_sizes()
            if size not in sizes:
                return False, sizes
        return True, []

    def get_engine_sizes(self, cloud_provider: str | None = None):
        sizes = []
        if cloud_provider is None:
            cloud_provider = self.get_cloud_provider()
        if cloud_provider == 'azure':
            sizes = ENGINE_SIZES_AZURE
        else:
            sizes = ENGINE_SIZES_AWS
        if self.config.show_all_engine_sizes():
            return INTERNAL_ENGINE_SIZES + sizes
        else:
            return sizes

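Putting the two methods above together, a stripped-down sketch of the validation flow (the provider is hard-wired here instead of coming from a live get_cloud_provider() call):

# Hedged sketch of size validation against the module's size tables.
def check_size(size: str, cloud_provider: str = "aws") -> None:
    sizes = ENGINE_SIZES_AZURE if cloud_provider == "azure" else ENGINE_SIZES_AWS
    if size not in sizes:
        raise ValueError(f"Invalid engine size '{size}'. Valid sizes are: {', '.join(sizes)}")

check_size("HIGHMEM_X64_S")  # fine on AWS
# check_size("XS")           # raises unless show_all_engine_sizes() applies
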
    def list_engines(self, state: str | None = None):
        where_clause = f"WHERE STATUS = '{state.upper()}'" if state else ""
        statement = f"SELECT NAME, ID, SIZE, STATUS, CREATED_BY, CREATED_ON, UPDATED_ON FROM {APP_NAME}.api.engines {where_clause} ORDER BY NAME ASC;"
        results = self._exec(statement)
        if not results:
            return []
        return [
            {
                "name": row["NAME"],
                "id": row["ID"],
                "size": row["SIZE"],
                "state": row["STATUS"],  # callers are expecting 'state'
                "created_by": row["CREATED_BY"],
                "created_on": row["CREATED_ON"],
                "updated_on": row["UPDATED_ON"],
            }
            for row in results
        ]

    def get_engine(self, name: str):
        results = self._exec(
            f"SELECT NAME, ID, SIZE, STATUS, CREATED_BY, CREATED_ON, UPDATED_ON, VERSION, AUTO_SUSPEND_MINS, SUSPENDS_AT FROM {APP_NAME}.api.engines WHERE NAME='{name}';"
        )
        if not results:
            return None
        engine = results[0]
        if not engine:
            return None
        engine_state: EngineState = {
            "name": engine["NAME"],
            "id": engine["ID"],
            "size": engine["SIZE"],
            "state": engine["STATUS"],  # callers are expecting 'state'
            "created_by": engine["CREATED_BY"],
            "created_on": engine["CREATED_ON"],
            "updated_on": engine["UPDATED_ON"],
            "version": engine["VERSION"],
            "auto_suspend": engine["AUTO_SUSPEND_MINS"],
            "suspends_at": engine["SUSPENDS_AT"]
        }
        return engine_state

    def get_default_engine_name(self) -> str:
        if self.config.get("engine_name", None) is not None:
            profile = self.config.profile
            raise InvalidAliasError(f"""
                'engine_name' is not a valid config option.
                If you meant to use a specific engine, use 'engine' instead.
                Otherwise, remove it from your '{profile}' configuration profile.
            """)
        engine = self.config.get("engine", None)
        if not engine and self.config.get("user", None):
            engine = _sanitize_user_name(str(self.config.get("user")))
        if not engine:
            engine = self.get_user_based_engine_name()
        self.config.set("engine", engine)
        return engine

    def is_valid_engine_state(self, name: str):
        return name in VALID_ENGINE_STATES

    def _create_engine(
        self,
        name: str,
        size: str | None = None,
        auto_suspend_mins: int | None = None,
        is_async: bool = False,
        headers: Dict | None = None,
    ):
        api = "create_engine_async" if is_async else "create_engine"
        if size is None:
            size = self.config.get_default_engine_size()
        # if auto_suspend_mins is None, get the default value from the config
        if auto_suspend_mins is None:
            auto_suspend_mins = self.config.get_default_auto_suspend_mins()
        try:
            headers = debugging.gen_current_propagation_headers()
            with debugging.span(api, name=name, size=size, auto_suspend_mins=auto_suspend_mins):
                # check in case the config default is missing
                if auto_suspend_mins is None:
                    self._exec(f"call {APP_NAME}.api.{api}('{name}', '{size}', null, {headers});")
                else:
                    self._exec(f"call {APP_NAME}.api.{api}('{name}', '{size}', PARSE_JSON('{{\"auto_suspend_mins\": {auto_suspend_mins}}}'), {headers});")
        except Exception as e:
            raise EngineProvisioningFailed(name, e) from e

    def create_engine(self, name: str, size: str | None = None, auto_suspend_mins: int | None = None, headers: Dict | None = None):
        self._create_engine(name, size, auto_suspend_mins, headers=headers)

    def create_engine_async(self, name: str, size: str | None = None, auto_suspend_mins: int | None = None):
        self._create_engine(name, size, auto_suspend_mins, True)

    def delete_engine(self, name: str, force: bool = False, headers: Dict | None = None):
        request_headers = debugging.add_current_propagation_headers(headers)
        self._exec(f"call {APP_NAME}.api.delete_engine('{name}', {force}, {request_headers});")

    def suspend_engine(self, name: str):
        self._exec(f"call {APP_NAME}.api.suspend_engine('{name}');")

    def resume_engine(self, name: str, headers: Dict | None = None) -> Dict:
        request_headers = debugging.add_current_propagation_headers(headers)
        self._exec(f"call {APP_NAME}.api.resume_engine('{name}', {request_headers});")
        # returning an empty dict to match the expected return type
        return {}

    def resume_engine_async(self, name: str, headers: Dict | None = None) -> Dict:
        if headers is None:
            headers = {}
        self._exec(f"call {APP_NAME}.api.resume_engine_async('{name}', {headers});")
        # returning an empty dict to match the expected return type
        return {}

    def alter_engine_pool(self, size: str | None = None, mins: int | None = None, maxs: int | None = None):
        """Alter engine pool node limits for Snowflake."""
        self._exec(f"call {APP_NAME}.api.alter_engine_pool_node_limits('{size}', {mins}, {maxs});")

    #--------------------------------------------------
    # Graphs
    #--------------------------------------------------

    def list_graphs(self) -> List[AvailableModel]:
        with debugging.span("list_models"):
            query = textwrap.dedent(f"""
                SELECT NAME, ID, CREATED_BY, CREATED_ON, STATE, DELETED_BY, DELETED_ON
                FROM {APP_NAME}.api.databases
                WHERE state <> 'DELETED'
                ORDER BY NAME ASC;
            """)
            results = self._exec(query)
            if not results:
                return []
            return [
                {
                    "name": row["NAME"],
                    "id": row["ID"],
                    "created_by": row["CREATED_BY"],
                    "created_on": row["CREATED_ON"],
                    "state": row["STATE"],
                    "deleted_by": row["DELETED_BY"],
                    "deleted_on": row["DELETED_ON"],
                }
                for row in results
            ]

    def get_graph(self, name: str):
        res = self.get_database(name)
        if res and res.get("state") != "DELETED":
            return res

    def create_graph(self, name: str):
        with debugging.span("create_model", name=name):
            self._exec(f"call {APP_NAME}.api.create_database('{name}', false, {debugging.gen_current_propagation_headers()});")

    def delete_graph(self, name: str, force=False, language: str = "rel"):
        prop_hdrs = debugging.gen_current_propagation_headers()
        if self.config.get("use_graph_index", USE_GRAPH_INDEX):
            keep_database = not force and self.config.get("reuse_model", True)
            with debugging.span("release_index", name=name, keep_database=keep_database, language=language):
                # TODO: add headers to release_index
                response = self._exec(f"call {APP_NAME}.api.release_index('{name}', OBJECT_CONSTRUCT('keep_database', {keep_database}, 'language', '{language}', 'user_agent', '{get_pyrel_version(self.generation)}'));")
                if response:
                    result = next(iter(response))
                    obj = json.loads(result["RELEASE_INDEX"])
                    error = obj.get('error', None)
                    if error and "Model database not found" not in error:
                        raise Exception(f"Error releasing index: {error}")
                else:
                    raise Exception("There was no response from the release index call.")
        else:
            with debugging.span("delete_model", name=name):
                self._exec(f"call {APP_NAME}.api.delete_database('{name}', false, {prop_hdrs});")

    def clone_graph(self, target_name: str, source_name: str, nowait_durable=True, force=False):
        if force and self.get_graph(target_name):
            self.delete_graph(target_name)
        with debugging.span("clone_model", target_name=target_name, source_name=source_name):
            # not a mistake: the clone_database argument order is indeed target then source:
            headers = debugging.gen_current_propagation_headers()
            self._exec(f"call {APP_NAME}.api.clone_database('{target_name}', '{source_name}', {nowait_durable}, {headers});")

    def _poll_use_index(
        self,
        app_name: str,
        sources: Iterable[str],
        model: str,
        engine_name: str,
        engine_size: str | None = None,
        program_span_id: str | None = None,
        headers: Dict | None = None,
    ) -> None:
        """
        Poll use_index to prepare indices for the given sources.

        This is an optional interface method. The base Resources class provides a no-op
        implementation. UseIndexResources and DirectAccessResources override it to provide
        actual polling.

        Returns:
            None for the base implementation. Child classes may return poller results.
        """
        return None

    def maybe_poll_use_index(
        self,
        app_name: str,
        sources: Iterable[str],
        model: str,
        engine_name: str,
        engine_size: str | None = None,
        program_span_id: str | None = None,
        headers: Dict | None = None,
    ) -> None:
        """
        Only call _poll_use_index if there are sources to process.

        This is an optional interface method. The base Resources class provides a no-op
        implementation. UseIndexResources and DirectAccessResources override it to provide
        actual polling with caching.

        Returns:
            None for the base implementation. Child classes may return poller results.
        """
        return None

    #--------------------------------------------------
    # Models
    #--------------------------------------------------

    def list_models(self, database: str, engine: str):
        pass

    def create_models(self, database: str, engine: str | None, models: List[Tuple[str, str]]) -> List[Any]:
        rel_code = self.create_models_code(models)
        self.exec_raw(database, engine, rel_code, readonly=False)
        # TODO: handle SPCS errors once they're figured out
        return []

    def delete_model(self, database: str, engine: str | None, name: str):
        self.exec_raw(database, engine, f"def delete[:rel, :catalog, :model, \"{name}\"]: rel[:catalog, :model, \"{name}\"]", readonly=False)

    def create_models_code(self, models: List[Tuple[str, str]]) -> str:
        lines = []
        for (name, code) in models:
            name = name.replace("\"", "\\\"")
            assert "\"\"\"\"\"\"\"" not in code, "Code literals must use fewer than 7 quotes."

            lines.append(textwrap.dedent(f"""
                def delete[:rel, :catalog, :model, "{name}"]: rel[:catalog, :model, "{name}"]
                def insert[:rel, :catalog, :model, "{name}"]: raw\"\"\"\"\"\"\"
            """) + code + "\n\"\"\"\"\"\"\"")
        rel_code = "\n\n".join(lines)
        return rel_code

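For a single entry, the template above expands to a delete/insert pair with the model source wrapped in a 7-quote raw string literal, which is why the assertion forbids seven consecutive quotes inside the source. Roughly (a sketch with a made-up model name and body):

models = [("my_model", "def output: 1 + 1")]
# create_models_code(models) yields, modulo leading whitespace:
#
#   def delete[:rel, :catalog, :model, "my_model"]: rel[:catalog, :model, "my_model"]
#   def insert[:rel, :catalog, :model, "my_model"]: raw"""""""
#   def output: 1 + 1
#   """""""
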
    #--------------------------------------------------
    # Exports
    #--------------------------------------------------

    def list_exports(self, database: str, engine: str):
        return []

    def get_export_code(self, params: ExportParams, all_installs):
        sql_inputs = ", ".join([f"{name} {type_to_sql(type)}" for (name, _, type) in params.inputs])
        input_names = [name for (name, *_) in params.inputs]
        has_return_hint = params.out_fields and isinstance(params.out_fields[0], tuple)
        if has_return_hint:
            sql_out = ", ".join([f"\"{name}\" {type_to_sql(type)}" for (name, type) in params.out_fields])
            sql_out_names = ", ".join([f"('{name}', '{type_to_sql(type)}')" for (ix, (name, type)) in enumerate(params.out_fields)])
            py_outs = ", ".join([f"StructField(\"{name}\", {type_to_snowpark(type)})" for (name, type) in params.out_fields])
        else:
            sql_out = ""
            sql_out_names = ", ".join([f"'{name}'" for name in params.out_fields])
            py_outs = ", ".join([f"StructField(\"{name}\", {type_to_snowpark(str)})" for name in params.out_fields])
        py_inputs = ", ".join([name for (name, *_) in params.inputs])
        safe_rel = escape_for_f_string(params.code).strip()
        clean_inputs = []
        for (name, var, type) in params.inputs:
            if type is str:
                clean_inputs.append(f"{name} = '\"' + escape({name}) + '\"'")
            # Replace `var` with `name` and keep the following non-word character unchanged
            pattern = re.compile(re.escape(var) + r'(\W)')
            value = format_sproc_name(name, type)
            safe_rel = re.sub(pattern, rf"{{{value}}}\1", safe_rel)
        if py_inputs:
            py_inputs = f", {py_inputs}"
        clean_inputs = ("\n").join(clean_inputs)
        file = "export_procedure.py.jinja"
        with importlib.resources.open_text(
            "relationalai.clients.resources.snowflake", file
        ) as f:
            template = f.read()

        def quote(s: str, f=False) -> str:
            return '"' + s + '"' if not f else 'f"' + s + '"'

        wait_for_stream_sync = self.config.get("wait_for_stream_sync", WAIT_FOR_STREAM_SYNC)
        # 1. Check the sources for stale sources
        # 2. Get the object references for the sources
        # TODO: this could be optimized to run inside the stored procedure itself instead
        # of here. That would make it more reliable when sources are modified after the
        # stored procedure is created.
        checked_sources = self._check_source_updates(self.sources)
        source_obj_references = self._get_source_references(checked_sources)

        # Escape double quotes in the source object references
        escaped_source_obj_references = [source.replace('"', '\\"') for source in source_obj_references]
        escaped_proc_database = params.proc_database.replace('"', '\\"')

        normalized_func_name = IdentityParser(params.func_name).identity
        assert normalized_func_name is not None, "Function name must be set"
        skip_invalid_data = params.skip_invalid_data
        python_code = process_jinja_template(
            template,
            func_name=quote(normalized_func_name),
            database=quote(params.root_database),
            proc_database=quote(escaped_proc_database),
            engine=quote(params.engine),
            rel_code=quote(safe_rel, f=True),
            APP_NAME=quote(APP_NAME),
            input_names=input_names,
            outputs=sql_out,
            sql_out_names=sql_out_names,
            clean_inputs=clean_inputs,
            py_inputs=py_inputs,
            py_outs=py_outs,
            skip_invalid_data=skip_invalid_data,
            source_references=", ".join(escaped_source_obj_references),
            install_code=all_installs.replace("\\", "\\\\").replace("\n", "\\n"),
            has_return_hint=has_return_hint,
            wait_for_stream_sync=wait_for_stream_sync,
        ).strip()
        return_clause = f"TABLE({sql_out})" if sql_out else "STRING"
        destination_input = "" if sql_out else "save_as_table STRING DEFAULT NULL,"
        module_name = sanitize_module_name(normalized_func_name)
        stage = f"@{self.get_app_name()}.app_state.stored_proc_code_stage"
        file_loc = f"{stage}/{module_name}.py"
        python_code = python_code.replace(APP_NAME, self.get_app_name())

        hash = hashlib.sha256()
        hash.update(python_code.encode('utf-8'))
        code_hash = hash.hexdigest()
        print(code_hash)

        sql_code = textwrap.dedent(f"""
            CREATE OR REPLACE PROCEDURE {normalized_func_name}({sql_inputs}{sql_inputs and ',' or ''} {destination_input} engine STRING DEFAULT NULL)
            RETURNS {return_clause}
            LANGUAGE PYTHON
            RUNTIME_VERSION = '3.10'
            IMPORTS = ('{file_loc}')
            PACKAGES = ('snowflake-snowpark-python')
            HANDLER = 'checked_handle'
            EXECUTE AS CALLER
            AS
            $$
            import {module_name}
            import inspect, hashlib, os, sys
            def checked_handle(*args, **kwargs):
                import_dir = sys._xoptions["snowflake_import_directory"]
                wheel_path = os.path.join(import_dir, '{module_name}.py')
                h = hashlib.sha256()
                with open(wheel_path, 'rb') as f:
                    for chunk in iter(lambda: f.read(1<<20), b''):
                        h.update(chunk)
                code_hash = h.hexdigest()
                if code_hash != '{code_hash}':
                    raise RuntimeError("Code hash mismatch. The code has been modified since it was uploaded.")
                # Call the handle function with the provided arguments
                return {module_name}.handle(*args, **kwargs)

            $$;
        """)
        # print(f"\n--- python---\n{python_code}\n--- end python---\n")
        # This check helps catch invalid code early and for dry runs:
        try:
            ast.parse(python_code)
        except SyntaxError:
            raise ValueError(f"Internal error: invalid Python code generated:\n{python_code}")
        return (sql_code, python_code, file_loc)

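The generated `checked_handle` wrapper refuses to run if the staged module's bytes no longer hash to the digest baked into the procedure at creation time. A standalone sketch of that same integrity check (path and digest are placeholders):

# Hedged sketch of the check embedded in the generated procedure: hash the
# staged file in 1 MiB chunks and compare against the pinned digest.
import hashlib

def verify_staged_module(path: str, expected_hash: str) -> None:
    h = hashlib.sha256()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(1 << 20), b''):
            h.update(chunk)
    if h.hexdigest() != expected_hash:
        raise RuntimeError("Code hash mismatch. The code has been modified since it was uploaded.")
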
    def get_sproc_models(self, params: ExportParams):
        if self._sproc_models is not None:
            return self._sproc_models

        with debugging.span("get_sproc_models"):
            code = """
                def output(name, model):
                    rel(:catalog, :model, name, model)
                    and not starts_with(name, "rel/")
                    and not starts_with(name, "pkg/rel")
                    and not starts_with(name, "pkg/std")
                    and starts_with(name, "pkg/")
            """
            res = self.exec_raw(params.model_database, params.engine, code, readonly=True, nowait_durable=True)
            df, errors = result_helpers.format_results(res, None, ["name", "model"])
            models = []
            for row in df.itertuples():
                models.append((row.name, row.model))
            self._sproc_models = models
            return models

    def create_export(self, params: ExportParams):
        with debugging.span("create_export") as span:
            if params.dry_run:
                (sql_code, python_code, file_loc) = self.get_export_code(params, params.install_code)
                span["sql"] = sql_code
                return

            start = time.perf_counter()
            use_graph_index = self.config.get("use_graph_index", USE_GRAPH_INDEX)
            # for the non-graph-index case we would need to create the cloned proc database
            if not use_graph_index:
                raise RAIException(
                    "To ensure permissions are properly accounted for, stored procedures require using the graph index. "
                    "Set use_graph_index=True in your config to proceed."
                )

            models = self.get_sproc_models(params)
            lib_installs = self.create_models_code(models)
            all_installs = lib_installs + "\n\n" + params.install_code

            (sql_code, python_code, file_loc) = self.get_export_code(params, all_installs)

            span["sql"] = sql_code
            assert self._session

            with debugging.span("upload_sproc_code"):
                code_bytes = python_code.encode('utf-8')
                code_stream = io.BytesIO(code_bytes)
                self._session.file.put_stream(code_stream, file_loc, auto_compress=False, overwrite=True)

            with debugging.span("sql_install"):
                self._exec(sql_code)

            debugging.time("export", time.perf_counter() - start, DataFrame(), code=sql_code.replace(APP_NAME, self.get_app_name()))

    def create_export_table(self, database: str, engine: str, table: str, relation: str, columns: Dict[str, str], code: str, refresh: str | None = None):
        print("Snowflake doesn't support creating export tables yet. Try creating the table manually first.")

    def delete_export(self, database: str, engine: str, name: str):
        pass

    #--------------------------------------------------
    # Imports
    #--------------------------------------------------

    def change_stream_status(self, stream_id: str, model: str, suspend: bool):
        if stream_id and model:
            if suspend:
                self._exec(f"CALL {APP_NAME}.api.suspend_data_stream('{stream_id}', '{model}');")
            else:
                self._exec(f"CALL {APP_NAME}.api.resume_data_stream('{stream_id}', '{model}');")

    def change_imports_status(self, suspend: bool):
        if suspend:
            self._exec(f"CALL {APP_NAME}.app.suspend_cdc();")
        else:
            self._exec(f"CALL {APP_NAME}.app.resume_cdc();")

    def get_imports_status(self) -> ImportsStatus | None:
        # NOTE: We expect there to only ever be one result.
        results = self._exec(f"CALL {APP_NAME}.app.cdc_status();")
        if results:
            result = next(iter(results))
            engine = result['CDC_ENGINE_NAME']
            engine_status = result['CDC_ENGINE_STATUS']
            engine_size = result['CDC_ENGINE_SIZE']
            task_status = result['CDC_TASK_STATUS']
            info = result['CDC_TASK_INFO']
            enabled = result['CDC_ENABLED']
            return {"engine": engine, "engine_size": engine_size, "engine_status": engine_status, "status": task_status, "enabled": enabled, "info": info}
        return None

    def set_imports_engine_size(self, size: str):
        try:
            self._exec(f"CALL {APP_NAME}.app.alter_cdc_engine_size('{size}');")
        except Exception as e:
            raise e

    def list_imports(
        self,
        id: str | None = None,
        name: str | None = None,
        model: str | None = None,
        status: str | None = None,
        creator: str | None = None,
    ) -> list[Import]:
        where = []
        if id and isinstance(id, str):
            where.append(f"LOWER(ID) = '{id.lower()}'")
        if name and isinstance(name, str):
            where.append(f"LOWER(FQ_OBJECT_NAME) = '{name.lower()}'")
        if model and isinstance(model, str):
            where.append(f"LOWER(RAI_DATABASE) = '{model.lower()}'")
        if creator and isinstance(creator, str):
            where.append(f"LOWER(CREATED_BY) = '{creator.lower()}'")
        if status and isinstance(status, str):
            where.append(f"LOWER(batch_status) = '{status.lower()}'")
        where_clause = " AND ".join(where)

        # This is roughly inspired by the native app code, because we don't have a way to
        # get the status of multiple streams at once and doing them individually is far
        # too slow. We use window functions to get the status of the stream and the batch
        # details.
        statement = f"""
            SELECT
                ID,
                RAI_DATABASE,
                FQ_OBJECT_NAME,
                CREATED_AT,
                CREATED_BY,
                CASE
                    WHEN nextBatch.quarantined > 0 THEN 'quarantined'
                    ELSE nextBatch.status
                END as batch_status,
                nextBatch.processing_errors,
                nextBatch.batches
            FROM {APP_NAME}.api.data_streams as ds
            LEFT JOIN (
                SELECT DISTINCT
                    data_stream_id,
                    -- Get status from the progress record using window functions
                    FIRST_VALUE(status) OVER (
                        PARTITION BY data_stream_id
                        ORDER BY
                            CASE WHEN unloaded IS NOT NULL THEN 1 ELSE 0 END DESC,
                            unloaded ASC
                    ) as status,
                    -- Get batch_details from the same record
                    FIRST_VALUE(batch_details) OVER (
                        PARTITION BY data_stream_id
                        ORDER BY
                            CASE WHEN unloaded IS NOT NULL THEN 1 ELSE 0 END DESC,
                            unloaded ASC
                    ) as batch_details,
                    -- Aggregate the other fields
                    FIRST_VALUE(processing_details:processingErrors) OVER (
                        PARTITION BY data_stream_id
                        ORDER BY
                            CASE WHEN unloaded IS NOT NULL THEN 1 ELSE 0 END DESC,
                            unloaded ASC
                    ) as processing_errors,
                    MIN(unloaded) OVER (PARTITION BY data_stream_id) as unloaded,
                    COUNT(*) OVER (PARTITION BY data_stream_id) as batches,
                    COUNT_IF(status = 'quarantined') OVER (PARTITION BY data_stream_id) as quarantined
                FROM {APP_NAME}.api.data_stream_batches
            ) nextBatch
            ON ds.id = nextBatch.data_stream_id
            {f"where {where_clause}" if where_clause else ""}
            ORDER BY FQ_OBJECT_NAME ASC;
        """
        results = self._exec(statement)
        items = []
        if results:
            for stream in results:
                (id, db, name, created_at, created_by, status, processing_errors, batches) = stream
                if status and isinstance(status, str):
                    status = status.upper()
                if processing_errors:
                    if status in ["QUARANTINED", "PENDING"]:
                        start = processing_errors.rfind("Error")
                        if start != -1:
                            processing_errors = processing_errors[start:-1]
                    else:
                        processing_errors = None
                items.append(cast(Import, {
                    "id": id,
                    "model": db,
                    "name": name,
                    "created": created_at,
                    "creator": created_by,
                    "status": status.upper() if status else None,
                    "errors": processing_errors if processing_errors != "[]" else None,
                    "batches": f"{batches}" if batches else "",
                }))
        return items

    def poll_imports(self, sources: List[str], model: str):
        source_set = self._create_source_set(sources)

        def check_imports():
            imports = [
                import_
                for import_ in self.list_imports(model=model)
                if import_["name"] in source_set
            ]
            # loop through, printing status for each in the format (index): (name) - (status)
            statuses = [import_["status"] for import_ in imports]
            if all(status == "LOADED" for status in statuses):
                return True
            if any(status == "QUARANTINED" for status in statuses):
                failed_imports = [import_["name"] for import_ in imports if import_["status"] == "QUARANTINED"]
                raise RAIException("Imports failed: " + ", ".join(failed_imports)) from None
            # this check is necessary in case some of the tables are empty;
            # such tables may be synced even though their status is None:
            def synced(import_):
                if import_["status"] == "LOADED":
                    return True
                if import_["status"] is None:
                    import_full_status = self.get_import_stream(import_["name"], model)
                    if import_full_status and import_full_status[0]["data_sync_status"] == "SYNCED":
                        return True
                return False
            if all(synced(import_) for import_ in imports):
                return True

        poll_with_specified_overhead(check_imports, overhead_rate=0.1, max_delay=10)

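`poll_with_specified_overhead` is imported from elsewhere in the package; the contract implied here is that the predicate runs repeatedly until it returns a truthy value, with sleeps tuned so polling cost stays near `overhead_rate` of the elapsed wait, capped at `max_delay`. A minimal stand-in with the same shape (an assumption, not the packaged implementation):

# Hedged sketch of a poll loop with the same call shape. The delay grows
# with elapsed time (bounded by max_delay), keeping overhead proportional.
import time

def poll_with_overhead(check, overhead_rate=0.1, max_delay=10):
    start = time.monotonic()
    while not check():
        elapsed = time.monotonic() - start
        time.sleep(min(max_delay, max(0.1, elapsed * overhead_rate)))
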
def _create_source_set(self, sources: List[str]) -> set:
|
|
1308
|
-
return {
|
|
1309
|
-
source.upper() if not IdentityParser(source).has_double_quoted_identifier else IdentityParser(source).identity
|
|
1310
|
-
for source in sources
|
|
1311
|
-
}
|
|
1312
|
-
|
|
1313
|
-
def get_import_stream(self, name:str|None, model:str|None):
|
|
1314
|
-
results = self._exec(f"CALL {APP_NAME}.api.get_data_stream('{name}', '{model}');")
|
|
1315
|
-
if not results:
|
|
1316
|
-
return None
|
|
1317
|
-
return imports_to_dicts(results)
|
|
1318
|
-
|
|
1319
|
-
def create_import_stream(self, source:ImportSource, model:str, rate = 1, options: dict|None = None):
|
|
1320
|
-
assert isinstance(source, ImportSourceTable), "Snowflake integration only supports loading from SF Tables. Try loading your data as a table via the Snowflake interface first."
|
|
1321
|
-
object = source.fqn
|
|
1322
|
-
|
|
1323
|
-
# Parse only to the schema level
|
|
1324
|
-
schemaParser = IdentityParser(f"{source.database}.{source.schema}")
|
|
1325
|
-
|
|
1326
|
-
if object.lower() in [x["name"].lower() for x in self.list_imports(model=model)]:
|
|
1327
|
-
return
|
|
1328
|
-
|
|
1329
|
-
query = f"SHOW OBJECTS LIKE '{source.table}' IN {schemaParser.identity}"
|
|
1330
|
-
|
|
1331
|
-
info = self._exec(query)
|
|
1332
|
-
if not info:
|
|
1333
|
-
raise ValueError(f"Object {source.table} not found in schema {schemaParser.identity}")
|
|
1334
|
-
else:
|
|
1335
|
-
data = info[0]
|
|
1336
|
-
if not data:
|
|
1337
|
-
raise ValueError(f"Object {source.table} not found in {schemaParser.identity}")
|
|
1338
|
-
# (time, name, db_name, schema_name, kind, *rest)
|
|
1339
|
-
kind = data["kind"]
|
|
1340
|
-
|
|
1341
|
-
relation_name = to_fqn_relation_name(object)
|
|
1342
|
-
|
|
1343
|
-
command = f"""call {APP_NAME}.api.create_data_stream(
|
|
1344
|
-
{APP_NAME}.api.object_reference('{kind}', '{object}'),
|
|
1345
|
-
'{model}',
|
|
1346
|
-
'{relation_name}');"""
|
|
1347
|
-
|
|
1348
|
-
def create_stream(tracking_just_changed=False):
|
|
1349
|
-
try:
|
|
1350
|
-
self._exec(command)
|
|
1351
|
-
except Exception as e:
|
|
1352
|
-
messages = collect_error_messages(e)
|
|
1353
|
-
if any("ensure that change_tracking is enabled on the source object" in msg for msg in messages):
|
|
1354
|
-
if self.config.get("ensure_change_tracking", False) and not tracking_just_changed:
|
|
1355
|
-
try:
|
|
1356
|
-
self._exec(f"ALTER {kind} {object} SET CHANGE_TRACKING = TRUE;")
|
|
1357
|
-
create_stream(tracking_just_changed=True)
|
|
1358
|
-
except Exception:
|
|
1359
|
-
pass
|
|
1360
|
-
else:
|
|
1361
|
-
print("\n")
|
|
1362
|
-
exception = SnowflakeChangeTrackingNotEnabledException((object, kind))
|
|
1363
|
-
raise exception from None
|
|
1364
|
-
elif any("database does not exist" in msg for msg in messages):
|
|
1365
|
-
print("\n")
|
|
1366
|
-
raise ModelNotFoundException(model) from None
|
|
1367
|
-
raise e
|
|
1368
|
-
|
|
1369
|
-
create_stream()
|
|
1370
|
-
|
|
1371
|
-
def create_import_snapshot(self, source:ImportSource, model:str, options: dict|None = None):
|
|
1372
|
-
raise Exception("Snowflake integration doesn't support snapshot imports yet")
|
|
1373
|
-
|
|
1374
|
-
def delete_import(self, import_name:str, model:str, force = False):
|
|
1375
|
-
engine = self.get_default_engine_name()
|
|
1376
|
-
rel_name = to_fqn_relation_name(import_name)
|
|
1377
|
-
try:
|
|
1378
|
-
self._exec(f"""call {APP_NAME}.api.delete_data_stream(
|
|
1379
|
-
'{import_name}',
|
|
1380
|
-
'{model}'
|
|
1381
|
-
);""")
|
|
1382
|
-
except RAIException as err:
|
|
1383
|
-
if "streams do not exist" not in str(err) or not force:
|
|
1384
|
-
raise
|
|
1385
|
-
|
|
1386
|
-
# if force is true, we delete the leftover relation to free up the name (in case the user re-creates the stream)
|
|
1387
|
-
if force:
|
|
1388
|
-
self.exec_raw(model, engine, f"""
|
|
1389
|
-
declare ::{rel_name}
|
|
1390
|
-
def delete[:\"{rel_name}\"]: {{ {rel_name} }}
|
|
1391
|
-
""", readonly=False, bypass_index=True)
|
|
1392
|
-
|
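The import wait above leans on `poll_with_specified_overhead`, which treats its callback as a readiness predicate: return truthy to stop, falsy to keep polling, raise to abort. A minimal sketch of that contract, assuming the proportional-backoff behavior the name and arguments suggest (the real helper lives elsewhere in the package):

```python
import time

def poll_with_specified_overhead(check, overhead_rate: float, max_delay: float = 120.0):
    # Sketch only: call `check()` until it returns truthy, sleeping between
    # attempts. The sleep grows with elapsed time (elapsed * overhead_rate),
    # capped at max_delay, so polling overhead stays a bounded fraction of
    # the total wait. Signature mirrors the call sites above.
    start = time.monotonic()
    while not check():
        elapsed = time.monotonic() - start
        time.sleep(min(elapsed * overhead_rate, max_delay))
```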
```diff
-    #--------------------------------------------------
-    # Exec Async
-    #--------------------------------------------------
-
-    def _check_exec_async_status(self, txn_id: str, headers: Dict | None = None):
-        """Check whether the given transaction has completed."""
-        if headers is None:
-            headers = {}
-
-        with debugging.span("check_status"):
-            response = self._exec(f"CALL {APP_NAME}.api.get_transaction('{txn_id}',{headers});")
-            assert response, f"No results from get_transaction('{txn_id}')"
-
-        response_row = next(iter(response)).asDict()
-        status: str = response_row['STATE']
-
-        # remove the transaction from the pending list if it's completed or aborted
-        if status in ["COMPLETED", "ABORTED"]:
-            if txn_id in self._pending_transactions:
-                self._pending_transactions.remove(txn_id)
-
-        if status == "ABORTED" and response_row.get("ABORT_REASON", "") == TXN_ABORT_REASON_TIMEOUT:
-            config_file_path = getattr(self.config, 'file_path', None)
-            # todo: use the timeout returned alongside the transaction as soon as it's exposed
-            timeout_mins = int(self.config.get("query_timeout_mins", DEFAULT_QUERY_TIMEOUT_MINS) or DEFAULT_QUERY_TIMEOUT_MINS)
-            raise QueryTimeoutExceededException(
-                timeout_mins=timeout_mins,
-                query_id=txn_id,
-                config_file_path=config_file_path,
-            )
-
-        # @TODO: Find some way to tunnel the ABORT_REASON out. Azure doesn't have this, but it's handy
-        return status == "COMPLETED" or status == "ABORTED"
-
-
-    def _list_exec_async_artifacts(self, txn_id: str, headers: Dict | None = None) -> Dict[str, Dict]:
-        """Grab the list of artifacts produced in the transaction and the URLs to retrieve their contents."""
-        if headers is None:
-            headers = {}
-        with debugging.span("list_results"):
-            response = self._exec(
-                f"CALL {APP_NAME}.api.get_own_transaction_artifacts('{txn_id}',{headers});"
-            )
-            assert response, f"No results from get_own_transaction_artifacts('{txn_id}')"
-            return {row["FILENAME"]: row for row in response}
-
-    def _fetch_exec_async_artifacts(
-        self, artifact_info: Dict[str, Dict[str, Any]]
-    ) -> Dict[str, Any]:
-        """Grab the contents of the given artifacts from SF in parallel using threads."""
-
-        with requests.Session() as session:
-            def _fetch_data(name_info):
-                filename, metadata = name_info
-
-                try:
-                    # Extract the presigned URL and encryption material from metadata
-                    url_key = self.get_url_key(metadata)
-                    presigned_url = metadata[url_key]
-                    encryption_material = metadata["ENCRYPTION_MATERIAL"]
-
-                    response = get_with_retries(session, presigned_url, config=self.config)
-                    response.raise_for_status()  # Throw if something goes wrong
-
-                    decrypted = self._maybe_decrypt(response.content, encryption_material)
-                    return (filename, decrypted)
-
-                except requests.RequestException as e:
-                    raise scrub_exception(wrap_with_request_id(e))
-
-            # Create a list of tuples for the map function
-            name_info_pairs = list(artifact_info.items())
-
-            with ThreadPoolExecutor(max_workers=5) as executor:
-                results = executor.map(_fetch_data, name_info_pairs)
-
-            return {name: data for (name, data) in results}
-
-    def _maybe_decrypt(self, content: bytes, encryption_material: str) -> bytes:
-        # Decrypt if encryption material is present
-        if encryption_material:
-            # if there's no padding, the initial file was empty
-            if len(content) == 0:
-                return b""
-
-            return decrypt_artifact(content, encryption_material)
-
-        # otherwise, return content directly
-        return content
-
-    def _parse_exec_async_results(self, arrow_files: List[Tuple[str, bytes]]):
-        """Mimics the logic in _parse_arrow_results of railib/api.py#L303 without requiring a wrapping multipart form."""
-        results = []
-
-        for file_name, file_content in arrow_files:
-            with pa.ipc.open_stream(file_content) as reader:
-                schema = reader.schema
-                batches = [batch for batch in reader]
-                table = pa.Table.from_batches(batches=batches, schema=schema)
-                results.append({"relationId": file_name, "table": table})
-
-        return results
-
```
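`_parse_exec_async_results` treats each artifact as an Arrow IPC stream. A self-contained round trip through that format (table contents made up) shows the same `open_stream` / `from_batches` hydration:

```python
import pyarrow as pa

# Write a table into the Arrow IPC *stream* format, then hydrate it back the
# same way _parse_exec_async_results does.
table = pa.table({"relationId": ["a", "b"], "value": [1, 2]})

sink = pa.BufferOutputStream()
with pa.ipc.new_stream(sink, table.schema) as writer:
    writer.write_table(table)
file_content = sink.getvalue().to_pybytes()  # bytes, as fetched from a presigned URL

with pa.ipc.open_stream(file_content) as reader:
    schema = reader.schema
    batches = [batch for batch in reader]
assert pa.Table.from_batches(batches, schema=schema).equals(table)
```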
```diff
-    def _download_results(
-        self, artifact_info: Dict[str, Dict], txn_id: str, state: str
-    ) -> TransactionAsyncResponse:
-        with debugging.span("download_results"):
-            # Fetch artifacts
-            artifacts = self._fetch_exec_async_artifacts(artifact_info)
-
-            # Directly use meta_json as it is fetched
-            meta_json_bytes = artifacts["metadata.json"]
-
-            # Decode the bytes and parse the JSON
-            meta_json_str = meta_json_bytes.decode('utf-8')
-            meta_json = json.loads(meta_json_str)  # Parse the JSON string
-
-            # Use the metadata to map arrow files to the relations they contain
-            try:
-                arrow_files_to_relations = {
-                    artifact["filename"]: artifact["relationId"]
-                    for artifact in meta_json
-                }
-            except KeyError:
-                # TODO: Remove this fallback mechanism later once several engine versions are updated
-                arrow_files_to_relations = {
-                    f"{ix}.arrow": artifact["relationId"]
-                    for ix, artifact in enumerate(meta_json)
-                }
-
-            # Hydrate the arrow files into tables
-            results = self._parse_exec_async_results(
-                [
-                    (arrow_files_to_relations[name], content)
-                    for name, content in artifacts.items()
-                    if name.endswith(".arrow")
-                ]
-            )
-
-            # Create and return the response
-            rsp = TransactionAsyncResponse()
-            rsp.transaction = {
-                "id": txn_id,
-                "state": state,
-                "response_format_version": None,
-            }
-            rsp.metadata = meta_json
-            rsp.problems = artifacts.get(
-                "problems.json"
-            )  # Safely access possible missing keys
-            rsp.results = results
-            return rsp
-
-    def get_transaction_problems(self, txn_id: str) -> List[Dict[str, Any]]:
-        with debugging.span("get_own_transaction_problems"):
-            response = self._exec(
-                f"select * from table({APP_NAME}.api.get_own_transaction_problems('{txn_id}'));"
-            )
-            if not response:
-                return []
-            return response
-
-    def get_url_key(self, metadata) -> str:
-        # In Azure, there is only one type of URL, which is used for both internal and
-        # external access; always use that one
-        if is_azure_url(metadata['PRESIGNED_URL']):
-            return 'PRESIGNED_URL'
-
-        configured = self.config.get("download_url_type", None)
-        if configured == "internal":
-            return 'PRESIGNED_URL_AP'
-        elif configured == "external":
-            return "PRESIGNED_URL"
-
-        if is_container_runtime():
-            return 'PRESIGNED_URL_AP'
-
-        return 'PRESIGNED_URL'
-
```
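The filename-to-relation mapping in `_download_results` carries a compatibility fallback: newer engines include a `filename` per metadata entry, older ones only guarantee ordering. A small illustration with made-up metadata rows:

```python
# Made-up metadata rows; only the shape matters.
meta_json = [
    {"relationId": "/:output/v1", "filename": "0.arrow"},
    {"relationId": "/:output/v2", "filename": "1.arrow"},
]
try:
    mapping = {a["filename"]: a["relationId"] for a in meta_json}
except KeyError:
    # Older engines: no "filename" field, so fall back to positional names.
    mapping = {f"{ix}.arrow": a["relationId"] for ix, a in enumerate(meta_json)}
assert mapping["1.arrow"] == "/:output/v2"
```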
```diff
-    def _exec_rai_app(
-        self,
-        database: str,
-        engine: str | None,
-        raw_code: str,
-        inputs: Dict,
-        readonly=True,
-        nowait_durable=False,
-        request_headers: Dict | None = None,
-        bypass_index=False,
-        language: str = "rel",
-        query_timeout_mins: int | None = None,
-    ):
-        """
-        High-level method to execute RAI app stored procedures.
-
-        Builds and executes SQL to call the RAI app's exec_async_v2 stored procedure.
-        This method handles the SQL string construction for two different formats:
-        1. New format (with graph index): Uses object payload with parameterized query
-        2. Legacy format: Uses positional parameters
-
-        The choice between formats depends on the use_graph_index configuration.
-        The new format allows the stored procedure to hash the model and username
-        to determine the database, while the legacy format uses the passed database directly.
-
-        This method is called by _exec_async_v2 to create transactions. It skips
-        use_index retry logic (skip_engine_db_error_retry=True) because that
-        is handled at a higher level by exec_raw/exec_lqp.
-
-        Args:
-            database: Database/model name
-            engine: Engine name (optional)
-            raw_code: Code to execute (REL, LQP, or SQL)
-            inputs: Input parameters for the query
-            readonly: Whether the transaction is read-only
-            nowait_durable: Whether to wait for durable writes
-            request_headers: Optional HTTP headers
-            bypass_index: Whether to bypass graph index setup
-            language: Query language ("rel" or "lqp")
-            query_timeout_mins: Optional query timeout in minutes
-
-        Returns:
-            Response from the stored procedure call (transaction creation result)
-
-        Raises:
-            Exception: If transaction creation fails
-        """
-        assert language == "rel" or language == "lqp", "Only 'rel' and 'lqp' languages are supported"
-        if query_timeout_mins is None and (timeout_value := self.config.get("query_timeout_mins", DEFAULT_QUERY_TIMEOUT_MINS)) is not None:
-            query_timeout_mins = int(timeout_value)
-        # Depending on the shape of the input, the behavior of exec_async_v2 changes.
-        # When using the new format (with an object), the function retrieves the
-        # 'rai' database by hashing the model and username. In contrast, the
-        # current version directly uses the passed database value.
-        # Therefore, we must use the original exec_async_v2 when not using the
-        # graph index to ensure the correct database is utilized.
-        use_graph_index = self.config.get("use_graph_index", USE_GRAPH_INDEX)
-        if use_graph_index and not bypass_index:
-            payload = {
-                'database': database,
-                'engine': engine,
-                'inputs': inputs,
-                'readonly': readonly,
-                'nowait_durable': nowait_durable,
-                'language': language,
-                'headers': request_headers
-            }
-            if query_timeout_mins is not None:
-                payload["timeout_mins"] = query_timeout_mins
-            sql_string = f"CALL {APP_NAME}.api.exec_async_v2(?, {payload});"
-        else:
-            if query_timeout_mins is not None:
-                sql_string = f"CALL {APP_NAME}.api.exec_async_v2('{database}','{engine}', ?, {inputs}, {readonly}, {nowait_durable}, '{language}', {query_timeout_mins}, {request_headers});"
-            else:
-                sql_string = f"CALL {APP_NAME}.api.exec_async_v2('{database}','{engine}', ?, {inputs}, {readonly}, {nowait_durable}, '{language}', {request_headers});"
-        # Don't let exec setup GI on failure, exec_raw and exec_lqp will do that and add the correct headers.
-        response = self._exec(
-            sql_string,
-            raw_code,
-            skip_engine_db_error_retry=True,
-        )
-        if not response:
-            raise Exception("Failed to create transaction")
-        return response
-
```
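For reference, the two statement shapes `_exec_rai_app` builds differ only in how arguments are passed. A sketch with made-up values (`APP_NAME` stands in for the configured app name):

```python
APP_NAME = "relationalai"  # placeholder for the configured app name

payload = {
    "database": "my_model", "engine": "my_engine", "inputs": {},
    "readonly": True, "nowait_durable": False, "language": "rel",
    "headers": None,
}
# Graph-index path: one object argument; the app resolves the database itself.
new_format = f"CALL {APP_NAME}.api.exec_async_v2(?, {payload});"
# Legacy path: positional arguments; the database name is used verbatim.
legacy_format = (
    f"CALL {APP_NAME}.api.exec_async_v2('my_model','my_engine', ?, "
    f"{{}}, True, False, 'rel', None);"
)
```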
```diff
-    def _exec_async_v2(
-        self,
-        database: str,
-        engine: str | None,
-        raw_code: str,
-        inputs: Dict | None = None,
-        readonly=True,
-        nowait_durable=False,
-        headers: Dict | None = None,
-        bypass_index=False,
-        language: str = "rel",
-        query_timeout_mins: int | None = None,
-        gi_setup_skipped: bool = False,
-    ):
-        """
-        High-level async execution method with transaction polling and artifact management.
-
-        This is the core method for executing queries asynchronously. It:
-        1. Creates a transaction by calling _exec_rai_app
-        2. Handles two execution paths:
-           - Fast path: Transaction completes immediately (COMPLETED/ABORTED)
-           - Slow path: Transaction is pending, requires polling until completion
-        3. Manages pending transactions list
-        4. Downloads and returns query results/artifacts
-
-        This method is called by _execute_code (base implementation) and can be
-        overridden by child classes (e.g., DirectAccessResources uses HTTP instead).
-
-        Args:
-            database: Database/model name
-            engine: Engine name (optional)
-            raw_code: Code to execute (REL, LQP, or SQL)
-            inputs: Input parameters for the query
-            readonly: Whether the transaction is read-only
-            nowait_durable: Whether to wait for durable writes
-            headers: Optional HTTP headers
-            bypass_index: Whether to bypass graph index setup
-            language: Query language ("rel" or "lqp")
-            query_timeout_mins: Optional query timeout in minutes
-            gi_setup_skipped: Whether graph index setup was skipped (for retry logic)
-
-        Returns:
-            Query results (downloaded artifacts)
-        """
-        if inputs is None:
-            inputs = {}
-        request_headers = debugging.add_current_propagation_headers(headers)
-        query_attrs_dict = json.loads(request_headers.get("X-Query-Attributes", "{}"))
-
-        with debugging.span("transaction", **query_attrs_dict) as txn_span:
-            with debugging.span("create_v2", **query_attrs_dict) as create_span:
-                request_headers['user-agent'] = get_pyrel_version(self.generation)
-                request_headers['gi_setup_skipped'] = str(gi_setup_skipped)
-                request_headers['pyrel_program_id'] = debugging.get_program_span_id() or ""
-                response = self._exec_rai_app(
-                    database=database,
-                    engine=engine,
-                    raw_code=raw_code,
-                    inputs=inputs,
-                    readonly=readonly,
-                    nowait_durable=nowait_durable,
-                    request_headers=request_headers,
-                    bypass_index=bypass_index,
-                    language=language,
-                    query_timeout_mins=query_timeout_mins,
-                )
-
-                artifact_info = {}
-                rows = list(iter(response))
-
-                # process the first row since txn_id and state are the same for all rows
-                first_row = rows[0]
-                txn_id = first_row['ID']
-                state = first_row['STATE']
-                filename = first_row['FILENAME']
-
-                txn_span["txn_id"] = txn_id
-                create_span["txn_id"] = txn_id
-                debugging.event("transaction_created", txn_span, txn_id=txn_id)
-
-            # fast path: transaction already finished
-            if state in ["COMPLETED", "ABORTED"]:
-                if txn_id in self._pending_transactions:
-                    self._pending_transactions.remove(txn_id)
-
-                # Process rows to get the rest of the artifacts
-                for row in rows:
-                    filename = row['FILENAME']
-                    artifact_info[filename] = row
-
-            # Slow path: transaction not done yet; start polling
-            else:
-                self._pending_transactions.append(txn_id)
-                with debugging.span("wait", txn_id=txn_id):
-                    poll_with_specified_overhead(
-                        lambda: self._check_exec_async_status(txn_id, headers=request_headers), 0.1
-                    )
-                artifact_info = self._list_exec_async_artifacts(txn_id, headers=request_headers)
-
-            with debugging.span("fetch"):
-                return self._download_results(artifact_info, txn_id, state)
-
```
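The control flow above reduces to a fast path (the CALL itself returned a terminal state, so its rows already carry the artifact list) and a slow path (poll, then list artifacts). A compressed sketch reusing the method names above on a hypothetical `resources` object, not the actual API:

```python
def wait_for_artifacts(resources, rows, txn_id, request_headers):
    # Sketch of the fast/slow split in _exec_async_v2.
    state = rows[0]["STATE"]
    if state in ("COMPLETED", "ABORTED"):
        # Fast path: artifact rows came back with the CALL response.
        return {row["FILENAME"]: row for row in rows}
    # Slow path: poll until terminal, then ask for the artifact list.
    poll_with_specified_overhead(
        lambda: resources._check_exec_async_status(txn_id, headers=request_headers), 0.1
    )
    return resources._list_exec_async_artifacts(txn_id, headers=request_headers)
```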
```diff
-    def get_user_based_engine_name(self):
-        if not self._session:
-            self._session = self.get_sf_session()
-        user_table = self._session.sql("select current_user()").collect()
-        user = user_table[0][0]
-        assert isinstance(user, str), f"current_user() must return a string, not {type(user)}"
-        return _sanitize_user_name(user)
-
-    def is_engine_ready(self, engine_name: str):
-        engine = self.get_engine(engine_name)
-        return engine and engine["state"] == "READY"
-
-    def auto_create_engine(self, name: str | None = None, size: str | None = None, headers: Dict | None = None):
-        """Synchronously create/ensure an engine is ready, blocking until ready."""
-        with debugging.span("auto_create_engine", active=self._active_engine) as span:
-            active = self._get_active_engine()
-            if active:
-                return active
-
-            # Resolve and validate parameters
-            engine_name, engine_size = self._prepare_engine_params(name, size)
-
-            try:
-                # Get current engine state
-                engine = self.get_engine(engine_name)
-                if engine:
-                    span.update(cast(dict, engine))
-
-                # Create context for state handling
-                context = EngineContext(
-                    engine_name=engine_name,
-                    engine_size=engine_size,
-                    headers=headers,
-                    requested_size=size,
-                    span=span,
-                )
-
-                # Process engine state using sync handlers
-                self._process_engine_state(engine, context, self._sync_engine_state_handlers)
-
-            except Exception as e:
-                self._handle_engine_creation_errors(e, engine_name)
-
-        return engine_name
-
-    def auto_create_engine_async(self, name: str | None = None):
-        """Asynchronously create/ensure an engine, returns immediately."""
-        active = self._get_active_engine()
-        if active and (active == name or name is None):
-            return active
-
-        with Spinner(
-            "Checking engine status",
-            leading_newline=True,
-        ) as spinner:
-            with debugging.span("auto_create_engine_async", active=self._active_engine):
-                # Resolve and validate parameters (use_default_size=True for async)
-                engine_name, engine_size = self._prepare_engine_params(name, None, use_default_size=True)
-
-                try:
-                    # Get current engine state
-                    engine = self.get_engine(engine_name)
-
-                    # Create context for state handling
-                    context = EngineContext(
-                        engine_name=engine_name,
-                        engine_size=engine_size,
-                        headers=None,
-                        requested_size=None,
-                        spinner=spinner,
-                    )
-
-                    # Process engine state using async handlers
-                    self._process_engine_state(engine, context, self._async_engine_state_handlers, set_active_on_success=True)
-
-                except Exception as e:
-                    spinner.update_messages({
-                        "finished_message": f"Failed to create engine {engine_name}",
-                    })
-                    self._handle_engine_creation_errors(e, engine_name, preserve_rai_exception=True)
-
-        return engine_name
-
```
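Hypothetical call sites for the two entry points above (`resources` and the size string are illustrative, not values confirmed by this diff):

```python
# Blocks until the engine reaches READY, creating it if needed.
engine_name = resources.auto_create_engine(size="XS")  # size value is made up

# Fires off creation and returns immediately; readiness can be checked later.
engine_name = resources.auto_create_engine_async()
if resources.is_engine_ready(engine_name):
    print(f"{engine_name} is ready")
```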
```diff
-    #--------------------------------------------------
-    # Exec
-    #--------------------------------------------------
-
-    def _execute_code(
-        self,
-        database: str,
-        engine: str | None,
-        raw_code: str,
-        inputs: Dict | None,
-        readonly: bool,
-        nowait_durable: bool,
-        headers: Dict | None,
-        bypass_index: bool,
-        language: str,
-        query_timeout_mins: int | None,
-    ) -> Any:
-        """
-        Template method for code execution - can be overridden by child classes.
-
-        This is a template method that provides a hook for child classes to add
-        execution logic (like retry mechanisms). The base implementation simply
-        calls _exec_async_v2 directly.
-
-        UseIndexResources overrides this method to use _exec_with_gi_retry, which
-        adds automatic use_index polling on engine/database errors.
-
-        This method is called by exec_lqp() and exec_raw() to provide a single
-        execution point that can be customized per resource class.
-
-        Args:
-            database: Database/model name
-            engine: Engine name (optional)
-            raw_code: Code to execute (already processed/encoded)
-            inputs: Input parameters for the query
-            readonly: Whether the transaction is read-only
-            nowait_durable: Whether to wait for durable writes
-            headers: Optional HTTP headers
-            bypass_index: Whether to bypass graph index setup
-            language: Query language ("rel" or "lqp")
-            query_timeout_mins: Optional query timeout in minutes
-
-        Returns:
-            Query results
-        """
-        return self._exec_async_v2(
-            database, engine, raw_code, inputs, readonly, nowait_durable,
-            headers=headers, bypass_index=bypass_index, language=language,
-            query_timeout_mins=query_timeout_mins, gi_setup_skipped=True,
-        )
-
-    def exec_lqp(
-        self,
-        database: str,
-        engine: str | None,
-        raw_code: bytes,
-        readonly=True,
-        *,
-        inputs: Dict | None = None,
-        nowait_durable=False,
-        headers: Dict | None = None,
-        bypass_index=False,
-        query_timeout_mins: int | None = None,
-    ):
-        """Execute LQP code."""
-        raw_code_b64 = base64.b64encode(raw_code).decode("utf-8")
-        return self._execute_code(
-            database, engine, raw_code_b64, inputs, readonly, nowait_durable,
-            headers, bypass_index, 'lqp', query_timeout_mins
-        )
-
-    def exec_raw(
-        self,
-        database: str,
-        engine: str | None,
-        raw_code: str,
-        readonly=True,
-        *,
-        inputs: Dict | None = None,
-        nowait_durable=False,
-        headers: Dict | None = None,
-        bypass_index=False,
-        query_timeout_mins: int | None = None,
-    ):
-        """Execute raw code."""
-        raw_code = raw_code.replace("'", "\\'")
-        return self._execute_code(
-            database, engine, raw_code, inputs, readonly, nowait_durable,
-            headers, bypass_index, 'rel', query_timeout_mins
-        )
-
-
```
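The two wrappers differ only in how they prepare `raw_code` before handing it to `_execute_code`; a quick illustration (payloads made up):

```python
import base64

# exec_lqp: LQP arrives as bytes and travels as base64 text.
lqp_bytes = b"\x0a\x0b\x0c"  # made-up LQP payload
lqp_arg = base64.b64encode(lqp_bytes).decode("utf-8")

# exec_raw: Rel source has single quotes escaped so it survives being
# embedded in the single-quoted CALL argument.
rel_source = "def output {'hello'}"
rel_arg = rel_source.replace("'", "\\'")
```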
1934
|
-
def format_results(self, results, task:m.Task|None=None) -> Tuple[DataFrame, List[Any]]:
|
|
1935
|
-
return result_helpers.format_results(results, task)
|
|
1936
|
-
|
|
1937
|
-
#--------------------------------------------------
|
|
1938
|
-
# Exec format
|
|
1939
|
-
#--------------------------------------------------
|
|
1940
|
-
|
|
1941
|
-
def exec_format(
|
|
1942
|
-
self,
|
|
1943
|
-
database: str,
|
|
1944
|
-
engine: str,
|
|
1945
|
-
raw_code: str,
|
|
1946
|
-
cols: List[str],
|
|
1947
|
-
format: str,
|
|
1948
|
-
inputs: Dict | None = None,
|
|
1949
|
-
readonly=True,
|
|
1950
|
-
nowait_durable=False,
|
|
1951
|
-
skip_invalid_data=False,
|
|
1952
|
-
headers: Dict | None = None,
|
|
1953
|
-
query_timeout_mins: int | None = None,
|
|
1954
|
-
):
|
|
1955
|
-
if inputs is None:
|
|
1956
|
-
inputs = {}
|
|
1957
|
-
if headers is None:
|
|
1958
|
-
headers = {}
|
|
1959
|
-
if 'user-agent' not in headers:
|
|
1960
|
-
headers['user-agent'] = get_pyrel_version(self.generation)
|
|
1961
|
-
if query_timeout_mins is None and (timeout_value := self.config.get("query_timeout_mins", DEFAULT_QUERY_TIMEOUT_MINS)) is not None:
|
|
1962
|
-
query_timeout_mins = int(timeout_value)
|
|
1963
|
-
# TODO: add headers
|
|
1964
|
-
start = time.perf_counter()
|
|
1965
|
-
output_table = "out" + str(uuid.uuid4()).replace("-", "_")
|
|
1966
|
-
temp_table = f"temp_{output_table}"
|
|
1967
|
-
use_graph_index = self.config.get("use_graph_index", USE_GRAPH_INDEX)
|
|
1968
|
-
txn_id = None
|
|
1969
|
-
rejected_rows = None
|
|
1970
|
-
col_names_map = None
|
|
1971
|
-
artifacts = None
|
|
1972
|
-
assert self._session
|
|
1973
|
-
temp = self._session.createDataFrame([], StructType([StructField(name, StringType()) for name in cols]))
|
|
1974
|
-
with debugging.span("transaction") as txn_span:
|
|
1975
|
-
try:
|
|
1976
|
-
# In the graph index case we need to use the new exec_into_table proc as it obfuscates the db name
|
|
1977
|
-
with debugging.span("exec_format"):
|
|
1978
|
-
if use_graph_index:
|
|
1979
|
-
# we do not provide a default value for query_timeout_mins so that we can control the default on app level
|
|
1980
|
-
if query_timeout_mins is not None:
|
|
1981
|
-
res = self._exec(f"call {APP_NAME}.api.exec_into_table(?, ?, ?, ?, ?, NULL, ?, {headers}, ?, ?);", [database, engine, raw_code, output_table, readonly, nowait_durable, skip_invalid_data, query_timeout_mins])
|
|
1982
|
-
else:
|
|
1983
|
-
res = self._exec(f"call {APP_NAME}.api.exec_into_table(?, ?, ?, ?, ?, NULL, ?, {headers}, ?);", [database, engine, raw_code, output_table, readonly, nowait_durable, skip_invalid_data])
|
|
1984
|
-
txn_id = json.loads(res[0]["EXEC_INTO_TABLE"])["rai_transaction_id"]
|
|
1985
|
-
rejected_rows = json.loads(res[0]["EXEC_INTO_TABLE"]).get("rejected_rows", [])
|
|
1986
|
-
rejected_rows_count = json.loads(res[0]["EXEC_INTO_TABLE"]).get("rejected_rows_count", 0)
|
|
1987
|
-
else:
|
|
1988
|
-
if query_timeout_mins is not None:
|
|
1989
|
-
res = self._exec(f"call {APP_NAME}.api.exec_into(?, ?, ?, ?, ?, {inputs}, ?, {headers}, ?, ?);", [database, engine, raw_code, output_table, readonly, nowait_durable, skip_invalid_data, query_timeout_mins])
|
|
1990
|
-
else:
|
|
1991
|
-
res = self._exec(f"call {APP_NAME}.api.exec_into(?, ?, ?, ?, ?, {inputs}, ?, {headers}, ?);", [database, engine, raw_code, output_table, readonly, nowait_durable, skip_invalid_data])
|
|
1992
|
-
txn_id = json.loads(res[0]["EXEC_INTO"])["rai_transaction_id"]
|
|
1993
|
-
rejected_rows = json.loads(res[0]["EXEC_INTO"]).get("rejected_rows", [])
|
|
1994
|
-
rejected_rows_count = json.loads(res[0]["EXEC_INTO"]).get("rejected_rows_count", 0)
|
|
1995
|
-
debugging.event("transaction_created", txn_span, txn_id=txn_id)
|
|
1996
|
-
debugging.time("exec_format", time.perf_counter() - start, DataFrame())
|
|
1997
|
-
|
|
1998
|
-
with debugging.span("temp_table_swap", txn_id=txn_id):
|
|
1999
|
-
out_sample = self._exec(f"select * from {APP_NAME}.results.{output_table} limit 1;")
|
|
2000
|
-
if out_sample:
|
|
2001
|
-
keys = set([k.lower() for k in out_sample[0].as_dict().keys()])
|
|
2002
|
-
col_names_map = {}
|
|
2003
|
-
for ix, name in enumerate(cols):
|
|
2004
|
-
col_key = f"col{ix:03}"
|
|
2005
|
-
if col_key in keys:
|
|
2006
|
-
col_names_map[col_key] = IdentityParser(name).identity
|
|
2007
|
-
else:
|
|
2008
|
-
col_names_map[col_key] = name
|
|
2009
|
-
|
|
2010
|
-
names = ", ".join([
|
|
2011
|
-
f"{col_key} as {alias}" if col_key in keys else f"NULL as {alias}"
|
|
2012
|
-
for col_key, alias in col_names_map.items()
|
|
2013
|
-
])
|
|
2014
|
-
self._exec(f"CREATE TEMPORARY TABLE {APP_NAME}.results.{temp_table} AS SELECT {names} FROM {APP_NAME}.results.{output_table};")
|
|
2015
|
-
self._exec(f"call {APP_NAME}.api.drop_result_table(?)", [output_table])
|
|
2016
|
-
temp = cast(snowflake.snowpark.DataFrame, self._exec(f"select * from {APP_NAME}.results.{temp_table}", raw=True))
|
|
2017
|
-
if rejected_rows:
|
|
2018
|
-
debugging.warn(RowsDroppedFromTargetTableWarning(rejected_rows, rejected_rows_count, col_names_map))
|
|
2019
|
-
except Exception as e:
|
|
2020
|
-
messages = collect_error_messages(e)
|
|
2021
|
-
if any("no columns returned" in msg or "columns of results could not be determined" in msg for msg in messages):
|
|
2022
|
-
pass
|
|
2023
|
-
else:
|
|
2024
|
-
raise e
|
|
2025
|
-
if txn_id:
|
|
2026
|
-
artifact_info = self._list_exec_async_artifacts(txn_id)
|
|
2027
|
-
with debugging.span("fetch"):
|
|
2028
|
-
artifacts = self._download_results(artifact_info, txn_id, "ABORTED")
|
|
2029
|
-
return (temp, artifacts)
|
|
2030
|
-
|
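The temp-table swap above renames positional result columns (`col000`, `col001`, ...) back to the requested names, substituting NULL for columns the query did not return. The core of that mapping, with the `IdentityParser` normalization elided:

```python
cols = ["id", "name", "score"]          # requested column names
keys = {"col000", "col002"}             # columns present in the result sample

col_names_map = {f"col{ix:03}": name for ix, name in enumerate(cols)}
names = ", ".join(
    f"{col_key} as {alias}" if col_key in keys else f"NULL as {alias}"
    for col_key, alias in col_names_map.items()
)
assert names == "col000 as id, NULL as name, col002 as score"
```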
```diff
-    #--------------------------------------------------
-    # Custom model types
-    #--------------------------------------------------
-
-    def _get_ns(self, model:dsl.Graph):
-        if model not in self._ns_cache:
-            self._ns_cache[model] = _Snowflake(model)
-        return self._ns_cache[model]
-
-    def to_model_type(self, model:dsl.Graph, name: str, source:str):
-        parser = IdentityParser(source)
-        if not parser.is_complete:
-            raise SnowflakeInvalidSource(Errors.call_source(), source)
-        ns = self._get_ns(model)
-        # skip the last item in the list (the full identifier)
-        for part in parser.to_list()[:-1]:
-            ns = ns._safe_get(part)
-        assert parser.identity, f"Error parsing source in to_model_type: {source}"
-        self.sources.add(parser.identity)
-        return ns
-
```
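`to_model_type` descends the proxy namespace one identifier at a time; `parser.to_list()` yields the name parts with the full identity last, which the loop skips. The traversal shape, with a stand-in tree in place of the wrapper classes defined later in this file:

```python
from collections import defaultdict

def node():
    return defaultdict(node)

root = node()
# Assumed shape of IdentityParser(...).to_list(): parts, then the full identity.
parts = ["MY_DB", "MY_SCHEMA", "MY_TABLE", "MY_DB.MY_SCHEMA.MY_TABLE"]
ns = root
for part in parts[:-1]:   # mirrors ns = ns._safe_get(part)
    ns = ns[part]
```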
```diff
-    #--------------------------------------------------
-    # Source Management
-    #--------------------------------------------------
-
-    def _check_source_updates(self, sources: Iterable[str]):
-        if not sources:
-            return {}
-        app_name = self.get_app_name()
-
-        source_types = dict[str, SourceInfo]()
-        partitioned_sources: dict[str, dict[str, list[dict[str, str]]]] = defaultdict(
-            lambda: defaultdict(list)
-        )
-        fqn_to_parts: dict[str, tuple[str, str, str]] = {}
-
-        for source in sources:
-            parser = IdentityParser(source, True)
-            parsed = parser.to_list()
-            assert len(parsed) == 4, f"Invalid source: {source}"
-            db, schema, entity, identity = parsed
-            assert db and schema and entity and identity, f"Invalid source: {source}"
-            source_types[identity] = cast(
-                SourceInfo,
-                {
-                    "type": None,
-                    "state": "",
-                    "columns_hash": None,
-                    "table_created_at": None,
-                    "stream_created_at": None,
-                    "last_ddl": None,
-                },
-            )
-            partitioned_sources[db][schema].append({"entity": entity, "identity": identity})
-            fqn_to_parts[identity] = (db, schema, entity)
-
-        if not partitioned_sources:
-            return source_types
-
-        state_queries: list[str] = []
-        for db, schemas in partitioned_sources.items():
-            select_rows: list[str] = []
-            for schema, tables in schemas.items():
-                for table_info in tables:
-                    select_rows.append(
-                        "SELECT "
-                        f"{IdentityParser.to_sql_value(db)} AS catalog_name, "
-                        f"{IdentityParser.to_sql_value(schema)} AS schema_name, "
-                        f"{IdentityParser.to_sql_value(table_info['entity'])} AS table_name"
-                    )
-
-            if not select_rows:
-                continue
-
-            target_entities_clause = "\n UNION ALL\n ".join(select_rows)
-            # Main query:
-            # 1. Enumerate the target tables via target_entities.
-            # 2. Pull their metadata (last_altered, type) from INFORMATION_SCHEMA.TABLES.
-            # 3. Look up the most recent stream activity for those FQNs only.
-            # 4. Capture creation timestamps and use last_ddl vs created_at to classify each target,
-            #    so we mark tables as stale when they were recreated even if column hashes still match.
-            state_queries.append(
-                f"""WITH target_entities AS (
-                    {target_entities_clause}
-                ),
-                table_info AS (
-                    SELECT
-                        {app_name}.api.normalize_fq_ids(
-                            ARRAY_CONSTRUCT(
-                                CASE
-                                    WHEN t.table_catalog = UPPER(t.table_catalog) THEN t.table_catalog
-                                    ELSE '"' || t.table_catalog || '"'
-                                END || '.' ||
-                                CASE
-                                    WHEN t.table_schema = UPPER(t.table_schema) THEN t.table_schema
-                                    ELSE '"' || t.table_schema || '"'
-                                END || '.' ||
-                                CASE
-                                    WHEN t.table_name = UPPER(t.table_name) THEN t.table_name
-                                    ELSE '"' || t.table_name || '"'
-                                END
-                            )
-                        )[0]:identifier::string AS fqn,
-                        CONVERT_TIMEZONE('UTC', t.last_altered) AS last_ddl,
-                        CONVERT_TIMEZONE('UTC', t.created) AS table_created_at,
-                        t.table_type AS kind
-                    FROM {db}.INFORMATION_SCHEMA.tables t
-                    JOIN target_entities te
-                        ON t.table_catalog = te.catalog_name
-                        AND t.table_schema = te.schema_name
-                        AND t.table_name = te.table_name
-                ),
-                stream_activity AS (
-                    SELECT
-                        sa.fqn,
-                        MAX(sa.created_at) AS created_at
-                    FROM (
-                        SELECT
-                            {app_name}.api.normalize_fq_ids(ARRAY_CONSTRUCT(fq_object_name))[0]:identifier::string AS fqn,
-                            created_at
-                        FROM {app_name}.api.data_streams
-                        WHERE rai_database = '{PYREL_ROOT_DB}'
-                    ) sa
-                    JOIN table_info ti
-                        ON sa.fqn = ti.fqn
-                    GROUP BY sa.fqn
-                )
-                SELECT
-                    ti.fqn,
-                    ti.kind,
-                    ti.last_ddl,
-                    ti.table_created_at,
-                    sa.created_at AS stream_created_at,
-                    IFF(
-                        DATEDIFF(second, sa.created_at::timestamp, ti.last_ddl::timestamp) > 0,
-                        'STALE',
-                        'CURRENT'
-                    ) AS state
-                FROM table_info ti
-                LEFT JOIN stream_activity sa
-                    ON sa.fqn = ti.fqn
-                """
-            )
-
-        stale_fqns: list[str] = []
-        for state_query in state_queries:
-            for row in self._exec(state_query):
-                row_dict = row.as_dict() if hasattr(row, "as_dict") else dict(row)
-                row_fqn = row_dict["FQN"]
-                parser = IdentityParser(row_fqn, True)
-                fqn = parser.identity
-                assert fqn, f"Error parsing returned FQN: {row_fqn}"
-
-                source_types[fqn]["type"] = (
-                    "TABLE" if row_dict["KIND"] == "BASE TABLE" else row_dict["KIND"]
-                )
-                source_types[fqn]["state"] = row_dict["STATE"]
-                source_types[fqn]["last_ddl"] = normalize_datetime(row_dict.get("LAST_DDL"))
-                source_types[fqn]["table_created_at"] = normalize_datetime(row_dict.get("TABLE_CREATED_AT"))
-                source_types[fqn]["stream_created_at"] = normalize_datetime(row_dict.get("STREAM_CREATED_AT"))
-                if row_dict["STATE"] == "STALE":
-                    stale_fqns.append(fqn)
-
-        if not stale_fqns:
-            return source_types
-
-        # We batch stale tables by database/schema so each Snowflake query can hash
-        # multiple objects at once instead of issuing one statement per table.
-        stale_partitioned: dict[str, dict[str, list[dict[str, str]]]] = defaultdict(
-            lambda: defaultdict(list)
-        )
-        for fqn in stale_fqns:
-            db, schema, table = fqn_to_parts[fqn]
-            stale_partitioned[db][schema].append({"table": table, "identity": fqn})
-
-        # Build one hash query per database, grouping schemas/tables inside so we submit
-        # at most a handful of set-based statements to Snowflake.
-        for db, schemas in stale_partitioned.items():
-            column_select_rows: list[str] = []
-            for schema, tables in schemas.items():
-                for table_info in tables:
-                    # Build the literal rows for this db/schema so we can join back
-                    # against INFORMATION_SCHEMA.COLUMNS in a single statement.
-                    column_select_rows.append(
-                        "SELECT "
-                        f"{IdentityParser.to_sql_value(db)} AS catalog_name, "
-                        f"{IdentityParser.to_sql_value(schema)} AS schema_name, "
-                        f"{IdentityParser.to_sql_value(table_info['table'])} AS table_name"
-                    )
-
-            if not column_select_rows:
-                continue
-
-            target_entities_clause = "\n UNION ALL\n ".join(column_select_rows)
-            # Main query: compute deterministic column hashes for every stale table
-            # in this database/schema batch so we can compare schemas without a round trip per table.
-            column_query = f"""WITH target_entities AS (
-                {target_entities_clause}
-            ),
-            column_info AS (
-                SELECT
-                    {app_name}.api.normalize_fq_ids(
-                        ARRAY_CONSTRUCT(
-                            CASE
-                                WHEN c.table_catalog = UPPER(c.table_catalog) THEN c.table_catalog
-                                ELSE '"' || c.table_catalog || '"'
-                            END || '.' ||
-                            CASE
-                                WHEN c.table_schema = UPPER(c.table_schema) THEN c.table_schema
-                                ELSE '"' || c.table_schema || '"'
-                            END || '.' ||
-                            CASE
-                                WHEN c.table_name = UPPER(c.table_name) THEN c.table_name
-                                ELSE '"' || c.table_name || '"'
-                            END
-                        )
-                    )[0]:identifier::string AS fqn,
-                    c.column_name,
-                    CASE
-                        WHEN c.numeric_precision IS NOT NULL AND c.numeric_scale IS NOT NULL
-                            THEN c.data_type || '(' || c.numeric_precision || ',' || c.numeric_scale || ')'
-                        WHEN c.datetime_precision IS NOT NULL
-                            THEN c.data_type || '(0,' || c.datetime_precision || ')'
-                        WHEN c.character_maximum_length IS NOT NULL
-                            THEN c.data_type || '(' || c.character_maximum_length || ')'
-                        ELSE c.data_type
-                    END AS type_signature,
-                    IFF(c.is_nullable = 'YES', 'YES', 'NO') AS nullable_flag
-                FROM {db}.INFORMATION_SCHEMA.COLUMNS c
-                JOIN target_entities te
-                    ON c.table_catalog = te.catalog_name
-                    AND c.table_schema = te.schema_name
-                    AND c.table_name = te.table_name
-            )
-            SELECT
-                fqn,
-                HEX_ENCODE(
-                    HASH_AGG(
-                        HASH(
-                            column_name,
-                            type_signature,
-                            nullable_flag
-                        )
-                    )
-                ) AS columns_hash
-            FROM column_info
-            GROUP BY fqn
-            """
-
-            for row in self._exec(column_query):
-                row_fqn = row["FQN"]
-                parser = IdentityParser(row_fqn, True)
-                fqn = parser.identity
-                assert fqn, f"Error parsing returned FQN: {row_fqn}"
-                source_types[fqn]["columns_hash"] = row["COLUMNS_HASH"]
-
-        return source_types
-
-    def _get_source_references(self, source_info: dict[str, SourceInfo]):
-        app_name = self.get_app_name()
-        missing_sources = []
-        invalid_sources = {}
-        source_references = []
-        for source, info in source_info.items():
-            source_type = info.get("type")
-            if source_type is None:
-                missing_sources.append(source)
-            elif source_type not in ("TABLE", "VIEW"):
-                invalid_sources[source] = source_type
-            else:
-                source_references.append(f"{app_name}.api.object_reference('{source_type}', '{source}')")
-
-        if missing_sources:
-            current_role = self.get_sf_session().get_current_role()
-            if current_role is None:
-                current_role = self.config.get("role", None)
-            debugging.warn(UnknownSourceWarning(missing_sources, current_role))
-
-        if invalid_sources:
-            debugging.warn(InvalidSourceTypeWarning(invalid_sources))
-
-        self.source_references = source_references
-        return source_references
-
```
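The batched SQL boils down to one classification rule per source: if the table saw DDL after its data stream was created (for example, it was dropped and recreated), it is STALE; otherwise CURRENT. The same comparison in Python terms, with made-up timestamps:

```python
from datetime import datetime, timezone

stream_created_at = datetime(2024, 1, 1, tzinfo=timezone.utc)
last_ddl = datetime(2024, 3, 1, tzinfo=timezone.utc)

# Same test as DATEDIFF(second, stream_created_at, last_ddl) > 0.
state = "STALE" if (last_ddl - stream_created_at).total_seconds() > 0 else "CURRENT"
assert state == "STALE"
```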
```diff
-    #--------------------------------------------------
-    # Transactions
-    #--------------------------------------------------
-
-    def get_transaction(self, transaction_id):
-        results = self._exec(
-            f"CALL {APP_NAME}.api.get_transaction(?);", [transaction_id])
-        if not results:
-            return None
-
-        results = txn_list_to_dicts(results)
-
-        txn = {field: results[0][field] for field in GET_TXN_SQL_FIELDS}
-
-        state = txn.get("state")
-        created_on = txn.get("created_on")
-        finished_at = txn.get("finished_at")
-        if created_on:
-            # Transaction is still running
-            if state not in TERMINAL_TXN_STATES:
-                tz_info = created_on.tzinfo
-                txn['duration'] = datetime.now(tz_info) - created_on
-            # Transaction is terminal
-            elif finished_at:
-                txn['duration'] = finished_at - created_on
-            # Transaction is still running and we have no state or finished_at
-            else:
-                txn['duration'] = timedelta(0)
-        return txn
-
-    def list_transactions(self, **kwargs):
-        id = kwargs.get("id", None)
-        state = kwargs.get("state", None)
-        engine = kwargs.get("engine", None)
-        limit = kwargs.get("limit", 100)
-        all_users = kwargs.get("all_users", False)
-        created_by = kwargs.get("created_by", None)
-        only_active = kwargs.get("only_active", False)
-        where_clause_arr = []
-
-        if id:
-            where_clause_arr.append(f"id = '{id}'")
-        if state:
-            where_clause_arr.append(f"state = '{state.upper()}'")
-        if engine:
-            where_clause_arr.append(f"LOWER(engine_name) = '{engine.lower()}'")
-        else:
-            if only_active:
-                where_clause_arr.append("state in ('CREATED', 'RUNNING', 'PENDING')")
-            if not all_users and created_by is not None:
-                where_clause_arr.append(f"LOWER(created_by) = '{created_by.lower()}'")
-
-        if len(where_clause_arr):
-            where_clause = f'WHERE {" AND ".join(where_clause_arr)}'
-        else:
-            where_clause = ""
-
-        sql_fields = ", ".join(LIST_TXN_SQL_FIELDS)
-        query = f"SELECT {sql_fields} from {APP_NAME}.api.transactions {where_clause} ORDER BY created_on DESC LIMIT ?"
-        results = self._exec(query, [limit])
-        if not results:
-            return []
-        return txn_list_to_dicts(results)
-
-    def cancel_transaction(self, transaction_id):
-        self._exec(f"CALL {APP_NAME}.api.cancel_own_transaction(?);", [transaction_id])
-        if transaction_id in self._pending_transactions:
-            self._pending_transactions.remove(transaction_id)
-
-    def cancel_pending_transactions(self):
-        for txn_id in self._pending_transactions:
-            self.cancel_transaction(txn_id)
-
-    def get_transaction_events(self, transaction_id: str, continuation_token:str=''):
-        results = self._exec(
-            f"SELECT {APP_NAME}.api.get_own_transaction_events(?, ?);",
-            [transaction_id, continuation_token],
-        )
-        if not results:
-            return {
-                "events": [],
-                "continuation_token": None
-            }
-        row = results[0][0]
-        return json.loads(row)
-
```
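The duration bookkeeping in `get_transaction` has three cases; a sketch of the same branching as a standalone function (names are illustrative, and the real code mutates `txn['duration']` in place):

```python
from datetime import datetime, timedelta

def txn_duration(state, created_on, finished_at, terminal_states):
    if not created_on:
        return None                 # the original leaves duration unset
    if state not in terminal_states:
        # Still running: measure against "now" in created_on's timezone.
        return datetime.now(created_on.tzinfo) - created_on
    if finished_at:
        return finished_at - created_on
    return timedelta(0)
```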
```diff
-    #--------------------------------------------------
-    # Snowflake specific
-    #--------------------------------------------------
-
-    def get_version(self):
-        results = self._exec(f"SELECT {APP_NAME}.app.get_release()")
-        if not results:
-            return None
-        return results[0][0]
-
-    # CLI methods (list_warehouses, list_compute_pools, list_roles, list_apps,
-    # list_databases, list_sf_schemas, list_tables) are now in CLIResources class
-    # schema_info is kept in base Resources class since it's used by SnowflakeSchema._fetch_info()
-
-    def schema_info(self, database: str, schema: str, tables: Iterable[str]):
-        """Get detailed schema information including primary keys, foreign keys, and columns."""
-        app_name = self.get_app_name()
-        # Only pass the db + schema as the identifier so that the resulting identity is correct
-        parser = IdentityParser(f"{database}.{schema}")
-
-        with debugging.span("schema_info"):
-            with debugging.span("primary_keys") as span:
-                pk_query = f"SHOW PRIMARY KEYS IN SCHEMA {parser.identity};"
-                pks = self._exec(pk_query)
-                span["sql"] = pk_query
-
-            with debugging.span("foreign_keys") as span:
-                fk_query = f"SHOW IMPORTED KEYS IN SCHEMA {parser.identity};"
-                fks = self._exec(fk_query)
-                span["sql"] = fk_query
-
-            # IdentityParser will parse a single value (with no ".") and store it in this case in the db field
-            with debugging.span("columns") as span:
-                tables_str = ", ".join([f"'{IdentityParser(t).db}'" for t in tables])
-                query = textwrap.dedent(f"""
-                    begin
-                        SHOW COLUMNS IN SCHEMA {parser.identity};
-                        let r resultset := (
-                            SELECT
-                                CASE
-                                    WHEN "table_name" = UPPER("table_name") THEN "table_name"
-                                    ELSE '"' || "table_name" || '"'
-                                END as "table_name",
-                                "column_name",
-                                "data_type",
-                                CASE
-                                    WHEN ARRAY_CONTAINS(PARSE_JSON("data_type"):"type", {app_name}.app.get_supported_column_types()) THEN TRUE
-                                    ELSE FALSE
-                                END as "supported_type"
-                            FROM table(result_scan(-1)) as t
-                            WHERE "table_name" in ({tables_str})
-                        );
-                        return table(r);
-                    end;
-                """)
-                span["sql"] = query
-                columns = self._exec(query)
-
-        results = defaultdict(lambda: {"pks": [], "fks": {}, "columns": {}, "invalid_columns": {}})
-        if pks:
-            for row in pks:
-                results[row[3]]["pks"].append(row[4])  # type: ignore
-        if fks:
-            for row in fks:
-                results[row[7]]["fks"][row[8]] = row[3]
-        if columns:
-            # It seems that a SF parameter (QUOTED_IDENTIFIERS_IGNORE_CASE) can control
-            # whether snowflake will ignore case on `row.data_type`,
-            # so we have to use column indexes instead :(
-            for row in columns:
-                table_name = row[0]
-                column_name = row[1]
-                data_type = row[2]
-                supported_type = row[3]
-                # Filter out unsupported types
-                if supported_type:
-                    results[table_name]["columns"][column_name] = data_type
-                else:
-                    results[table_name]["invalid_columns"][column_name] = data_type
-        return results
-
-    def get_cloud_provider(self) -> str:
-        """
-        Detect whether this is Snowflake on Azure, or AWS using Snowflake's CURRENT_REGION().
-        Returns 'azure' or 'aws'.
-        """
-        if self._session:
-            try:
-                # Query Snowflake's current region using the built-in function
-                result = self._session.sql("SELECT CURRENT_REGION()").collect()
-                if result:
-                    region_info = result[0][0]
-                    # Check if the region string contains the cloud provider name
-                    if isinstance(region_info, str):
-                        region_str = region_info.lower()
-                        # Check for cloud providers in the region string
-                        if 'azure' in region_str:
-                            return 'azure'
-                        else:
-                            return 'aws'
-            except Exception:
-                pass
-
-        # Fallback to AWS as default if detection fails
-        return 'aws'
-
```
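The `schema_info` result is a per-table dict keyed as sketched below (values are illustrative; `data_type` holds the JSON string that Snowflake's SHOW COLUMNS emits, and the `fks` map goes from foreign-key column to referenced table):

```python
schema_info_result = {
    "ORDERS": {
        "pks": ["ORDER_ID"],
        "fks": {"CUSTOMER_ID": "CUSTOMERS"},           # FK column -> referenced table
        "columns": {"ORDER_ID": '{"type":"FIXED"}'},   # supported column types
        "invalid_columns": {"SHAPE": '{"type":"GEOGRAPHY"}'},  # filtered out
    }
}
```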
|
2507
|
-
#--------------------------------------------------
|
|
2508
|
-
# Snowflake Wrapper
|
|
2509
|
-
#--------------------------------------------------
|
|
2510
|
-
|
|
2511
|
-
class PrimaryKey:
|
|
2512
|
-
pass
|
|
2513
|
-
|
|
2514
|
-
class _Snowflake:
|
|
2515
|
-
def __init__(self, model, auto_import=False):
|
|
2516
|
-
self._model = model
|
|
2517
|
-
self._auto_import = auto_import
|
|
2518
|
-
if not isinstance(model._client.resources, Resources):
|
|
2519
|
-
raise ValueError("Snowflake model must be used with a snowflake config")
|
|
2520
|
-
self._dbs = {}
|
|
2521
|
-
imports = model._client.resources.list_imports(model=model.name)
|
|
2522
|
-
self._import_structure(imports)
|
|
2523
|
-
|
|
2524
|
-
def _import_structure(self, imports: list[Import]):
|
|
2525
|
-
tree = self._dbs
|
|
2526
|
-
# pre-create existing imports
|
|
2527
|
-
schemas = set()
|
|
2528
|
-
for item in imports:
|
|
2529
|
-
parser = IdentityParser(item["name"])
|
|
2530
|
-
database_name, schema_name, table_name = parser.to_list()[:-1]
|
|
2531
|
-
database = getattr(self, database_name)
|
|
2532
|
-
schema = getattr(database, schema_name)
|
|
2533
|
-
schemas.add(schema)
|
|
2534
|
-
schema._add(table_name, is_imported=True)
|
|
2535
|
-
return tree
|
|
2536
|
-
|
|
2537
|
-
def _safe_get(self, name:str) -> 'SnowflakeDB':
|
|
2538
|
-
name = name
|
|
2539
|
-
if name in self._dbs:
|
|
2540
|
-
return self._dbs[name]
|
|
2541
|
-
self._dbs[name] = SnowflakeDB(self, name)
|
|
2542
|
-
return self._dbs[name]
|
|
2543
|
-
|
|
2544
|
-
def __getattr__(self, name: str) -> 'SnowflakeDB':
|
|
2545
|
-
return self._safe_get(name)
|
|
2546
|
-
|
|
2547
|
-
|
|
2548
|
-
class Snowflake(_Snowflake):
|
|
2549
|
-
def __init__(self, model: dsl.Graph, auto_import=False):
|
|
2550
|
-
if model._config.get_bool("use_graph_index", USE_GRAPH_INDEX):
|
|
2551
|
-
raise SnowflakeProxySourceError()
|
|
2552
|
-
else:
|
|
2553
|
-
debugging.warn(SnowflakeProxyAPIDeprecationWarning())
|
|
2554
|
-
|
|
2555
|
-
super().__init__(model, auto_import)
|
|
2556
|
-
```diff
-class SnowflakeDB:
-    def __init__(self, parent, name):
-        self._name = name
-        self._parent = parent
-        self._model = parent._model
-        self._schemas = {}
-
-    def _safe_get(self, name: str) -> 'SnowflakeSchema':
-        name = name
-        if name in self._schemas:
-            return self._schemas[name]
-        self._schemas[name] = SnowflakeSchema(self, name)
-        return self._schemas[name]
-
-    def __getattr__(self, name: str) -> 'SnowflakeSchema':
-        return self._safe_get(name)
-
-class SnowflakeSchema:
-    def __init__(self, parent, name):
-        self._name = name
-        self._parent = parent
-        self._model = parent._model
-        self._tables = {}
-        self._imported = set()
-        self._table_info = defaultdict(lambda: {"pks": [], "fks": {}, "columns": {}, "invalid_columns": {}})
-        self._dirty = True
-
-    def _fetch_info(self):
-        if not self._dirty:
-            return
-        self._table_info = self._model._client.resources.schema_info(self._parent._name, self._name, list(self._tables.keys()))
-
-        check_column_types = self._model._config.get("check_column_types", True)
-
-        if check_column_types:
-            self._check_and_confirm_invalid_columns()
-
-        self._dirty = False
-
-    def _check_and_confirm_invalid_columns(self):
-        """Check for invalid columns across the schema's tables."""
-        tables_with_invalid_columns = {}
-        for table_name, table_info in self._table_info.items():
-            if table_info.get("invalid_columns"):
-                tables_with_invalid_columns[table_name] = table_info["invalid_columns"]
-
-        if tables_with_invalid_columns:
-            from relationalai.errors import UnsupportedColumnTypesWarning
-            UnsupportedColumnTypesWarning(tables_with_invalid_columns)
-
-    def _add(self, name, is_imported=False):
-        if name in self._tables:
-            return self._tables[name]
-        self._dirty = True
-        if is_imported:
-            self._imported.add(name)
-        else:
-            self._tables[name] = SnowflakeTable(self, name)
-        return self._tables.get(name)
-
-    def _safe_get(self, name: str) -> 'SnowflakeTable | None':
-        table = self._add(name)
-        return table
-
-    def __getattr__(self, name: str) -> 'SnowflakeTable | None':
-        return self._safe_get(name)
-
-
```
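All three proxy levels (`_Snowflake` → `SnowflakeDB` → `SnowflakeSchema`) rely on the same trick: `__getattr__` lazily creates and caches a child object, so a dotted path builds the tree on demand. A minimal self-contained sketch of the pattern (the `Node` class is invented):

```python
# Minimal sketch of the memoized __getattr__ chaining used above; `Node`
# is an invented stand-in for _Snowflake/SnowflakeDB/SnowflakeSchema.
class Node:
    def __init__(self, name: str):
        self._name = name
        self._children: dict[str, "Node"] = {}

    def __getattr__(self, name: str) -> "Node":
        # __getattr__ only fires for attributes not found by normal lookup,
        # so guard underscore names to avoid recursing before __init__ runs.
        if name.startswith("_"):
            raise AttributeError(name)
        if name not in self._children:
            self._children[name] = Node(f"{self._name}.{name}")
        return self._children[name]

root = Node("snowflake")
print(root.sandbox.public._name)  # -> "snowflake.sandbox.public"
```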
```diff
-class SnowflakeTable(dsl.Type):
-    def __init__(self, parent, name):
-        super().__init__(parent._model, f"sf_{name}")
-        # hack to make this work for pathfinder
-        self._type.parents.append(m.Builtins.PQFilterAnnotation)
-        self._name = name
-        self._model = parent._model
-        self._parent = parent
-        self._aliases = {}
-        self._finalzed = False
-        self._source = runtime_env.get_source()
-        relation_name = to_fqn_relation_name(self.fqname())
-        self._model.install_raw(f"declare {relation_name}")
-
-    def __call__(self, *args, **kwargs):
-        self._lazy_init()
-        return super().__call__(*args, **kwargs)
-
-    def add(self, *args, **kwargs):
-        self._lazy_init()
-        return super().add(*args, **kwargs)
-
-    def extend(self, *args, **kwargs):
-        self._lazy_init()
-        return super().extend(*args, **kwargs)
-
-    def known_properties(self):
-        self._lazy_init()
-        return super().known_properties()
-
-    def _lazy_init(self):
-        if self._finalzed:
-            return
-
-        parent = self._parent
-        name = self._name
-        use_graph_index = self._model._config.get("use_graph_index", USE_GRAPH_INDEX)
-
-        if not use_graph_index and name not in parent._imported:
-            if self._parent._parent._parent._auto_import:
-                with Spinner(f"Creating stream for {self.fqname()}", f"Stream for {self.fqname()} created successfully"):
-                    db_name = parent._parent._name
-                    schema_name = parent._name
-                    self._model._client.resources.create_import_stream(ImportSourceTable(db_name, schema_name, name), self._model.name)
-                print("")
-                parent._imported.add(name)
-            else:
-                imports = self._model._client.resources.list_imports(model=self._model.name)
-                for item in imports:
-                    cur_name = item["name"].lower().split(".")[-1]
-                    parent._imported.add(cur_name)
-                if name not in parent._imported:
-                    exception = SnowflakeImportMissingException(runtime_env.get_source(), self.fqname(), self._model.name)
-                    raise exception from None
-
-        parent._fetch_info()
-        self._finalize()
-
```
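`SnowflakeTable` defers its remote work (stream creation, schema fetch, rule installation) until first use: `__call__`, `add`, `extend`, and `known_properties` all funnel through `_lazy_init`, which the `_finalzed` flag turns into a one-shot. A stripped-down sketch of that guard pattern (all names invented):

```python
# Minimal sketch of the lazy-finalize guard used by SnowflakeTable above;
# LazyTable and _expensive_setup are invented names.
class LazyTable:
    def __init__(self):
        self._finalized = False

    def _lazy_init(self):
        if self._finalized:
            return
        self._finalized = True     # flip the flag so re-entrant calls no-op
        self._expensive_setup()    # e.g. fetch schema, install rules

    def _expensive_setup(self):
        print("fetching schema once")

    def query(self):
        self._lazy_init()          # every public entry point funnels here
        return "rows"

t = LazyTable()
t.query()  # prints "fetching schema once"
t.query()  # setup is skipped the second time
```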
```diff
-    def _finalize(self):
-        if self._finalzed:
-            return
-
-        self._finalzed = True
-        self._schema = self._parent._table_info[self._name]
-
-        # Set the relation name to the sanitized version of the fully qualified name
-        relation_name = to_fqn_relation_name(self.fqname())
-
-        model: dsl.Graph = self._model
-        edb = getattr(std.rel, relation_name)
-        edb._rel.parents.append(m.Builtins.EDB)
-        id_rel = getattr(std.rel, f"{relation_name}_pyrel_id")
-
-        with model.rule(globalize=True, source=self._source):
-            id, val = dsl.create_vars(2)
-            edb(dsl.Symbol("METADATA$ROW_ID"), id, val)
-            std.rel.SHA1(id)
-            id_rel.add(id)
-
-        with model.rule(dynamic=True, globalize=True, source=self._source):
-            prop, id, val = dsl.create_vars(3)
-            id_rel(id)
-            std.rel.SHA1(id)
-            self.add(snowflake_id=id)
-
-        for prop, prop_type in self._schema["columns"].items():
-            _prop = prop
-            if _prop.startswith("_"):
-                _prop = "col" + prop
-
-            prop_ident = sanitize_identifier(_prop.lower())
-
-            with model.rule(dynamic=True, globalize=True, source=self._source):
-                id, val = dsl.create_vars(2)
-                edb(dsl.Symbol(prop), id, val)
-                std.rel.SHA1(id)
-                _prop = getattr(self, prop_ident)
-                if not _prop:
-                    raise ValueError(f"Property {_prop} couldn't be accessed on {self.fqname()}")
-                if _prop.is_multi_valued:
-                    inst = self(snowflake_id=id)
-                    getattr(inst, prop_ident).add(val)
-                else:
-                    self(snowflake_id=id).set(**{prop_ident: val})
-
-        # Because we're bypassing a bunch of the normal Type.add machinery here,
-        # we need to manually account for the case where people are using value types.
-        def wrapped(x):
-            if not model._config.get("compiler.use_value_types", False):
-                return x
-            other_id = dsl.create_var()
-            model._action(dsl.build.construct(self._type, [x, other_id]))
-            return other_id
-
-        # new UInt128 schema mapping rules
-        with model.rule(dynamic=True, globalize=True, source=self._source):
-            id = dsl.create_var()
-            # This will generate an arity mismatch warning when used with the old SHA-1 Data Streams.
-            # Ideally we have the `@no_diagnostics(:ARITY_MISMATCH)` attribute on the relation using
-            # the METADATA$KEY column but that ended up being a more involved change than expected
-            # for avoiding a non-blocking warning
-            edb(dsl.Symbol("METADATA$KEY"), id)
-            std.rel.UInt128(id)
-            self.add(wrapped(id), snowflake_id=id)
-
-        for prop, prop_type in self._schema["columns"].items():
-            _prop = prop
-            if _prop.startswith("_"):
-                _prop = "col" + prop
-
-            prop_ident = sanitize_identifier(_prop.lower())
-            with model.rule(dynamic=True, globalize=True, source=self._source):
-                id, val = dsl.create_vars(2)
-                edb(dsl.Symbol(prop), id, val)
-                std.rel.UInt128(id)
-                _prop = getattr(self, prop_ident)
-                if not _prop:
-                    raise ValueError(f"Property {_prop} couldn't be accessed on {self.fqname()}")
-                if _prop.is_multi_valued:
-                    inst = self(id)
-                    getattr(inst, prop_ident).add(val)
-                else:
-                    model._check_property(_prop._prop)
-                    raw_relation = getattr(std.rel, prop_ident)
-                    dsl.tag(raw_relation, dsl.Builtins.FunctionAnnotation)
-                    raw_relation.add(wrapped(id), val)
-
-    def namespace(self):
-        return f"{self._parent._parent._name}.{self._parent._name}"
-
-    def fqname(self):
-        return f"{self.namespace()}.{self._name}"
-
-    def describe(self, **kwargs):
-        model = self._model
-        for k, v in kwargs.items():
-            if v is PrimaryKey:
-                self._schema["pks"] = [k]
-            elif isinstance(v, tuple):
-                (table, name) = v
-                if isinstance(table, SnowflakeTable):
-                    fk_table = table
-                    pk = fk_table._schema["pks"]
-                    with model.rule():
-                        inst = fk_table()
-                        me = self()
-                        getattr(inst, pk[0]) == getattr(me, k)
-                        if getattr(self, name).is_multi_valued:
-                            getattr(me, name).add(inst)
-                        else:
-                            me.set(**{name: inst})
-                else:
-                    raise ValueError(f"Invalid foreign key {v}")
-            else:
-                raise ValueError(f"Invalid column {k}={v}")
-        return self
-
```
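`describe` configures keys after the fact: `column=PrimaryKey` registers a primary key, and `column=(other_table, relation_name)` installs a rule joining the foreign-key column to the other table's primary key. A hypothetical call, reusing the proxy sketch above (all table and column names invented):

```python
# Hypothetical describe() usage; table and column names are invented.
Customer = sf.sandbox.public.customer.describe(id=PrimaryKey)
Order = sf.sandbox.public.orders.describe(
    customer_id=(Customer, "customer"),  # FK: orders.customer_id -> customer.id
)
```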
```diff
-class Provider(ProviderBase):
-    def __init__(
-        self,
-        profile: str | None = None,
-        config: Config | None = None,
-        resources: Resources | None = None,
-        generation: Generation | None = None,
-    ):
-        if resources:
-            self.resources = resources
-        else:
-            from .resources_factory import create_resources_instance
-            self.resources = create_resources_instance(
-                config=config,
-                profile=profile,
-                generation=generation or Generation.V0,
-                dry_run=False,
-                language="rel",
-            )
-
-    def list_streams(self, model: str):
-        return self.resources.list_imports(model=model)
-
-    def create_streams(self, sources: List[str], model: str, force=False):
-        if not self.resources.get_graph(model):
-            self.resources.create_graph(model)
-        def parse_source(raw: str):
-            parser = IdentityParser(raw)
-            assert parser.is_complete, "Snowflake table imports must be in `database.schema.table` format"
-            return ImportSourceTable(*parser.to_list())
-        for source in sources:
-            source_table = parse_source(source)
-            try:
-                with Spinner(f"Creating stream for {source_table.name}", f"Stream for {source_table.name} created successfully"):
-                    if force:
-                        self.resources.delete_import(source_table.name, model, True)
-                    self.resources.create_import_stream(source_table, model)
-            except Exception as e:
-                if "stream already exists" in f"{e}":
-                    raise Exception(f"\n\nStream '{source_table.name.upper()}' already exists.")
-                elif "engine not found" in f"{e}":
-                    raise Exception("\n\nNo engines found in a READY state. Please use `engines:create` to create an engine that will be used to initialize the target relation.")
-                else:
-                    raise e
-        with Spinner("Waiting for imports to complete", "Imports complete"):
-            self.resources.poll_imports(sources, model)
-
-    def delete_stream(self, stream_id: str, model: str):
-        return self.resources.delete_import(stream_id, model)
-
```
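`Provider` wraps stream lifecycle management: `create_streams` validates each fully qualified `database.schema.table` name, optionally force-recreates the stream, and polls until the imports land. A hypothetical session (profile, model, and table names invented):

```python
# Hypothetical Provider stream management; the profile, model, and table
# names are invented for illustration.
provider = Provider(profile="default")
provider.create_streams(["sandbox.public.orders"], model="sales_demo", force=True)
print(provider.list_streams(model="sales_demo"))
provider.delete_stream("sandbox.public.orders", model="sales_demo")
```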
```diff
-    def sql(self, query: str, params: List[Any] = [], format: Literal["list", "pandas", "polars", "lazy"] = "list"):
-        # note: default format cannot be pandas because .to_pandas() only works on SELECT queries
-        result = self.resources._exec(query, params, raw=True, help=False)
-        if format == "lazy":
-            return cast(snowflake.snowpark.DataFrame, result)
-        elif format == "list":
-            return cast(list, result.collect())
-        elif format == "pandas":
-            import pandas as pd
-            try:
-                # use to_pandas for SELECT queries
-                return cast(pd.DataFrame, result.to_pandas())
-            except Exception:
-                # handle non-SELECT queries like SHOW
-                return pd.DataFrame(result.collect())
-        elif format == "polars":
-            import polars as pl  # type: ignore
-            return pl.DataFrame(
-                [row.as_dict() for row in result.collect()],
-                orient="row",
-                strict=False,
-                infer_schema_length=None
-            )
-        else:
-            raise ValueError(f"Invalid format {format}. Should be one of 'list', 'pandas', 'polars', 'lazy'")
-
```
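`sql` returns the Snowpark result in one of four shapes: `lazy` hands back the unexecuted Snowpark DataFrame, while `pandas` falls back to collecting rows when `.to_pandas()` fails on non-SELECT statements such as SHOW. For example (query text illustrative):

```python
# Hypothetical calls against Provider.sql(); the queries are illustrative.
rows = provider.sql("SHOW TERSE DATABASES")              # list of Row objects
df   = provider.sql("SELECT 1 AS one", format="pandas")  # pandas DataFrame
pldf = provider.sql("SELECT 1 AS one", format="polars")  # polars DataFrame
lazy = provider.sql("SELECT 1 AS one", format="lazy")    # unexecuted Snowpark DataFrame
lazy.collect()                                           # executes on demand
```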
```diff
-    def activate(self):
-        with Spinner("Activating RelationalAI app...", "RelationalAI app activated"):
-            self.sql("CALL RELATIONALAI.APP.ACTIVATE();")
-
-    def deactivate(self):
-        with Spinner("Deactivating RelationalAI app...", "RelationalAI app deactivated"):
-            self.sql("CALL RELATIONALAI.APP.DEACTIVATE();")
-
-    def drop_service(self):
-        warnings.warn(
-            "The drop_service method has been deprecated in favor of deactivate",
-            DeprecationWarning,
-            stacklevel=2,
-        )
-        self.deactivate()
-
-    def resume_service(self):
-        warnings.warn(
-            "The resume_service method has been deprecated in favor of activate",
-            DeprecationWarning,
-            stacklevel=2,
-        )
-        self.activate()
-
-
-#--------------------------------------------------
-# SnowflakeClient
-#--------------------------------------------------
-class SnowflakeClient(Client):
-    def create_database(self, isolated=True, nowait_durable=True, headers: Dict | None = None):
-        from relationalai.tools.cli_helpers import validate_engine_name
-
-        assert isinstance(self.resources, Resources)
-
-        if self.last_database_version == len(self.resources.sources):
-            return
-
-        model = self._source_database
-        app_name = self.resources.get_app_name()
-        engine_name = self.resources.get_default_engine_name()
-        engine_size = self.resources.config.get_default_engine_size()
-
-        # Validate engine name
-        is_name_valid, _ = validate_engine_name(engine_name)
-        if not is_name_valid:
-            raise EngineNameValidationException(engine_name)
-
-        # Validate engine size
-        valid_sizes = self.resources.get_engine_sizes()
-        if not isinstance(engine_size, str) or engine_size not in valid_sizes:
-            raise InvalidEngineSizeError(str(engine_size), valid_sizes)
-
-        program_span_id = debugging.get_program_span_id()
-
-        query_attrs_dict = json.loads(headers.get("X-Query-Attributes", "{}")) if headers else {}
-        with debugging.span("poll_use_index", sources=self.resources.sources, model=model, engine=engine_name, **query_attrs_dict):
-            self.maybe_poll_use_index(
-                app_name=app_name,
-                sources=self.resources.sources,
-                model=model,
-                engine_name=engine_name,
-                engine_size=engine_size,
-                program_span_id=program_span_id,
-                headers=headers
-            )
-
-        self.last_database_version = len(self.resources.sources)
-        self._manage_packages()
-
-        if isolated and not self.keep_model:
-            atexit.register(self.delete_database)
-
-    def maybe_poll_use_index(
-        self,
-        app_name: str,
-        sources: Iterable[str],
-        model: str,
-        engine_name: str,
-        engine_size: str | None = None,
-        program_span_id: str | None = None,
-        headers: Dict | None = None,
-    ):
-        """Only call _poll_use_index if there are sources to process."""
-        assert isinstance(self.resources, Resources)
-        return self.resources.maybe_poll_use_index(
-            app_name=app_name,
-            sources=sources,
-            model=model,
-            engine_name=engine_name,
-            engine_size=engine_size,
-            program_span_id=program_span_id,
-            headers=headers
-        )
-
-
```
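On the client side, `create_database` reads optional query attributes from an `X-Query-Attributes` JSON header and attaches them to the `poll_use_index` debug span; for isolated, non-kept models it also registers cleanup at interpreter exit. A sketch of the header shape it parses (keys and values invented):

```python
import json

# Hypothetical header payload consumed by SnowflakeClient.create_database
# above; the attribute keys and values are invented, and `client` is
# assumed to be a SnowflakeClient instance.
headers = {"X-Query-Attributes": json.dumps({"team": "analytics", "job": "nightly"})}
client.create_database(isolated=False, headers=headers)
```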
```diff
-#--------------------------------------------------
-# Graph
-#--------------------------------------------------
-
-def Graph(
-    name,
-    *,
-    profile: str | None = None,
-    config: Config,
-    dry_run: bool = False,
-    isolated: bool = True,
-    connection: Session | None = None,
-    keep_model: bool = False,
-    nowait_durable: bool = True,
-    format: str = "default",
-):
-    from .resources_factory import create_resources_instance
-    from .use_index_resources import UseIndexResources
-
-    use_graph_index = config.get("use_graph_index", USE_GRAPH_INDEX)
-    use_monotype_operators = config.get("compiler.use_monotype_operators", False)
-
-    # Create resources instance using factory
-    resources = create_resources_instance(
-        config=config,
-        profile=profile,
-        connection=connection,
-        generation=Generation.V0,
-        dry_run=False,  # Resources instance dry_run is separate from client dry_run
-        language="rel",
-    )
-
-    # Determine client class based on resources type and config
-    # SnowflakeClient is used for resources that support use_index functionality
-    if use_graph_index or isinstance(resources, UseIndexResources):
-        client_class = SnowflakeClient
-    else:
-        client_class = Client
-
-    client = client_class(
-        resources,
-        rel.Compiler(config),
-        name,
-        config,
-        dry_run=dry_run,
-        isolated=isolated,
-        keep_model=keep_model,
-        nowait_durable=nowait_durable
-    )
-    base_rel = """
-    @inline
-    def make_identity(x..., z):
-        rel_primitive_hash_tuple_uint128(x..., z)
-
-    @inline
-    def pyrel_default({F}, c, k..., v):
-        F(k..., v) or (not F(k..., _) and v = c)
-
-    @inline
-    def pyrel_unwrap(x in UInt128, y): y = x
-
-    @inline
-    def pyrel_dates_period_days(x in Date, y in Date, z in Int):
-        exists((u) | dates_period_days(x, y, u) and u = ::std::common::^Day[z])
-
-    @inline
-    def pyrel_datetimes_period_milliseconds(x in DateTime, y in DateTime, z in Int):
-        exists((u) | datetimes_period_milliseconds(x, y, u) and u = ^Millisecond[z])
-
-    @inline
-    def pyrel_bool_filter(a, b, {F}, z): { z = if_then_else[F(a, b), boolean_true, boolean_false] }
-
-    @inline
-    def pyrel_strftime(v, fmt, tz in String, s in String):
-        (Date(v) and s = format_date[v, fmt])
-        or (DateTime(v) and s = format_datetime[v, fmt, tz])
-
-    @inline
-    def pyrel_regex_match_all(pattern, string in String, pos in Int, offset in Int, match in String):
-        regex_match_all(pattern, string, offset, match) and offset >= pos
-
-    @inline
-    def pyrel_regex_match(pattern, string in String, pos in Int, offset in Int, match in String):
-        pyrel_regex_match_all(pattern, string, pos, offset, match) and offset = pos
-
-    @inline
-    def pyrel_regex_search(pattern, string in String, pos in Int, offset in Int, match in String):
-        enumerate(pyrel_regex_match_all[pattern, string, pos], 1, offset, match)
-
-    @inline
-    def pyrel_regex_sub(pattern, repl in String, string in String, result in String):
-        string_replace_multiple(string, {(last[regex_match_all[pattern, string]], repl)}, result)
-
-    @inline
-    def pyrel_capture_group(regex in Pattern, string in String, pos in Int, index, match in String):
-        (Integer(index) and capture_group_by_index(regex, string, pos, index, match)) or
-        (String(index) and capture_group_by_name(regex, string, pos, index, match))
-
-    declare __resource
-    declare __compiled_patterns
-    """
-    if use_monotype_operators:
-        base_rel += """
-
-        // use monotyped operators
-        from ::std::monotype import +, -, *, /, <, <=, >, >=
-        """
-    pyrel_base = dsl.build.raw_task(base_rel)
-    debugging.set_source(pyrel_base)
-    client.install("pyrel_base", pyrel_base)
-    return dsl.Graph(client, name, format=format)
```
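`Graph` is the module's factory: it builds a resources instance, picks `SnowflakeClient` whenever the graph index or a `UseIndexResources` backend is in play, installs the `pyrel_base` Rel prelude, and returns the wrapped `dsl.Graph`. A hypothetical invocation (how `Config` is constructed is assumed, not shown in this diff):

```python
# Hypothetical use of the removed Graph() factory; Config construction
# is assumed rather than shown in this diff.
cfg = Config()  # or a profile-loaded configuration
graph = Graph("sales_demo", config=cfg, dry_run=True, keep_model=True)
```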
|