relationalai 0.13.0__py3-none-any.whl → 0.13.0.dev0__py3-none-any.whl
This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- relationalai/__init__.py +1 -256
- relationalai/config/__init__.py +56 -0
- relationalai/config/config.py +289 -0
- relationalai/config/config_fields.py +86 -0
- relationalai/config/connections/__init__.py +46 -0
- relationalai/config/connections/base.py +23 -0
- relationalai/config/connections/duckdb.py +29 -0
- relationalai/config/connections/snowflake.py +243 -0
- relationalai/config/external/__init__.py +17 -0
- relationalai/config/external/dbt_converter.py +101 -0
- relationalai/config/external/dbt_models.py +93 -0
- relationalai/config/external/snowflake_converter.py +41 -0
- relationalai/config/external/snowflake_models.py +85 -0
- relationalai/config/external/utils.py +19 -0
- relationalai/semantics/__init__.py +146 -22
- relationalai/semantics/backends/lqp/annotations.py +11 -0
- relationalai/semantics/backends/sql/sql_compiler.py +327 -0
- relationalai/semantics/frontend/base.py +1707 -0
- relationalai/semantics/frontend/core.py +179 -0
- relationalai/semantics/frontend/front_compiler.py +1313 -0
- relationalai/semantics/frontend/pprint.py +408 -0
- relationalai/semantics/metamodel/__init__.py +6 -40
- relationalai/semantics/metamodel/builtins.py +205 -771
- relationalai/semantics/metamodel/metamodel.py +437 -0
- relationalai/semantics/metamodel/metamodel_analyzer.py +519 -0
- relationalai/semantics/metamodel/pprint.py +412 -0
- relationalai/semantics/metamodel/rewriter.py +266 -0
- relationalai/semantics/metamodel/typer.py +1378 -0
- relationalai/semantics/std/__init__.py +60 -40
- relationalai/semantics/std/aggregates.py +149 -0
- relationalai/semantics/std/common.py +44 -0
- relationalai/semantics/std/constraints.py +37 -43
- relationalai/semantics/std/datetime.py +246 -135
- relationalai/semantics/std/decimals.py +45 -52
- relationalai/semantics/std/floats.py +13 -5
- relationalai/semantics/std/integers.py +26 -11
- relationalai/semantics/std/math.py +183 -112
- relationalai/semantics/std/numbers.py +86 -0
- relationalai/semantics/std/re.py +80 -62
- relationalai/semantics/std/strings.py +117 -60
- relationalai/shims/executor.py +147 -0
- relationalai/shims/helpers.py +126 -0
- relationalai/shims/hoister.py +221 -0
- relationalai/shims/mm2v0.py +1290 -0
- relationalai/tools/cli/__init__.py +6 -0
- relationalai/tools/cli/cli.py +90 -0
- relationalai/tools/cli/components/__init__.py +5 -0
- relationalai/tools/cli/components/progress_reader.py +1524 -0
- relationalai/tools/cli/components/utils.py +58 -0
- relationalai/tools/cli/config_template.py +45 -0
- relationalai/tools/cli/dev.py +19 -0
- relationalai/tools/debugger.py +289 -183
- relationalai/tools/typer_debugger.py +93 -0
- relationalai/util/dataclasses.py +43 -0
- relationalai/util/docutils.py +40 -0
- relationalai/util/error.py +199 -0
- relationalai/util/format.py +48 -106
- relationalai/util/naming.py +145 -0
- relationalai/util/python.py +35 -0
- relationalai/util/runtime.py +156 -0
- relationalai/util/schema.py +197 -0
- relationalai/util/source.py +185 -0
- relationalai/util/structures.py +163 -0
- relationalai/util/tracing.py +261 -0
- relationalai-0.13.0.dev0.dist-info/METADATA +46 -0
- relationalai-0.13.0.dev0.dist-info/RECORD +488 -0
- relationalai-0.13.0.dev0.dist-info/WHEEL +5 -0
- relationalai-0.13.0.dev0.dist-info/entry_points.txt +3 -0
- relationalai-0.13.0.dev0.dist-info/top_level.txt +2 -0
- v0/relationalai/__init__.py +216 -0
- v0/relationalai/clients/__init__.py +5 -0
- v0/relationalai/clients/azure.py +477 -0
- v0/relationalai/clients/client.py +912 -0
- v0/relationalai/clients/config.py +673 -0
- v0/relationalai/clients/direct_access_client.py +118 -0
- v0/relationalai/clients/hash_util.py +31 -0
- v0/relationalai/clients/local.py +571 -0
- v0/relationalai/clients/profile_polling.py +73 -0
- v0/relationalai/clients/result_helpers.py +420 -0
- v0/relationalai/clients/snowflake.py +3869 -0
- v0/relationalai/clients/types.py +113 -0
- v0/relationalai/clients/use_index_poller.py +980 -0
- v0/relationalai/clients/util.py +356 -0
- v0/relationalai/debugging.py +389 -0
- v0/relationalai/dsl.py +1749 -0
- v0/relationalai/early_access/builder/__init__.py +30 -0
- v0/relationalai/early_access/builder/builder/__init__.py +35 -0
- v0/relationalai/early_access/builder/snowflake/__init__.py +12 -0
- v0/relationalai/early_access/builder/std/__init__.py +25 -0
- v0/relationalai/early_access/builder/std/decimals/__init__.py +12 -0
- v0/relationalai/early_access/builder/std/integers/__init__.py +12 -0
- v0/relationalai/early_access/builder/std/math/__init__.py +12 -0
- v0/relationalai/early_access/builder/std/strings/__init__.py +14 -0
- v0/relationalai/early_access/devtools/__init__.py +12 -0
- v0/relationalai/early_access/devtools/benchmark_lqp/__init__.py +12 -0
- v0/relationalai/early_access/devtools/extract_lqp/__init__.py +12 -0
- v0/relationalai/early_access/dsl/adapters/orm/adapter_qb.py +427 -0
- v0/relationalai/early_access/dsl/adapters/orm/parser.py +636 -0
- v0/relationalai/early_access/dsl/adapters/owl/adapter.py +176 -0
- v0/relationalai/early_access/dsl/adapters/owl/parser.py +160 -0
- v0/relationalai/early_access/dsl/bindings/common.py +402 -0
- v0/relationalai/early_access/dsl/bindings/csv.py +170 -0
- v0/relationalai/early_access/dsl/bindings/legacy/binding_models.py +143 -0
- v0/relationalai/early_access/dsl/bindings/snowflake.py +64 -0
- v0/relationalai/early_access/dsl/codegen/binder.py +411 -0
- v0/relationalai/early_access/dsl/codegen/common.py +79 -0
- v0/relationalai/early_access/dsl/codegen/helpers.py +23 -0
- v0/relationalai/early_access/dsl/codegen/relations.py +700 -0
- v0/relationalai/early_access/dsl/codegen/weaver.py +417 -0
- v0/relationalai/early_access/dsl/core/builders/__init__.py +47 -0
- v0/relationalai/early_access/dsl/core/builders/logic.py +19 -0
- v0/relationalai/early_access/dsl/core/builders/scalar_constraint.py +11 -0
- v0/relationalai/early_access/dsl/core/constraints/predicate/atomic.py +455 -0
- v0/relationalai/early_access/dsl/core/constraints/predicate/universal.py +73 -0
- v0/relationalai/early_access/dsl/core/constraints/scalar.py +310 -0
- v0/relationalai/early_access/dsl/core/context.py +13 -0
- v0/relationalai/early_access/dsl/core/cset.py +132 -0
- v0/relationalai/early_access/dsl/core/exprs/__init__.py +116 -0
- v0/relationalai/early_access/dsl/core/exprs/relational.py +18 -0
- v0/relationalai/early_access/dsl/core/exprs/scalar.py +412 -0
- v0/relationalai/early_access/dsl/core/instances.py +44 -0
- v0/relationalai/early_access/dsl/core/logic/__init__.py +193 -0
- v0/relationalai/early_access/dsl/core/logic/aggregation.py +98 -0
- v0/relationalai/early_access/dsl/core/logic/exists.py +223 -0
- v0/relationalai/early_access/dsl/core/logic/helper.py +163 -0
- v0/relationalai/early_access/dsl/core/namespaces.py +32 -0
- v0/relationalai/early_access/dsl/core/relations.py +276 -0
- v0/relationalai/early_access/dsl/core/rules.py +112 -0
- v0/relationalai/early_access/dsl/core/std/__init__.py +45 -0
- v0/relationalai/early_access/dsl/core/temporal/recall.py +6 -0
- v0/relationalai/early_access/dsl/core/types/__init__.py +270 -0
- v0/relationalai/early_access/dsl/core/types/concepts.py +128 -0
- v0/relationalai/early_access/dsl/core/types/constrained/__init__.py +267 -0
- v0/relationalai/early_access/dsl/core/types/constrained/nominal.py +143 -0
- v0/relationalai/early_access/dsl/core/types/constrained/subtype.py +124 -0
- v0/relationalai/early_access/dsl/core/types/standard.py +92 -0
- v0/relationalai/early_access/dsl/core/types/unconstrained.py +50 -0
- v0/relationalai/early_access/dsl/core/types/variables.py +203 -0
- v0/relationalai/early_access/dsl/ir/compiler.py +318 -0
- v0/relationalai/early_access/dsl/ir/executor.py +260 -0
- v0/relationalai/early_access/dsl/ontologies/constraints.py +88 -0
- v0/relationalai/early_access/dsl/ontologies/export.py +30 -0
- v0/relationalai/early_access/dsl/ontologies/models.py +453 -0
- v0/relationalai/early_access/dsl/ontologies/python_printer.py +303 -0
- v0/relationalai/early_access/dsl/ontologies/readings.py +60 -0
- v0/relationalai/early_access/dsl/ontologies/relationships.py +322 -0
- v0/relationalai/early_access/dsl/ontologies/roles.py +87 -0
- v0/relationalai/early_access/dsl/ontologies/subtyping.py +55 -0
- v0/relationalai/early_access/dsl/orm/constraints.py +438 -0
- v0/relationalai/early_access/dsl/orm/measures/dimensions.py +200 -0
- v0/relationalai/early_access/dsl/orm/measures/initializer.py +16 -0
- v0/relationalai/early_access/dsl/orm/measures/measure_rules.py +275 -0
- v0/relationalai/early_access/dsl/orm/measures/measures.py +299 -0
- v0/relationalai/early_access/dsl/orm/measures/role_exprs.py +268 -0
- v0/relationalai/early_access/dsl/orm/models.py +256 -0
- v0/relationalai/early_access/dsl/orm/object_oriented_printer.py +344 -0
- v0/relationalai/early_access/dsl/orm/printer.py +469 -0
- v0/relationalai/early_access/dsl/orm/reasoners.py +480 -0
- v0/relationalai/early_access/dsl/orm/relations.py +19 -0
- v0/relationalai/early_access/dsl/orm/relationships.py +251 -0
- v0/relationalai/early_access/dsl/orm/types.py +42 -0
- v0/relationalai/early_access/dsl/orm/utils.py +79 -0
- v0/relationalai/early_access/dsl/orm/verb.py +204 -0
- v0/relationalai/early_access/dsl/physical_metadata/tables.py +133 -0
- v0/relationalai/early_access/dsl/relations.py +170 -0
- v0/relationalai/early_access/dsl/rulesets.py +69 -0
- v0/relationalai/early_access/dsl/schemas/__init__.py +450 -0
- v0/relationalai/early_access/dsl/schemas/builder.py +48 -0
- v0/relationalai/early_access/dsl/schemas/comp_names.py +51 -0
- v0/relationalai/early_access/dsl/schemas/components.py +203 -0
- v0/relationalai/early_access/dsl/schemas/contexts.py +156 -0
- v0/relationalai/early_access/dsl/schemas/exprs.py +89 -0
- v0/relationalai/early_access/dsl/schemas/fragments.py +464 -0
- v0/relationalai/early_access/dsl/serialization.py +79 -0
- v0/relationalai/early_access/dsl/serialize/exporter.py +163 -0
- v0/relationalai/early_access/dsl/snow/api.py +104 -0
- v0/relationalai/early_access/dsl/snow/common.py +76 -0
- v0/relationalai/early_access/dsl/state_mgmt/__init__.py +129 -0
- v0/relationalai/early_access/dsl/state_mgmt/state_charts.py +125 -0
- v0/relationalai/early_access/dsl/state_mgmt/transitions.py +130 -0
- v0/relationalai/early_access/dsl/types/__init__.py +40 -0
- v0/relationalai/early_access/dsl/types/concepts.py +12 -0
- v0/relationalai/early_access/dsl/types/entities.py +135 -0
- v0/relationalai/early_access/dsl/types/values.py +17 -0
- v0/relationalai/early_access/dsl/utils.py +102 -0
- v0/relationalai/early_access/graphs/__init__.py +13 -0
- v0/relationalai/early_access/lqp/__init__.py +12 -0
- v0/relationalai/early_access/lqp/compiler/__init__.py +12 -0
- v0/relationalai/early_access/lqp/constructors/__init__.py +18 -0
- v0/relationalai/early_access/lqp/executor/__init__.py +12 -0
- v0/relationalai/early_access/lqp/ir/__init__.py +12 -0
- v0/relationalai/early_access/lqp/passes/__init__.py +12 -0
- v0/relationalai/early_access/lqp/pragmas/__init__.py +12 -0
- v0/relationalai/early_access/lqp/primitives/__init__.py +12 -0
- v0/relationalai/early_access/lqp/types/__init__.py +12 -0
- v0/relationalai/early_access/lqp/utils/__init__.py +12 -0
- v0/relationalai/early_access/lqp/validators/__init__.py +12 -0
- v0/relationalai/early_access/metamodel/__init__.py +58 -0
- v0/relationalai/early_access/metamodel/builtins/__init__.py +12 -0
- v0/relationalai/early_access/metamodel/compiler/__init__.py +12 -0
- v0/relationalai/early_access/metamodel/dependency/__init__.py +12 -0
- v0/relationalai/early_access/metamodel/factory/__init__.py +17 -0
- v0/relationalai/early_access/metamodel/helpers/__init__.py +12 -0
- v0/relationalai/early_access/metamodel/ir/__init__.py +14 -0
- v0/relationalai/early_access/metamodel/rewrite/__init__.py +7 -0
- v0/relationalai/early_access/metamodel/typer/__init__.py +3 -0
- v0/relationalai/early_access/metamodel/typer/typer/__init__.py +12 -0
- v0/relationalai/early_access/metamodel/types/__init__.py +15 -0
- v0/relationalai/early_access/metamodel/util/__init__.py +15 -0
- v0/relationalai/early_access/metamodel/visitor/__init__.py +12 -0
- v0/relationalai/early_access/rel/__init__.py +12 -0
- v0/relationalai/early_access/rel/executor/__init__.py +12 -0
- v0/relationalai/early_access/rel/rel_utils/__init__.py +12 -0
- v0/relationalai/early_access/rel/rewrite/__init__.py +7 -0
- v0/relationalai/early_access/solvers/__init__.py +19 -0
- v0/relationalai/early_access/sql/__init__.py +11 -0
- v0/relationalai/early_access/sql/executor/__init__.py +3 -0
- v0/relationalai/early_access/sql/rewrite/__init__.py +3 -0
- v0/relationalai/early_access/tests/logging/__init__.py +12 -0
- v0/relationalai/early_access/tests/test_snapshot_base/__init__.py +12 -0
- v0/relationalai/early_access/tests/utils/__init__.py +12 -0
- v0/relationalai/environments/__init__.py +35 -0
- v0/relationalai/environments/base.py +381 -0
- v0/relationalai/environments/colab.py +14 -0
- v0/relationalai/environments/generic.py +71 -0
- v0/relationalai/environments/ipython.py +68 -0
- v0/relationalai/environments/jupyter.py +9 -0
- v0/relationalai/environments/snowbook.py +169 -0
- v0/relationalai/errors.py +2455 -0
- v0/relationalai/experimental/SF.py +38 -0
- v0/relationalai/experimental/inspect.py +47 -0
- v0/relationalai/experimental/pathfinder/__init__.py +158 -0
- v0/relationalai/experimental/pathfinder/api.py +160 -0
- v0/relationalai/experimental/pathfinder/automaton.py +584 -0
- v0/relationalai/experimental/pathfinder/bridge.py +226 -0
- v0/relationalai/experimental/pathfinder/compiler.py +416 -0
- v0/relationalai/experimental/pathfinder/datalog.py +214 -0
- v0/relationalai/experimental/pathfinder/diagnostics.py +56 -0
- v0/relationalai/experimental/pathfinder/filter.py +236 -0
- v0/relationalai/experimental/pathfinder/glushkov.py +439 -0
- v0/relationalai/experimental/pathfinder/options.py +265 -0
- v0/relationalai/experimental/pathfinder/rpq.py +344 -0
- v0/relationalai/experimental/pathfinder/transition.py +200 -0
- v0/relationalai/experimental/pathfinder/utils.py +26 -0
- v0/relationalai/experimental/paths/api.py +143 -0
- v0/relationalai/experimental/paths/benchmarks/grid_graph.py +37 -0
- v0/relationalai/experimental/paths/examples/basic_example.py +40 -0
- v0/relationalai/experimental/paths/examples/minimal_engine_warmup.py +3 -0
- v0/relationalai/experimental/paths/examples/movie_example.py +77 -0
- v0/relationalai/experimental/paths/examples/paths_benchmark.py +115 -0
- v0/relationalai/experimental/paths/examples/paths_example.py +116 -0
- v0/relationalai/experimental/paths/examples/pattern_to_automaton.py +28 -0
- v0/relationalai/experimental/paths/find_paths_via_automaton.py +85 -0
- v0/relationalai/experimental/paths/graph.py +185 -0
- v0/relationalai/experimental/paths/path_algorithms/find_paths.py +280 -0
- v0/relationalai/experimental/paths/path_algorithms/one_sided_ball_repetition.py +26 -0
- v0/relationalai/experimental/paths/path_algorithms/one_sided_ball_upto.py +111 -0
- v0/relationalai/experimental/paths/path_algorithms/single.py +59 -0
- v0/relationalai/experimental/paths/path_algorithms/two_sided_balls_repetition.py +39 -0
- v0/relationalai/experimental/paths/path_algorithms/two_sided_balls_upto.py +103 -0
- v0/relationalai/experimental/paths/path_algorithms/usp-old.py +130 -0
- v0/relationalai/experimental/paths/path_algorithms/usp-tuple.py +183 -0
- v0/relationalai/experimental/paths/path_algorithms/usp.py +150 -0
- v0/relationalai/experimental/paths/product_graph.py +93 -0
- v0/relationalai/experimental/paths/rpq/automaton.py +584 -0
- v0/relationalai/experimental/paths/rpq/diagnostics.py +56 -0
- v0/relationalai/experimental/paths/rpq/rpq.py +378 -0
- v0/relationalai/experimental/paths/tests/tests_limit_sp_max_length.py +90 -0
- v0/relationalai/experimental/paths/tests/tests_limit_sp_multiple.py +119 -0
- v0/relationalai/experimental/paths/tests/tests_limit_sp_single.py +104 -0
- v0/relationalai/experimental/paths/tests/tests_limit_walks_multiple.py +113 -0
- v0/relationalai/experimental/paths/tests/tests_limit_walks_single.py +149 -0
- v0/relationalai/experimental/paths/tests/tests_one_sided_ball_repetition_multiple.py +70 -0
- v0/relationalai/experimental/paths/tests/tests_one_sided_ball_repetition_single.py +64 -0
- v0/relationalai/experimental/paths/tests/tests_one_sided_ball_upto_multiple.py +115 -0
- v0/relationalai/experimental/paths/tests/tests_one_sided_ball_upto_single.py +75 -0
- v0/relationalai/experimental/paths/tests/tests_single_paths.py +152 -0
- v0/relationalai/experimental/paths/tests/tests_single_walks.py +208 -0
- v0/relationalai/experimental/paths/tests/tests_single_walks_undirected.py +297 -0
- v0/relationalai/experimental/paths/tests/tests_two_sided_balls_repetition_multiple.py +107 -0
- v0/relationalai/experimental/paths/tests/tests_two_sided_balls_repetition_single.py +76 -0
- v0/relationalai/experimental/paths/tests/tests_two_sided_balls_upto_multiple.py +76 -0
- v0/relationalai/experimental/paths/tests/tests_two_sided_balls_upto_single.py +110 -0
- v0/relationalai/experimental/paths/tests/tests_usp_nsp_multiple.py +229 -0
- v0/relationalai/experimental/paths/tests/tests_usp_nsp_single.py +108 -0
- v0/relationalai/experimental/paths/tree_agg.py +168 -0
- v0/relationalai/experimental/paths/utilities/iterators.py +27 -0
- v0/relationalai/experimental/paths/utilities/prefix_sum.py +91 -0
- v0/relationalai/experimental/solvers.py +1087 -0
- v0/relationalai/loaders/__init__.py +0 -0
- v0/relationalai/loaders/csv.py +195 -0
- v0/relationalai/loaders/loader.py +177 -0
- v0/relationalai/loaders/types.py +23 -0
- v0/relationalai/rel_emitter.py +373 -0
- v0/relationalai/rel_utils.py +185 -0
- v0/relationalai/semantics/__init__.py +29 -0
- v0/relationalai/semantics/devtools/benchmark_lqp.py +536 -0
- v0/relationalai/semantics/devtools/compilation_manager.py +294 -0
- v0/relationalai/semantics/devtools/extract_lqp.py +110 -0
- v0/relationalai/semantics/internal/internal.py +3785 -0
- v0/relationalai/semantics/internal/snowflake.py +324 -0
- v0/relationalai/semantics/lqp/builtins.py +16 -0
- v0/relationalai/semantics/lqp/compiler.py +22 -0
- v0/relationalai/semantics/lqp/constructors.py +68 -0
- v0/relationalai/semantics/lqp/executor.py +469 -0
- v0/relationalai/semantics/lqp/intrinsics.py +24 -0
- v0/relationalai/semantics/lqp/model2lqp.py +839 -0
- v0/relationalai/semantics/lqp/passes.py +680 -0
- v0/relationalai/semantics/lqp/primitives.py +252 -0
- v0/relationalai/semantics/lqp/result_helpers.py +202 -0
- v0/relationalai/semantics/lqp/rewrite/annotate_constraints.py +57 -0
- v0/relationalai/semantics/lqp/rewrite/cdc.py +216 -0
- v0/relationalai/semantics/lqp/rewrite/extract_common.py +338 -0
- v0/relationalai/semantics/lqp/rewrite/extract_keys.py +449 -0
- v0/relationalai/semantics/lqp/rewrite/function_annotations.py +114 -0
- v0/relationalai/semantics/lqp/rewrite/functional_dependencies.py +314 -0
- v0/relationalai/semantics/lqp/rewrite/quantify_vars.py +296 -0
- v0/relationalai/semantics/lqp/rewrite/splinter.py +76 -0
- v0/relationalai/semantics/lqp/types.py +101 -0
- v0/relationalai/semantics/lqp/utils.py +160 -0
- v0/relationalai/semantics/lqp/validators.py +57 -0
- v0/relationalai/semantics/metamodel/__init__.py +40 -0
- v0/relationalai/semantics/metamodel/builtins.py +774 -0
- v0/relationalai/semantics/metamodel/compiler.py +133 -0
- v0/relationalai/semantics/metamodel/dependency.py +862 -0
- v0/relationalai/semantics/metamodel/executor.py +61 -0
- v0/relationalai/semantics/metamodel/factory.py +287 -0
- v0/relationalai/semantics/metamodel/helpers.py +361 -0
- v0/relationalai/semantics/metamodel/rewrite/discharge_constraints.py +39 -0
- v0/relationalai/semantics/metamodel/rewrite/dnf_union_splitter.py +210 -0
- v0/relationalai/semantics/metamodel/rewrite/extract_nested_logicals.py +78 -0
- v0/relationalai/semantics/metamodel/rewrite/flatten.py +549 -0
- v0/relationalai/semantics/metamodel/rewrite/format_outputs.py +165 -0
- v0/relationalai/semantics/metamodel/typer/checker.py +353 -0
- v0/relationalai/semantics/metamodel/typer/typer.py +1395 -0
- v0/relationalai/semantics/metamodel/util.py +505 -0
- v0/relationalai/semantics/reasoners/__init__.py +10 -0
- v0/relationalai/semantics/reasoners/graph/__init__.py +37 -0
- v0/relationalai/semantics/reasoners/graph/core.py +9020 -0
- v0/relationalai/semantics/reasoners/optimization/__init__.py +68 -0
- v0/relationalai/semantics/reasoners/optimization/common.py +88 -0
- v0/relationalai/semantics/reasoners/optimization/solvers_dev.py +568 -0
- v0/relationalai/semantics/reasoners/optimization/solvers_pb.py +1163 -0
- v0/relationalai/semantics/rel/builtins.py +40 -0
- v0/relationalai/semantics/rel/compiler.py +989 -0
- v0/relationalai/semantics/rel/executor.py +359 -0
- v0/relationalai/semantics/rel/rel.py +482 -0
- v0/relationalai/semantics/rel/rel_utils.py +276 -0
- v0/relationalai/semantics/snowflake/__init__.py +3 -0
- v0/relationalai/semantics/sql/compiler.py +2503 -0
- v0/relationalai/semantics/sql/executor/duck_db.py +52 -0
- v0/relationalai/semantics/sql/executor/result_helpers.py +64 -0
- v0/relationalai/semantics/sql/executor/snowflake.py +145 -0
- v0/relationalai/semantics/sql/rewrite/denormalize.py +222 -0
- v0/relationalai/semantics/sql/rewrite/double_negation.py +49 -0
- v0/relationalai/semantics/sql/rewrite/recursive_union.py +127 -0
- v0/relationalai/semantics/sql/rewrite/sort_output_query.py +246 -0
- v0/relationalai/semantics/sql/sql.py +504 -0
- v0/relationalai/semantics/std/__init__.py +54 -0
- v0/relationalai/semantics/std/constraints.py +43 -0
- v0/relationalai/semantics/std/datetime.py +363 -0
- v0/relationalai/semantics/std/decimals.py +62 -0
- v0/relationalai/semantics/std/floats.py +7 -0
- v0/relationalai/semantics/std/integers.py +22 -0
- v0/relationalai/semantics/std/math.py +141 -0
- v0/relationalai/semantics/std/pragmas.py +11 -0
- v0/relationalai/semantics/std/re.py +83 -0
- v0/relationalai/semantics/std/std.py +14 -0
- v0/relationalai/semantics/std/strings.py +63 -0
- v0/relationalai/semantics/tests/__init__.py +0 -0
- v0/relationalai/semantics/tests/test_snapshot_abstract.py +143 -0
- v0/relationalai/semantics/tests/test_snapshot_base.py +9 -0
- v0/relationalai/semantics/tests/utils.py +46 -0
- v0/relationalai/std/__init__.py +70 -0
- v0/relationalai/tools/__init__.py +0 -0
- v0/relationalai/tools/cli.py +1940 -0
- v0/relationalai/tools/cli_controls.py +1826 -0
- v0/relationalai/tools/cli_helpers.py +390 -0
- v0/relationalai/tools/debugger.py +183 -0
- v0/relationalai/tools/debugger_client.py +109 -0
- v0/relationalai/tools/debugger_server.py +302 -0
- v0/relationalai/tools/dev.py +685 -0
- v0/relationalai/tools/qb_debugger.py +425 -0
- v0/relationalai/util/clean_up_databases.py +95 -0
- v0/relationalai/util/format.py +123 -0
- v0/relationalai/util/list_databases.py +9 -0
- v0/relationalai/util/otel_configuration.py +25 -0
- v0/relationalai/util/otel_handler.py +484 -0
- v0/relationalai/util/snowflake_handler.py +88 -0
- v0/relationalai/util/span_format_test.py +43 -0
- v0/relationalai/util/span_tracker.py +207 -0
- v0/relationalai/util/spans_file_handler.py +72 -0
- v0/relationalai/util/tracing_handler.py +34 -0
- frontend/debugger/dist/.gitignore +0 -2
- frontend/debugger/dist/assets/favicon-Dy0ZgA6N.png +0 -0
- frontend/debugger/dist/assets/index-Cssla-O7.js +0 -208
- frontend/debugger/dist/assets/index-DlHsYx1V.css +0 -9
- frontend/debugger/dist/index.html +0 -17
- relationalai/clients/__init__.py +0 -18
- relationalai/clients/client.py +0 -912
- relationalai/clients/config.py +0 -673
- relationalai/clients/direct_access_client.py +0 -118
- relationalai/clients/hash_util.py +0 -31
- relationalai/clients/local.py +0 -571
- relationalai/clients/profile_polling.py +0 -73
- relationalai/clients/resources/__init__.py +0 -8
- relationalai/clients/resources/azure/azure.py +0 -477
- relationalai/clients/resources/snowflake/__init__.py +0 -20
- relationalai/clients/resources/snowflake/cli_resources.py +0 -87
- relationalai/clients/resources/snowflake/direct_access_resources.py +0 -711
- relationalai/clients/resources/snowflake/engine_state_handlers.py +0 -309
- relationalai/clients/resources/snowflake/error_handlers.py +0 -199
- relationalai/clients/resources/snowflake/export_procedure.py.jinja +0 -249
- relationalai/clients/resources/snowflake/resources_factory.py +0 -99
- relationalai/clients/resources/snowflake/snowflake.py +0 -3083
- relationalai/clients/resources/snowflake/use_index_poller.py +0 -1011
- relationalai/clients/resources/snowflake/use_index_resources.py +0 -188
- relationalai/clients/resources/snowflake/util.py +0 -387
- relationalai/clients/result_helpers.py +0 -420
- relationalai/clients/types.py +0 -113
- relationalai/clients/util.py +0 -356
- relationalai/debugging.py +0 -389
- relationalai/dsl.py +0 -1749
- relationalai/early_access/builder/__init__.py +0 -30
- relationalai/early_access/builder/builder/__init__.py +0 -35
- relationalai/early_access/builder/snowflake/__init__.py +0 -12
- relationalai/early_access/builder/std/__init__.py +0 -25
- relationalai/early_access/builder/std/decimals/__init__.py +0 -12
- relationalai/early_access/builder/std/integers/__init__.py +0 -12
- relationalai/early_access/builder/std/math/__init__.py +0 -12
- relationalai/early_access/builder/std/strings/__init__.py +0 -14
- relationalai/early_access/devtools/__init__.py +0 -12
- relationalai/early_access/devtools/benchmark_lqp/__init__.py +0 -12
- relationalai/early_access/devtools/extract_lqp/__init__.py +0 -12
- relationalai/early_access/dsl/adapters/orm/adapter_qb.py +0 -427
- relationalai/early_access/dsl/adapters/orm/parser.py +0 -636
- relationalai/early_access/dsl/adapters/owl/adapter.py +0 -176
- relationalai/early_access/dsl/adapters/owl/parser.py +0 -160
- relationalai/early_access/dsl/bindings/common.py +0 -402
- relationalai/early_access/dsl/bindings/csv.py +0 -170
- relationalai/early_access/dsl/bindings/legacy/binding_models.py +0 -143
- relationalai/early_access/dsl/bindings/snowflake.py +0 -64
- relationalai/early_access/dsl/codegen/binder.py +0 -411
- relationalai/early_access/dsl/codegen/common.py +0 -79
- relationalai/early_access/dsl/codegen/helpers.py +0 -23
- relationalai/early_access/dsl/codegen/relations.py +0 -700
- relationalai/early_access/dsl/codegen/weaver.py +0 -417
- relationalai/early_access/dsl/core/builders/__init__.py +0 -47
- relationalai/early_access/dsl/core/builders/logic.py +0 -19
- relationalai/early_access/dsl/core/builders/scalar_constraint.py +0 -11
- relationalai/early_access/dsl/core/constraints/predicate/atomic.py +0 -455
- relationalai/early_access/dsl/core/constraints/predicate/universal.py +0 -73
- relationalai/early_access/dsl/core/constraints/scalar.py +0 -310
- relationalai/early_access/dsl/core/context.py +0 -13
- relationalai/early_access/dsl/core/cset.py +0 -132
- relationalai/early_access/dsl/core/exprs/__init__.py +0 -116
- relationalai/early_access/dsl/core/exprs/relational.py +0 -18
- relationalai/early_access/dsl/core/exprs/scalar.py +0 -412
- relationalai/early_access/dsl/core/instances.py +0 -44
- relationalai/early_access/dsl/core/logic/__init__.py +0 -193
- relationalai/early_access/dsl/core/logic/aggregation.py +0 -98
- relationalai/early_access/dsl/core/logic/exists.py +0 -223
- relationalai/early_access/dsl/core/logic/helper.py +0 -163
- relationalai/early_access/dsl/core/namespaces.py +0 -32
- relationalai/early_access/dsl/core/relations.py +0 -276
- relationalai/early_access/dsl/core/rules.py +0 -112
- relationalai/early_access/dsl/core/std/__init__.py +0 -45
- relationalai/early_access/dsl/core/temporal/recall.py +0 -6
- relationalai/early_access/dsl/core/types/__init__.py +0 -270
- relationalai/early_access/dsl/core/types/concepts.py +0 -128
- relationalai/early_access/dsl/core/types/constrained/__init__.py +0 -267
- relationalai/early_access/dsl/core/types/constrained/nominal.py +0 -143
- relationalai/early_access/dsl/core/types/constrained/subtype.py +0 -124
- relationalai/early_access/dsl/core/types/standard.py +0 -92
- relationalai/early_access/dsl/core/types/unconstrained.py +0 -50
- relationalai/early_access/dsl/core/types/variables.py +0 -203
- relationalai/early_access/dsl/ir/compiler.py +0 -318
- relationalai/early_access/dsl/ir/executor.py +0 -260
- relationalai/early_access/dsl/ontologies/constraints.py +0 -88
- relationalai/early_access/dsl/ontologies/export.py +0 -30
- relationalai/early_access/dsl/ontologies/models.py +0 -453
- relationalai/early_access/dsl/ontologies/python_printer.py +0 -303
- relationalai/early_access/dsl/ontologies/readings.py +0 -60
- relationalai/early_access/dsl/ontologies/relationships.py +0 -322
- relationalai/early_access/dsl/ontologies/roles.py +0 -87
- relationalai/early_access/dsl/ontologies/subtyping.py +0 -55
- relationalai/early_access/dsl/orm/constraints.py +0 -438
- relationalai/early_access/dsl/orm/measures/dimensions.py +0 -200
- relationalai/early_access/dsl/orm/measures/initializer.py +0 -16
- relationalai/early_access/dsl/orm/measures/measure_rules.py +0 -275
- relationalai/early_access/dsl/orm/measures/measures.py +0 -299
- relationalai/early_access/dsl/orm/measures/role_exprs.py +0 -268
- relationalai/early_access/dsl/orm/models.py +0 -256
- relationalai/early_access/dsl/orm/object_oriented_printer.py +0 -344
- relationalai/early_access/dsl/orm/printer.py +0 -469
- relationalai/early_access/dsl/orm/reasoners.py +0 -480
- relationalai/early_access/dsl/orm/relations.py +0 -19
- relationalai/early_access/dsl/orm/relationships.py +0 -251
- relationalai/early_access/dsl/orm/types.py +0 -42
- relationalai/early_access/dsl/orm/utils.py +0 -79
- relationalai/early_access/dsl/orm/verb.py +0 -204
- relationalai/early_access/dsl/physical_metadata/tables.py +0 -133
- relationalai/early_access/dsl/relations.py +0 -170
- relationalai/early_access/dsl/rulesets.py +0 -69
- relationalai/early_access/dsl/schemas/__init__.py +0 -450
- relationalai/early_access/dsl/schemas/builder.py +0 -48
- relationalai/early_access/dsl/schemas/comp_names.py +0 -51
- relationalai/early_access/dsl/schemas/components.py +0 -203
- relationalai/early_access/dsl/schemas/contexts.py +0 -156
- relationalai/early_access/dsl/schemas/exprs.py +0 -89
- relationalai/early_access/dsl/schemas/fragments.py +0 -464
- relationalai/early_access/dsl/serialization.py +0 -79
- relationalai/early_access/dsl/serialize/exporter.py +0 -163
- relationalai/early_access/dsl/snow/api.py +0 -105
- relationalai/early_access/dsl/snow/common.py +0 -76
- relationalai/early_access/dsl/state_mgmt/__init__.py +0 -129
- relationalai/early_access/dsl/state_mgmt/state_charts.py +0 -125
- relationalai/early_access/dsl/state_mgmt/transitions.py +0 -130
- relationalai/early_access/dsl/types/__init__.py +0 -40
- relationalai/early_access/dsl/types/concepts.py +0 -12
- relationalai/early_access/dsl/types/entities.py +0 -135
- relationalai/early_access/dsl/types/values.py +0 -17
- relationalai/early_access/dsl/utils.py +0 -102
- relationalai/early_access/graphs/__init__.py +0 -13
- relationalai/early_access/lqp/__init__.py +0 -12
- relationalai/early_access/lqp/compiler/__init__.py +0 -12
- relationalai/early_access/lqp/constructors/__init__.py +0 -18
- relationalai/early_access/lqp/executor/__init__.py +0 -12
- relationalai/early_access/lqp/ir/__init__.py +0 -12
- relationalai/early_access/lqp/passes/__init__.py +0 -12
- relationalai/early_access/lqp/pragmas/__init__.py +0 -12
- relationalai/early_access/lqp/primitives/__init__.py +0 -12
- relationalai/early_access/lqp/types/__init__.py +0 -12
- relationalai/early_access/lqp/utils/__init__.py +0 -12
- relationalai/early_access/lqp/validators/__init__.py +0 -12
- relationalai/early_access/metamodel/__init__.py +0 -58
- relationalai/early_access/metamodel/builtins/__init__.py +0 -12
- relationalai/early_access/metamodel/compiler/__init__.py +0 -12
- relationalai/early_access/metamodel/dependency/__init__.py +0 -12
- relationalai/early_access/metamodel/factory/__init__.py +0 -17
- relationalai/early_access/metamodel/helpers/__init__.py +0 -12
- relationalai/early_access/metamodel/ir/__init__.py +0 -14
- relationalai/early_access/metamodel/rewrite/__init__.py +0 -7
- relationalai/early_access/metamodel/typer/__init__.py +0 -3
- relationalai/early_access/metamodel/typer/typer/__init__.py +0 -12
- relationalai/early_access/metamodel/types/__init__.py +0 -15
- relationalai/early_access/metamodel/util/__init__.py +0 -15
- relationalai/early_access/metamodel/visitor/__init__.py +0 -12
- relationalai/early_access/rel/__init__.py +0 -12
- relationalai/early_access/rel/executor/__init__.py +0 -12
- relationalai/early_access/rel/rel_utils/__init__.py +0 -12
- relationalai/early_access/rel/rewrite/__init__.py +0 -7
- relationalai/early_access/solvers/__init__.py +0 -19
- relationalai/early_access/sql/__init__.py +0 -11
- relationalai/early_access/sql/executor/__init__.py +0 -3
- relationalai/early_access/sql/rewrite/__init__.py +0 -3
- relationalai/early_access/tests/logging/__init__.py +0 -12
- relationalai/early_access/tests/test_snapshot_base/__init__.py +0 -12
- relationalai/early_access/tests/utils/__init__.py +0 -12
- relationalai/environments/__init__.py +0 -35
- relationalai/environments/base.py +0 -381
- relationalai/environments/colab.py +0 -14
- relationalai/environments/generic.py +0 -71
- relationalai/environments/ipython.py +0 -68
- relationalai/environments/jupyter.py +0 -9
- relationalai/environments/snowbook.py +0 -169
- relationalai/errors.py +0 -2478
- relationalai/experimental/SF.py +0 -38
- relationalai/experimental/inspect.py +0 -47
- relationalai/experimental/pathfinder/__init__.py +0 -158
- relationalai/experimental/pathfinder/api.py +0 -160
- relationalai/experimental/pathfinder/automaton.py +0 -584
- relationalai/experimental/pathfinder/bridge.py +0 -226
- relationalai/experimental/pathfinder/compiler.py +0 -416
- relationalai/experimental/pathfinder/datalog.py +0 -214
- relationalai/experimental/pathfinder/diagnostics.py +0 -56
- relationalai/experimental/pathfinder/filter.py +0 -236
- relationalai/experimental/pathfinder/glushkov.py +0 -439
- relationalai/experimental/pathfinder/options.py +0 -265
- relationalai/experimental/pathfinder/pathfinder-v0.7.0.rel +0 -1951
- relationalai/experimental/pathfinder/rpq.py +0 -344
- relationalai/experimental/pathfinder/transition.py +0 -200
- relationalai/experimental/pathfinder/utils.py +0 -26
- relationalai/experimental/paths/README.md +0 -107
- relationalai/experimental/paths/api.py +0 -143
- relationalai/experimental/paths/benchmarks/grid_graph.py +0 -37
- relationalai/experimental/paths/code_organization.md +0 -2
- relationalai/experimental/paths/examples/Movies.ipynb +0 -16328
- relationalai/experimental/paths/examples/basic_example.py +0 -40
- relationalai/experimental/paths/examples/minimal_engine_warmup.py +0 -3
- relationalai/experimental/paths/examples/movie_example.py +0 -77
- relationalai/experimental/paths/examples/movies_data/actedin.csv +0 -193
- relationalai/experimental/paths/examples/movies_data/directed.csv +0 -45
- relationalai/experimental/paths/examples/movies_data/follows.csv +0 -7
- relationalai/experimental/paths/examples/movies_data/movies.csv +0 -39
- relationalai/experimental/paths/examples/movies_data/person.csv +0 -134
- relationalai/experimental/paths/examples/movies_data/produced.csv +0 -16
- relationalai/experimental/paths/examples/movies_data/ratings.csv +0 -10
- relationalai/experimental/paths/examples/movies_data/wrote.csv +0 -11
- relationalai/experimental/paths/examples/paths_benchmark.py +0 -115
- relationalai/experimental/paths/examples/paths_example.py +0 -116
- relationalai/experimental/paths/examples/pattern_to_automaton.py +0 -28
- relationalai/experimental/paths/find_paths_via_automaton.py +0 -85
- relationalai/experimental/paths/graph.py +0 -185
- relationalai/experimental/paths/path_algorithms/find_paths.py +0 -280
- relationalai/experimental/paths/path_algorithms/one_sided_ball_repetition.py +0 -26
- relationalai/experimental/paths/path_algorithms/one_sided_ball_upto.py +0 -111
- relationalai/experimental/paths/path_algorithms/single.py +0 -59
- relationalai/experimental/paths/path_algorithms/two_sided_balls_repetition.py +0 -39
- relationalai/experimental/paths/path_algorithms/two_sided_balls_upto.py +0 -103
- relationalai/experimental/paths/path_algorithms/usp-old.py +0 -130
- relationalai/experimental/paths/path_algorithms/usp-tuple.py +0 -183
- relationalai/experimental/paths/path_algorithms/usp.py +0 -150
- relationalai/experimental/paths/product_graph.py +0 -93
- relationalai/experimental/paths/rpq/automaton.py +0 -584
- relationalai/experimental/paths/rpq/diagnostics.py +0 -56
- relationalai/experimental/paths/rpq/rpq.py +0 -378
- relationalai/experimental/paths/tests/tests_limit_sp_max_length.py +0 -90
- relationalai/experimental/paths/tests/tests_limit_sp_multiple.py +0 -119
- relationalai/experimental/paths/tests/tests_limit_sp_single.py +0 -104
- relationalai/experimental/paths/tests/tests_limit_walks_multiple.py +0 -113
- relationalai/experimental/paths/tests/tests_limit_walks_single.py +0 -149
- relationalai/experimental/paths/tests/tests_one_sided_ball_repetition_multiple.py +0 -70
- relationalai/experimental/paths/tests/tests_one_sided_ball_repetition_single.py +0 -64
- relationalai/experimental/paths/tests/tests_one_sided_ball_upto_multiple.py +0 -115
- relationalai/experimental/paths/tests/tests_one_sided_ball_upto_single.py +0 -75
- relationalai/experimental/paths/tests/tests_single_paths.py +0 -152
- relationalai/experimental/paths/tests/tests_single_walks.py +0 -208
- relationalai/experimental/paths/tests/tests_single_walks_undirected.py +0 -297
- relationalai/experimental/paths/tests/tests_two_sided_balls_repetition_multiple.py +0 -107
- relationalai/experimental/paths/tests/tests_two_sided_balls_repetition_single.py +0 -76
- relationalai/experimental/paths/tests/tests_two_sided_balls_upto_multiple.py +0 -76
- relationalai/experimental/paths/tests/tests_two_sided_balls_upto_single.py +0 -110
- relationalai/experimental/paths/tests/tests_usp_nsp_multiple.py +0 -229
- relationalai/experimental/paths/tests/tests_usp_nsp_single.py +0 -108
- relationalai/experimental/paths/tree_agg.py +0 -168
- relationalai/experimental/paths/utilities/iterators.py +0 -27
- relationalai/experimental/paths/utilities/prefix_sum.py +0 -91
- relationalai/experimental/solvers.py +0 -1087
- relationalai/loaders/csv.py +0 -195
- relationalai/loaders/loader.py +0 -177
- relationalai/loaders/types.py +0 -23
- relationalai/rel_emitter.py +0 -373
- relationalai/rel_utils.py +0 -185
- relationalai/semantics/designs/query_builder/identify_by.md +0 -106
- relationalai/semantics/devtools/benchmark_lqp.py +0 -535
- relationalai/semantics/devtools/compilation_manager.py +0 -294
- relationalai/semantics/devtools/extract_lqp.py +0 -110
- relationalai/semantics/internal/internal.py +0 -3785
- relationalai/semantics/internal/snowflake.py +0 -325
- relationalai/semantics/lqp/README.md +0 -34
- relationalai/semantics/lqp/builtins.py +0 -16
- relationalai/semantics/lqp/compiler.py +0 -22
- relationalai/semantics/lqp/constructors.py +0 -68
- relationalai/semantics/lqp/executor.py +0 -469
- relationalai/semantics/lqp/intrinsics.py +0 -24
- relationalai/semantics/lqp/model2lqp.py +0 -877
- relationalai/semantics/lqp/passes.py +0 -680
- relationalai/semantics/lqp/primitives.py +0 -252
- relationalai/semantics/lqp/result_helpers.py +0 -202
- relationalai/semantics/lqp/rewrite/annotate_constraints.py +0 -57
- relationalai/semantics/lqp/rewrite/cdc.py +0 -216
- relationalai/semantics/lqp/rewrite/extract_common.py +0 -338
- relationalai/semantics/lqp/rewrite/extract_keys.py +0 -506
- relationalai/semantics/lqp/rewrite/function_annotations.py +0 -114
- relationalai/semantics/lqp/rewrite/functional_dependencies.py +0 -314
- relationalai/semantics/lqp/rewrite/quantify_vars.py +0 -296
- relationalai/semantics/lqp/rewrite/splinter.py +0 -76
- relationalai/semantics/lqp/types.py +0 -101
- relationalai/semantics/lqp/utils.py +0 -160
- relationalai/semantics/lqp/validators.py +0 -57
- relationalai/semantics/metamodel/compiler.py +0 -133
- relationalai/semantics/metamodel/dependency.py +0 -862
- relationalai/semantics/metamodel/executor.py +0 -61
- relationalai/semantics/metamodel/factory.py +0 -287
- relationalai/semantics/metamodel/helpers.py +0 -361
- relationalai/semantics/metamodel/rewrite/discharge_constraints.py +0 -39
- relationalai/semantics/metamodel/rewrite/dnf_union_splitter.py +0 -210
- relationalai/semantics/metamodel/rewrite/extract_nested_logicals.py +0 -78
- relationalai/semantics/metamodel/rewrite/flatten.py +0 -554
- relationalai/semantics/metamodel/rewrite/format_outputs.py +0 -165
- relationalai/semantics/metamodel/typer/checker.py +0 -353
- relationalai/semantics/metamodel/typer/typer.py +0 -1395
- relationalai/semantics/metamodel/util.py +0 -506
- relationalai/semantics/reasoners/__init__.py +0 -10
- relationalai/semantics/reasoners/graph/README.md +0 -620
- relationalai/semantics/reasoners/graph/__init__.py +0 -37
- relationalai/semantics/reasoners/graph/core.py +0 -9019
- relationalai/semantics/reasoners/graph/design/beyond_demand_transform.md +0 -797
- relationalai/semantics/reasoners/graph/tests/README.md +0 -21
- relationalai/semantics/reasoners/optimization/__init__.py +0 -68
- relationalai/semantics/reasoners/optimization/common.py +0 -88
- relationalai/semantics/reasoners/optimization/solvers_dev.py +0 -568
- relationalai/semantics/reasoners/optimization/solvers_pb.py +0 -1163
- relationalai/semantics/rel/builtins.py +0 -40
- relationalai/semantics/rel/compiler.py +0 -989
- relationalai/semantics/rel/executor.py +0 -362
- relationalai/semantics/rel/rel.py +0 -482
- relationalai/semantics/rel/rel_utils.py +0 -276
- relationalai/semantics/snowflake/__init__.py +0 -3
- relationalai/semantics/sql/compiler.py +0 -2503
- relationalai/semantics/sql/executor/duck_db.py +0 -52
- relationalai/semantics/sql/executor/result_helpers.py +0 -64
- relationalai/semantics/sql/executor/snowflake.py +0 -149
- relationalai/semantics/sql/rewrite/denormalize.py +0 -222
- relationalai/semantics/sql/rewrite/double_negation.py +0 -49
- relationalai/semantics/sql/rewrite/recursive_union.py +0 -127
- relationalai/semantics/sql/rewrite/sort_output_query.py +0 -246
- relationalai/semantics/sql/sql.py +0 -504
- relationalai/semantics/std/pragmas.py +0 -11
- relationalai/semantics/std/std.py +0 -14
- relationalai/semantics/tests/test_snapshot_abstract.py +0 -143
- relationalai/semantics/tests/test_snapshot_base.py +0 -9
- relationalai/semantics/tests/utils.py +0 -46
- relationalai/std/__init__.py +0 -70
- relationalai/tools/cli.py +0 -1936
- relationalai/tools/cli_controls.py +0 -1826
- relationalai/tools/cli_helpers.py +0 -398
- relationalai/tools/debugger_client.py +0 -109
- relationalai/tools/debugger_server.py +0 -302
- relationalai/tools/dev.py +0 -685
- relationalai/tools/notes +0 -7
- relationalai/tools/qb_debugger.py +0 -425
- relationalai/util/clean_up_databases.py +0 -95
- relationalai/util/list_databases.py +0 -9
- relationalai/util/otel_configuration.py +0 -26
- relationalai/util/otel_handler.py +0 -484
- relationalai/util/snowflake_handler.py +0 -88
- relationalai/util/span_format_test.py +0 -43
- relationalai/util/span_tracker.py +0 -207
- relationalai/util/spans_file_handler.py +0 -72
- relationalai/util/tracing_handler.py +0 -34
- relationalai-0.13.0.dist-info/METADATA +0 -74
- relationalai-0.13.0.dist-info/RECORD +0 -458
- relationalai-0.13.0.dist-info/WHEEL +0 -4
- relationalai-0.13.0.dist-info/entry_points.txt +0 -3
- relationalai-0.13.0.dist-info/licenses/LICENSE +0 -202
- relationalai_test_util/__init__.py +0 -4
- relationalai_test_util/fixtures.py +0 -229
- relationalai_test_util/snapshot.py +0 -252
- relationalai_test_util/traceback.py +0 -118
- /relationalai/{analysis → semantics/frontend}/__init__.py +0 -0
- /relationalai/{auth/__init__.py → semantics/metamodel/metamodel_compiler.py} +0 -0
- /relationalai/{early_access → shims}/__init__.py +0 -0
- {relationalai/early_access/dsl/adapters → v0/relationalai/analysis}/__init__.py +0 -0
- {relationalai → v0/relationalai}/analysis/mechanistic.py +0 -0
- {relationalai → v0/relationalai}/analysis/whynot.py +0 -0
- {relationalai/early_access/dsl/adapters/orm → v0/relationalai/auth}/__init__.py +0 -0
- {relationalai → v0/relationalai}/auth/jwt_generator.py +0 -0
- {relationalai → v0/relationalai}/auth/oauth_callback_server.py +0 -0
- {relationalai → v0/relationalai}/auth/token_handler.py +0 -0
- {relationalai → v0/relationalai}/auth/util.py +0 -0
- {relationalai/clients/resources/snowflake → v0/relationalai/clients}/cache_store.py +0 -0
- {relationalai → v0/relationalai}/compiler.py +0 -0
- {relationalai → v0/relationalai}/dependencies.py +0 -0
- {relationalai → v0/relationalai}/docutils.py +0 -0
- {relationalai/early_access/dsl/adapters/owl → v0/relationalai/early_access}/__init__.py +0 -0
- {relationalai → v0/relationalai}/early_access/dsl/__init__.py +0 -0
- {relationalai/early_access/dsl/bindings → v0/relationalai/early_access/dsl/adapters}/__init__.py +0 -0
- {relationalai/early_access/dsl/bindings/legacy → v0/relationalai/early_access/dsl/adapters/orm}/__init__.py +0 -0
- {relationalai → v0/relationalai}/early_access/dsl/adapters/orm/model.py +0 -0
- {relationalai/early_access/dsl/codegen → v0/relationalai/early_access/dsl/adapters/owl}/__init__.py +0 -0
- {relationalai → v0/relationalai}/early_access/dsl/adapters/owl/model.py +0 -0
- {relationalai/early_access/dsl/core/temporal → v0/relationalai/early_access/dsl/bindings}/__init__.py +0 -0
- {relationalai/early_access/dsl/ir → v0/relationalai/early_access/dsl/bindings/legacy}/__init__.py +0 -0
- {relationalai/early_access/dsl/ontologies → v0/relationalai/early_access/dsl/codegen}/__init__.py +0 -0
- {relationalai → v0/relationalai}/early_access/dsl/constants.py +0 -0
- {relationalai → v0/relationalai}/early_access/dsl/core/__init__.py +0 -0
- {relationalai → v0/relationalai}/early_access/dsl/core/constraints/__init__.py +0 -0
- {relationalai → v0/relationalai}/early_access/dsl/core/constraints/predicate/__init__.py +0 -0
- {relationalai → v0/relationalai}/early_access/dsl/core/stack.py +0 -0
- {relationalai/early_access/dsl/orm → v0/relationalai/early_access/dsl/core/temporal}/__init__.py +0 -0
- {relationalai → v0/relationalai}/early_access/dsl/core/utils.py +0 -0
- {relationalai/early_access/dsl/orm/measures → v0/relationalai/early_access/dsl/ir}/__init__.py +0 -0
- {relationalai/early_access/dsl/physical_metadata → v0/relationalai/early_access/dsl/ontologies}/__init__.py +0 -0
- {relationalai → v0/relationalai}/early_access/dsl/ontologies/raw_source.py +0 -0
- {relationalai/early_access/dsl/serialize → v0/relationalai/early_access/dsl/orm}/__init__.py +0 -0
- {relationalai/early_access/dsl/snow → v0/relationalai/early_access/dsl/orm/measures}/__init__.py +0 -0
- {relationalai → v0/relationalai}/early_access/dsl/orm/reasoner_errors.py +0 -0
- {relationalai/loaders → v0/relationalai/early_access/dsl/physical_metadata}/__init__.py +0 -0
- {relationalai/semantics/tests → v0/relationalai/early_access/dsl/serialize}/__init__.py +0 -0
- {relationalai → v0/relationalai}/early_access/dsl/serialize/binding_model.py +0 -0
- {relationalai → v0/relationalai}/early_access/dsl/serialize/model.py +0 -0
- {relationalai/tools → v0/relationalai/early_access/dsl/snow}/__init__.py +0 -0
- {relationalai → v0/relationalai}/early_access/tests/__init__.py +0 -0
- {relationalai → v0/relationalai}/environments/ci.py +0 -0
- {relationalai → v0/relationalai}/environments/hex.py +0 -0
- {relationalai → v0/relationalai}/environments/terminal.py +0 -0
- {relationalai → v0/relationalai}/experimental/__init__.py +0 -0
- {relationalai → v0/relationalai}/experimental/graphs.py +0 -0
- {relationalai → v0/relationalai}/experimental/paths/__init__.py +0 -0
- {relationalai → v0/relationalai}/experimental/paths/benchmarks/__init__.py +0 -0
- {relationalai → v0/relationalai}/experimental/paths/path_algorithms/__init__.py +0 -0
- {relationalai → v0/relationalai}/experimental/paths/rpq/__init__.py +0 -0
- {relationalai → v0/relationalai}/experimental/paths/rpq/filter.py +0 -0
- {relationalai → v0/relationalai}/experimental/paths/rpq/glushkov.py +0 -0
- {relationalai → v0/relationalai}/experimental/paths/rpq/transition.py +0 -0
- {relationalai → v0/relationalai}/experimental/paths/utilities/__init__.py +0 -0
- {relationalai → v0/relationalai}/experimental/paths/utilities/utilities.py +0 -0
- {relationalai → v0/relationalai}/metagen.py +0 -0
- {relationalai → v0/relationalai}/metamodel.py +0 -0
- {relationalai → v0/relationalai}/rel.py +0 -0
- {relationalai → v0/relationalai}/semantics/devtools/__init__.py +0 -0
- {relationalai → v0/relationalai}/semantics/internal/__init__.py +0 -0
- {relationalai → v0/relationalai}/semantics/internal/annotations.py +0 -0
- {relationalai → v0/relationalai}/semantics/lqp/__init__.py +0 -0
- {relationalai → v0/relationalai}/semantics/lqp/ir.py +0 -0
- {relationalai → v0/relationalai}/semantics/lqp/pragmas.py +0 -0
- {relationalai → v0/relationalai}/semantics/lqp/rewrite/__init__.py +0 -0
- {relationalai → v0/relationalai}/semantics/metamodel/dataflow.py +0 -0
- {relationalai → v0/relationalai}/semantics/metamodel/ir.py +0 -0
- {relationalai → v0/relationalai}/semantics/metamodel/rewrite/__init__.py +0 -0
- {relationalai → v0/relationalai}/semantics/metamodel/typer/__init__.py +0 -0
- {relationalai → v0/relationalai}/semantics/metamodel/types.py +0 -0
- {relationalai → v0/relationalai}/semantics/metamodel/visitor.py +0 -0
- {relationalai → v0/relationalai}/semantics/reasoners/experimental/__init__.py +0 -0
- {relationalai → v0/relationalai}/semantics/rel/__init__.py +0 -0
- {relationalai → v0/relationalai}/semantics/sql/__init__.py +0 -0
- {relationalai → v0/relationalai}/semantics/sql/executor/__init__.py +0 -0
- {relationalai → v0/relationalai}/semantics/sql/rewrite/__init__.py +0 -0
- {relationalai → v0/relationalai}/semantics/tests/logging.py +0 -0
- {relationalai → v0/relationalai}/std/aggregates.py +0 -0
- {relationalai → v0/relationalai}/std/dates.py +0 -0
- {relationalai → v0/relationalai}/std/graphs.py +0 -0
- {relationalai → v0/relationalai}/std/inspect.py +0 -0
- {relationalai → v0/relationalai}/std/math.py +0 -0
- {relationalai → v0/relationalai}/std/re.py +0 -0
- {relationalai → v0/relationalai}/std/strings.py +0 -0
- {relationalai → v0/relationalai}/tools/cleanup_snapshots.py +0 -0
- {relationalai → v0/relationalai}/tools/constants.py +0 -0
- {relationalai → v0/relationalai}/tools/query_utils.py +0 -0
- {relationalai → v0/relationalai}/tools/snapshot_viewer.py +0 -0
- {relationalai → v0/relationalai}/util/__init__.py +0 -0
- {relationalai → v0/relationalai}/util/constants.py +0 -0
- {relationalai → v0/relationalai}/util/graph.py +0 -0
- {relationalai → v0/relationalai}/util/timeout.py +0 -0
|
@@ -0,0 +1,2503 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import datetime
|
|
4
|
+
import logging
|
|
5
|
+
from collections import defaultdict
|
|
6
|
+
from dataclasses import dataclass, field
|
|
7
|
+
from functools import partial
|
|
8
|
+
from itertools import chain
|
|
9
|
+
from typing import Tuple, cast, Optional, Union
|
|
10
|
+
from decimal import Decimal as PyDecimal
|
|
11
|
+
|
|
12
|
+
import math
|
|
13
|
+
|
|
14
|
+
from v0.relationalai.semantics.metamodel.rewrite import (Flatten, ExtractNestedLogicals, DNFUnionSplitter,
|
|
15
|
+
DischargeConstraints)
|
|
16
|
+
from v0.relationalai.semantics.metamodel.visitor import ReadWriteVisitor
|
|
17
|
+
from v0.relationalai.util.graph import topological_sort
|
|
18
|
+
from v0.relationalai.semantics.metamodel import ir, compiler as c, visitor as v, builtins, types, helpers
|
|
19
|
+
from v0.relationalai.semantics.metamodel.typer import Checker, InferTypes, typer
|
|
20
|
+
from v0.relationalai.semantics.metamodel.builtins import from_cdc_annotation, concept_relation_annotation
|
|
21
|
+
from v0.relationalai.semantics.metamodel.types import (Hash, String, Number, Int64, Int128, Bool, Date, DateTime, Float,
|
|
22
|
+
RowId, UInt128)
|
|
23
|
+
from v0.relationalai.semantics.metamodel.util import (FrozenOrderedSet, OrderedSet, frozen, ordered_set, filter_by_type,
|
|
24
|
+
NameCache)
|
|
25
|
+
from v0.relationalai.semantics.sql import sql, rewrite
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class Compiler(c.Compiler):
    """
    Compiler that applies the SQL rewrite pipeline to a model and then renders
    the rewritten model as SQL text via `ModelToSQL`.
    """

    def __init__(self, skip_denormalization:bool=False):
        """
        Build the ordered list of rewrite passes and hand it to the base compiler.

        `skip_denormalization`: when True, the trailing `rewrite.Denormalize()` pass
        is left out of the pipeline.
        """
        rewrites = [
            DischargeConstraints(),
            Checker(),
            ExtractNestedLogicals(), # before InferTypes to avoid extracting casts
            InferTypes(),
            DNFUnionSplitter(),
            Flatten(handle_outputs=False),
            rewrite.RecursiveUnion(),
            rewrite.DoubleNegation(),
            rewrite.SortOutputQuery()
        ]
        if not skip_denormalization:
            # group updates, compute SCCs, use Sequence to denote their order
            rewrites.append(rewrite.Denormalize())
        super().__init__(rewrites)
        self.model_to_sql = ModelToSQL()

    def do_compile(self, model: ir.Model, options: Optional[dict] = None) -> tuple[str, ir.Model]:
        """
        Compile the rewritten model into a SQL string. Returns the SQL string together
        with the rewritten model (after the rewrite passes, before translating to sql).

        `options` is forwarded to `ModelToSQL.to_sql`; omitting it (or passing None)
        is equivalent to passing an empty dict.
        """
        # A `{}` literal in the signature would be one dict object shared by every
        # call that omits `options`; create a fresh one per call instead.
        effective_options = {} if options is None else options
        return str(self.model_to_sql.to_sql(model, effective_options)), model
|
|
53
|
+
|
|
54
|
+
@dataclass(frozen=True)
class OutputVar:
    """
    Immutable record pairing an IR value with the optional metadata used when the
    value is emitted as an output column of a generated select.
    """

    # The IR value being output.
    value: ir.Value

    # Column alias for the value; None when no alias was assigned.
    alias: Optional[str] = None

    # Converted SQL type name for the column (callers pass the result of
    # `_convert_type`); None when no type was supplied.
    value_type: Optional[str] = None

    # Optional IR task associated with the value.
    # NOTE(review): how `task` is consumed is not visible in this part of the file.
    task: Optional[ir.Task] = None
|
|
60
|
+
|
|
61
|
+
@dataclass
class RelationInfo:
    """
    Mutable per-relation bookkeeping gathered while statements are generated.

    The three select lists decide how the relation is materialized: relations with
    table selects receive INSERT statements, otherwise view selects produce a view,
    otherwise the dynamic-table selects produce a dynamic table.
    """

    # Usage flag for the relation.
    # NOTE(review): presumably set by `mark_used`; confirm against that helper.
    used: bool = False

    # Selects that feed a view definition of the relation.
    view_selects: list[sql.Select] = field(default_factory=list)

    # Selects whose rows are inserted into the relation's physical table.
    table_selects: list[sql.Select] = field(default_factory=list)

    # Selects that feed a dynamic table definition of the relation.
    dynamic_table_selects: list[sql.Select] = field(default_factory=list)
|
|
67
|
+
|
|
68
|
+
@dataclass
class ImportSpec:
    """One Python import line to be rendered into the source of a UDF body."""

    # Name to import: a module name when `module` is unset, otherwise the member
    # imported from `module`.
    value: str

    # Optional module to import `value` from, e.g., "scipy.special".
    module: Optional[str] = None

    def render(self) -> str:
        """Return the import statement as a line of Python source."""
        if self.module:
            return f"from {self.module} import {self.value}"
        return f"import {self.value}"
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
@dataclass
class UDFConfig:
    """Metadata describing how one builtin is emitted as a Python user-defined function."""

    # Name of the Python function inside `code` that acts as the UDF handler.
    handler: str

    # Python source of the handler, without its import lines.
    code: str

    # Imports rendered ahead of `code` when the function body is assembled.
    imports: list[ImportSpec] = field(default_factory=list)

    # Package names passed through to the generated function statement
    # (entries are given already quoted, e.g. "'scipy'").
    packages: list[str] = field(default_factory=list)
|
|
83
|
+
|
|
84
|
+
@dataclass
|
|
85
|
+
class ModelToSQL:
|
|
86
|
+
""" Generates SQL from an IR Model, assuming the compiler rewrites were done. """
|
|
87
|
+
|
|
88
|
+
_is_duck_db: bool = False
|
|
89
|
+
_warehouse: str = 'MAIN_WH'
|
|
90
|
+
_query_compilation: bool = False
|
|
91
|
+
_default_dynamic_table_target_lag: str = '5 minutes'
|
|
92
|
+
relation_name_cache: NameCache = field(default_factory=NameCache)
|
|
93
|
+
relation_arg_name_cache: NameCache = field(default_factory=NameCache)
|
|
94
|
+
relation_infos: dict[ir.Relation, RelationInfo] = field(default_factory=dict)
|
|
95
|
+
_error_relation_names: set[str] = field(default_factory=lambda: {'Error', 'pyrel_error_attrs'})
|
|
96
|
+
|
|
97
|
+
def to_sql(self, model: ir.Model, options:dict) -> sql.Program:
    """
    Translate an already-rewritten model into a `sql.Program`.

    Recognized `options` keys: "is_duck_db", "warehouse", "query_compilation" and
    "default_dynamic_table_target_lag". A missing or falsy "warehouse" or
    "default_dynamic_table_target_lag" leaves the value currently stored on this
    instance untouched.
    """
    # Start from empty per-relation select bookkeeping.
    self.relation_infos.clear()

    self._is_duck_db = options.get("is_duck_db", False)
    self._query_compilation = options.get("query_compilation", False)

    warehouse = options.get("warehouse")
    if warehouse:
        self._warehouse = warehouse

    target_lag = options.get("default_dynamic_table_target_lag")
    if target_lag:
        self._default_dynamic_table_target_lag = target_lag

    # Generate the statements, merge output selects, then order by dependencies.
    statements = self._generate_statements(model)
    statements = self._union_output_selects(statements)
    statements = self._sort_dependencies(statements)
    return sql.Program(statements)
|
|
105
|
+
|
|
106
|
+
def _generate_statements(self, model: ir.Model) -> list[sql.Node]:
    """
    Produce the (not yet dependency-sorted) SQL statements for `model`.

    Statements are appended in this order: those produced directly from the root
    body, one statement per relation that accumulated selects, CREATE TABLE
    statements for table relations, and finally user-defined functions (skipped
    when targeting DuckDB).
    """
    table_relations, used_builtins = self._get_relations(model)

    self._register_relation_args(table_relations)
    self._register_external_relations(model)

    statements: list[sql.Node] = []

    # 1. Process root logical body
    for child in cast(ir.Logical, model.root).body:
        if isinstance(child, ir.Logical):
            statements.extend(self._create_statement(child))
        elif isinstance(child, ir.Union):
            statements.append(self._create_recursive_view(child))

    # 2. Snapshot, per relation, every select collected so far
    #    (dynamic table selects first, then view selects, then table selects).
    relation_selects = {}
    for relation, info in self.relation_infos.items():
        combined = info.dynamic_table_selects + info.view_selects + info.table_selects
        if combined:
            relation_selects[relation] = combined

    # 3. Handle each relation with proper priority: table, then view, then dynamic table
    for relation, selects in relation_selects.items():
        table_name = self._relation_name(relation)
        info = self._get_relation_info(relation)

        if not info.table_selects and info.view_selects:
            statements.append(sql.CreateView(table_name, selects))
            continue

        columns = [self._var_name(relation.id, f) for f in relation.fields]

        if info.table_selects:
            # Relation is a table → insert into it
            if len(selects) == 1:
                source = selects[0]
            else:
                source = sql.CTE(False, f"{table_name}_cte", columns, selects, True)
            statements.append(sql.Insert(table_name, columns, [], source))
            continue

        # Snowflake currently has issues when using DISTINCT together with UNION in a Dynamic Table.
        # As a workaround, we generate a CTE without DISTINCT, using UNION ALL.
        # Then, we create a dynamic table with `SELECT DISTINCT * FROM CTE` to remove duplicates.
        dynamic_source = sql.CTE(False, f"{table_name}_cte", columns, selects, True)
        statements.append(
            sql.CreateDynamicTable(
                table_name,
                dynamic_source,
                self._default_dynamic_table_target_lag,
                self._warehouse
            )
        )

    # 4. Create physical tables for explicitly declared table relations
    for relation in table_relations:
        info = self.relation_infos.get(relation)
        if info is not None and not info.table_selects:
            # The relation has bookkeeping but nothing is inserted into it as a table.
            continue
        statements.append(self._create_table(relation))

    # 5. Create Snowflake user-defined functions
    if not self._is_duck_db:
        statements.extend(self._create_user_defined_functions(used_builtins))

    return statements
|
|
167
|
+
|
|
168
|
+
#--------------------------------------------------
|
|
169
|
+
# SQL Generation
|
|
170
|
+
#--------------------------------------------------
|
|
171
|
+
def _create_table(self, r: ir.Relation) -> sql.Node:
    """
    Build the CREATE TABLE statement for relation `r`: one column per relation
    field, named via `_var_name` and typed via `_convert_type`.
    """
    table_name = self._relation_name(r)
    columns = [
        sql.Column(self._var_name(r.id, f), self._convert_type(f.type))
        for f in r.fields
    ]
    return sql.CreateTable(sql.Table(table_name, columns), self._query_compilation)
|
|
176
|
+
|
|
177
|
+
def _create_recursive_view(self, union: ir.Union) -> sql.Node:
    """
    Build a view backed by a recursive CTE from a union of logical tasks.

    Every task of `union` must be an `ir.Logical` and there must be at least two
    of them. Each task becomes one select of the CTE; the view is named after the
    relation updated by the union and the CTE carries that name plus a `_rec` suffix.
    """
    task_count = len(union.tasks)
    assert task_count >= 2, f"Recursive CTE requires at least 2 tasks (anchor + recursive), but got {task_count}."
    invalid_types = [type(task).__name__ for task in union.tasks if not isinstance(task, ir.Logical)]
    assert not invalid_types, (
        "All tasks in a recursive CTE must be of type `ir.Logical`. "
        f"Invalid types: {invalid_types}"
    )

    def select_for_branch(branch: ir.Logical):
        """Build the select for a single branch of the union."""
        # TODO - assuming a single update per case
        branch_update = v.collect_by_type(ir.Update, branch).some()

        # Rewrite relation references for recursive lookups
        target = branch_update.relation
        rec_relation = ir.Relation(f"{target.name}_rec", target.fields, frozen(), frozen())
        rewritten = RecursiveLookupsRewriter(target, rec_relation).walk(branch)

        # TODO - improve the typing info to avoid these casts
        body = rewritten.body
        nots = cast(list[ir.Not], filter_by_type(body, ir.Not))
        unions = cast(list[ir.Union], filter_by_type(body, ir.Union))
        lookups = cast(list[ir.Lookup], filter_by_type(body, ir.Lookup))
        constructs = cast(list[ir.Construct], filter_by_type(body, ir.Construct))

        # One output per update argument, aliased and typed after the target
        # relation's field at the same position.
        output_vars = []
        for position, arg in enumerate(branch_update.args):
            target_field = target.fields[position]
            sql_type = self._convert_type(target_field.type)
            column_alias = self._var_name(target.id, target_field)
            output_vars.append(OutputVar(arg, column_alias, value_type=sql_type))

        return self._make_select(lookups, output_vars, nots, unions, constructs)

    # get a representative update
    update = v.collect_by_type(ir.Update, union).some()

    relation = update.relation
    self.mark_used(relation)

    view_name = self._relation_name(relation)
    column_names = [self._var_name(relation.id, f) for f in relation.fields]
    branch_selects = [select_for_branch(cast(ir.Logical, task)) for task in union.tasks]
    return sql.CreateView(
        view_name,
        sql.CTE(True, f"{view_name}_rec", column_names, branch_selects)
    )
|
|
225
|
+
|
|
226
|
+
def _create_user_defined_functions(self, relations: list[ir.Relation]) -> list[sql.CreateFunction]:
    """
    Emit a `sql.CreateFunction` for every relation in `relations` that has a
    Python UDF implementation configured here (acot, erf, erfinv); all other
    relations are ignored.

    Input fields of the relation become the function's input columns and the
    non-input field supplies the return type. An assertion fails when a configured
    relation has no non-input field.
    """
    # Central UDF metadata configuration
    udf_relations: dict[str, UDFConfig] = {
        builtins.acot.name: UDFConfig(
            handler="compute",
            imports=[ImportSpec("math")],
            code="def compute(x): return math.atan(1 / x) if x != 0 else math.copysign(math.pi / 2, x)"
        ),
        builtins.erf.name: UDFConfig(
            handler="compute",
            imports=[ImportSpec("math")],
            code="def compute(x): return math.erf(x)"
        ),
        builtins.erfinv.name: UDFConfig(
            handler="compute",
            imports=[ImportSpec("erfinv", module="scipy.special")],
            packages=["'scipy'"],
            code="def compute(x): return erfinv(x)"
        )
    }

    statements: list[sql.CreateFunction] = []

    for r in relations:
        meta = udf_relations.get(r.name)
        if meta is None:
            continue

        # Split relation fields into inputs and return type
        # We expect a single return argument per builtin relation
        return_type = None
        input_columns: list[sql.Column] = []
        for f in r.fields:
            if not f.input:
                return_type = self._convert_type(f.type)
                continue
            input_columns.append(sql.Column(self._var_name(r.id, f), self._convert_type(f.type)))

        # Build a full code block (imports + code), dropping whichever part is empty
        imports_code = "\n".join(imp.render() for imp in meta.imports)
        python_block = "\n".join(filter(None, (imports_code, meta.code)))

        assert return_type, f"No return type found for relation '{r.name}'"
        statements.append(
            sql.CreateFunction(
                name=r.name,
                inputs=input_columns,
                return_type=return_type,
                handler=meta.handler,
                body=python_block,
                packages=meta.packages
            )
        )

    return statements
|
|
281
|
+
|
|
282
|
+
def _create_statement(self, task: ir.Logical):
    """
    Translate one `Logical` task into SQL.

    The direct children of `task.body` are grouped by IR node type and the task is
    dispatched on which groups are present:

    * Updates only (no lookups, nots, aggregates, nested logicals or unions):
      `raw_source` updates become `sql.RawSource` statements, every other update is
      registered as static `SELECT` rows through `add_table_select`.
    * Any lookups, outputs, nots, aggregates or updates: updates are compiled into
      selects that are registered through `add_table_select`, `add_view_select` or
      `add_dynamic_table_select`; outputs are compiled into one select that is
      appended to the returned list.
    * Only nested logicals: each of them is compiled recursively.
    * Nothing to query or define: the task is skipped.

    Returns:
        The statements collected directly by this call. Selects produced for updates
        are handed to the `add_*_select` methods instead of being returned.

    Raises:
        Exception: if a `raw_source` update carries a non-string SQL source, or if
            none of the cases above applies.
    """
    # TODO - improve the typing info to avoid these casts
    body = task.body
    nots = cast(list[ir.Not], filter_by_type(body, ir.Not))
    lookups = cast(list[ir.Lookup], filter_by_type(body, ir.Lookup))
    updates = cast(list[ir.Update], filter_by_type(body, ir.Update))
    outputs = cast(list[ir.Output], filter_by_type(body, ir.Output))
    logicals = cast(list[ir.Logical], filter_by_type(body, ir.Logical))
    constructs = cast(list[ir.Construct], filter_by_type(body, ir.Construct))
    ranks = cast(list[ir.Rank], filter_by_type(body, ir.Rank))
    aggs = cast(list[ir.Aggregate], filter_by_type(body, ir.Aggregate))
    unions = cast(list[ir.Union], filter_by_type(body, ir.Union))

    var_to_construct = {}
    if constructs:
        var_to_construct = {construct.id_var: construct for construct in constructs}

    statements = []

    # Case 1: the task consists of updates only.
    if updates and not (lookups or nots or aggs or logicals or unions):
        for update in updates:
            relation = update.relation
            if relation == builtins.raw_source:
                lang = update.args[0]
                src = update.args[1]
                if not isinstance(lang, str) or lang.lower() != "sql":
                    logging.warning(f"Unsupported language for RawSource: {lang}")
                    continue
                if not isinstance(src, str):
                    raise Exception(f"Expected SQL source to be a string, got: {type(src).__name__}")
                statements.append(sql.RawSource(src))
                continue

            # Static values are emitted as `SELECT hash(V1, ...), V2, V3`.
            # `SELECT` is used instead of `VALUES` because Snowflake parses and restricts certain
            # expressions in VALUES(...): built-in functions like HASH() or MD5() are often rejected
            # unless they are used in SELECT.
            for values in self._get_tuples(task, update):
                columns = []
                for field, value in zip(relation.fields, values):
                    columns.append(sql.VarRef(str(value), alias=self._var_name(relation.id, field)))
                self.add_table_select(relation, sql.Select(False, columns))
        return statements

    # Case 2: there is something to look up, output or insert.
    if lookups or outputs or nots or aggs or updates:
        # Some lookup relations are wrapped into a nested Logical and have to be pulled out for
        # the SQL compilation. For example QB `decimal(0)` looks like this in IR:
        #   Logical ^[res]
        #       Exists(vDecimal128)
        #           Logical
        #               cast(Decimal128, 0, vDecimal128)
        #               decimal128(vDecimal128, res)
        if logicals:
            unions = self._extract_all_of_type_from_logical(task, ir.Union)
            all_lookups = self._extract_all_of_type_from_logical(task, ir.Lookup)
        else:
            all_lookups = lookups

        # Map every variable used by a lookup inside a union to that union.
        var_to_union = {}
        if unions:
            for union in unions:
                for member in union.tasks:
                    if not isinstance(member, ir.Lookup):
                        continue
                    for member_arg in member.args:
                        if isinstance(member_arg, ir.Var):
                            var_to_union[member_arg] = union

        if updates:
            # Insert values that match a query: INSERT INTO ... SELECT ... FROM ... WHERE ...
            for update in updates:
                relation = update.relation
                if self._is_error_relation(relation):
                    # TODO: revisit this during `RAI-39124`. For now we filter out all error relations.
                    continue
                # Tables must not be created or populated for value types that can be sourced
                # directly from existing Snowflake tables.
                if self._is_value_type_population_relation(relation):
                    continue

                if all_lookups and all(builtins.is_builtin(lookup.relation) for lookup in all_lookups):
                    # Only builtin lookups (like `cast`, etc.) and no table lookups:
                    # assume an insert of static values.
                    aliases = self._get_update_aliases(update, var_to_construct, var_to_union, True)
                    static_select = self._make_select(all_lookups, aliases, nots, unions, constructs)
                    self.add_table_select(relation, static_select)
                    continue

                derived_visitor = DerivedRelationsVisitor()
                task.accept(derived_visitor)
                if aggs:
                    # After flatten there can be only one aggregation per rule.
                    select = self._make_agg_select(update, all_lookups, aggs[0], nots, unions, constructs)
                elif ranks:
                    # After flatten there can be only one rank per rule.
                    select = self._make_rank_select(update, all_lookups, ranks[0], nots, unions, constructs)
                else:
                    # Snowflake currently has issues when DISTINCT is combined with UNION in a
                    # Dynamic Table. Such statements are generated without DISTINCT and the
                    # duplicates are removed later by declaring the Dynamic Tables with CTE + DISTINCT.
                    distinct = bool(self._is_duck_db or not derived_visitor.is_derived())
                    aliases = self._get_update_aliases(update, var_to_construct, var_to_union)

                    if not unions:
                        select = self._make_select(all_lookups, aliases, nots, unions, constructs, distinct)
                    elif lookups:
                        select = self._make_match_select(all_lookups, aliases, unions, nots, constructs, distinct)
                    else:
                        select = self._make_full_outer_join_select(aliases, unions, constructs, distinct)

                if derived_visitor.is_derived() and not self._is_duck_db:
                    self.add_dynamic_table_select(relation, select)
                else:
                    self.add_view_select(relation, select)
        elif outputs:
            # Output a query: SELECT ... FROM ... WHERE ...
            aliases = []
            distinct = False
            for output in outputs:
                distinct = distinct or output.keys is None
                aliases.extend(
                    self._get_alias(key, arg, None, var_to_construct, var_to_union)
                    for key, arg in output.aliases
                )

            if not unions:
                if all(builtins.is_builtin(lookup.relation) for lookup in all_lookups):
                    # Example:
                    #   QB: select(1).where(Foo(1) == Bar(1))
                    #   IR:
                    #       Logical
                    #           1::Foo = 1::Bar
                    #           -> output(1 as 'v')
                    select = self._make_select(all_lookups, aliases, nots, unions, constructs, distinct, True)
                else:
                    select = self._make_left_outer_join_select(task, all_lookups, aliases, nots, constructs,
                                                               distinct)
            elif lookups:
                select = self._make_match_select(all_lookups, aliases, unions, nots, constructs, distinct, True)
            else:
                select = self._make_full_outer_join_select(aliases, unions, constructs, distinct, True)

            statements.append(select)
        return statements

    # Case 3: only nested logicals, compile each of them.
    if logicals:
        for nested in logicals:
            statements.extend(self._create_statement(nested))
        return statements

    # Case 4: nothing to query or define.
    if not updates and not outputs:
        # Example:
        #   QB:
        #       (
        #           where(Person.age >= 65).define(Senior(Person)) |
        #           where(Person.age >= 18).define(Adult(Person)) |
        #           define(Child(Person))
        #       )
        #   After `flatten` IR will look like this:
        #       Logical
        #           Union
        #               _match_7(person_7)
        #               _match_8(person_7)
        #               _match_9(person_7)
        #
        # Nothing to query or define, we need to skip this task.
        return statements

    raise Exception(f"Cannot create SQL statement for:\n{task}")
|
|
430
|
+
|
|
431
|
+
def _make_agg_select(self, update: ir.Update, lookups: list[ir.Lookup], agg: ir.Aggregate,
                     nots: Optional[list[ir.Not]] = None, unions: Optional[list[ir.Union]] = None,
                     constructs: Optional[list[ir.Construct]] = None) -> sql.Select:
    """
    Build the SELECT for an aggregation as an outer aggregate over a DISTINCT subquery.

    Example output:
        SELECT
            department, count(v) AS v
        FROM (
            SELECT DISTINCT
                v0.department, v0.employees AS v
            FROM
                department_employees AS v0,
                Department AS v1
            WHERE
                v0.department = v1.department
        ) GROUP BY department;

    Why a subquery:
        The IR does not always make explicit whether the aggregation runs over distinct rows.
        Aggregating over a DISTINCT subquery gives the right result whether the original query
        used `count(...)` or `count(distinct ...)`.

    Compare:

        QB: select(count(Person.name))
        IR:
            Logical
                Logical ^[name=None, person_4=None]
                    Person(person_4)
                    name(person_4, name)
                    count([person_4, name], [], [v])
                -> derive _aggregate_1(v)

        QB: select(count(distinct Person.name))
        IR:
            Logical
                Logical ^[name=None]
                    Person(person_4)
                    name(person_4, name)
                    count([name], [], [v])
                -> derive _aggregate_1(v)

    In the `distinct` case the grouping variable `person_4` is missing from the projection.
    Projecting every aggregation argument in the subquery handles both shapes the same way.

    Returns:
        A non-distinct `sql.Select` over the DISTINCT subquery, grouped by every update
        argument that is not the aggregated variable.
    """
    relation = update.relation
    is_count = agg.aggregation == builtins.count
    agg_var = agg.args[0] if is_count else agg.args[1]

    seen_args = set()
    outputs: list[Union[sql.VarRef, sql.RowNumberVar, int]] = []
    sub_query_outputs: list[OutputVar] = []
    # Group across all non-aggregated variables.
    group_by: list[sql.VarRef] = []

    for position, arg in enumerate(update.args):
        if arg in seen_args:
            continue
        column = relation.fields[position]
        column_type = self._convert_type(column.type)
        column_name = self._var_name(relation.id, column)
        if isinstance(arg, ir.Var) and arg == agg_var:
            # The aggregated variable: aggregate it outside, project it inside.
            aggregate_call = f"{agg.aggregation.name}({column_name})"
            outputs.append(sql.VarRef(aggregate_call, alias=column_name, type=column_type))
            sub_query_outputs.append(OutputVar(arg, column_name, task=agg))
        else:
            group_by.append(sql.VarRef(column_name))
            outputs.append(sql.VarRef(column_name, alias=column_name, type=column_type))
            sub_query_outputs.append(OutputVar(arg, column_name))
        seen_args.add(arg)

    # Remaining projection variables are only needed inside the DISTINCT subquery.
    for arg in agg.projection:
        if arg in seen_args:
            continue
        # For `count` the last projection entry is not projected.
        if is_count and arg == agg.projection[-1]:
            continue
        sub_query_outputs.append(OutputVar(value=arg))
        seen_args.add(arg)

    sub_select = self._make_select(lookups, sub_query_outputs, nots, unions, constructs, True)

    return sql.Select(False, outputs, sub_select, group_by=group_by)
|
|
514
|
+
|
|
515
|
+
def _make_rank_select(self, update: ir.Update, lookups: list[ir.Lookup], rank: ir.Rank,
                      nots: Optional[list[ir.Not]] = None, unions: Optional[list[ir.Union]] = None,
                      constructs: Optional[list[ir.Construct]] = None):
    """
    Build the SELECT for a rank as a ROW_NUMBER() over a DISTINCT subquery.

    Example output:
        SELECT
            cat, name, ROW_NUMBER() OVER ( ORDER BY name ASC ) as v
        FROM (
            SELECT DISTINCT
                v0.cat, v1.name
            FROM
                Cat AS v0, cat_name AS v1
            WHERE
                v0.cat = v1.cat
        ) ORDER BY v LIMIT 10;

    Why a subquery:
        The IR does not always make explicit whether the rank runs over distinct rows.
        Ranking over a DISTINCT subquery gives the right result whether the original query
        used `rank(...)` or `rank(distinct ...)`.

    Compare:

        QB: select(rank(Cat.name))
        IR:
            Logical
                Cat(cat_5)
                name(cat_5, name)
                rank([cat_5], [], [name'↑'], v)
                -> derive _rank_1(cat_5, name, v)

        QB: select(rank(distinct(Cat.name)))
        IR:
            Logical
                Cat(cat_5)
                name(cat_5, name)
                rank([], [], [name'↑'], v)
                -> derive _rank_1(name, v)

    In the `distinct` case the grouping variable `cat_5` is missing from the projection.
    Projecting every rank argument in the subquery handles both shapes the same way.

    Returns:
        A non-distinct `sql.Select` over the DISTINCT subquery, ordered by the rank result
        column and limited by `rank.limit`.

    Raises:
        AssertionError: if `rank.result` does not occur among `update.args`.
    """
    order_by_vars = [
        sql.OrderByVar(arg.name, is_ascending)
        for arg, is_ascending in zip(rank.args, rank.arg_is_ascending)
    ]
    partition_by_vars = [arg.name for arg in rank.group] if rank.group else []

    relation = update.relation
    seen_args = set()
    outputs: list[Union[sql.VarRef, sql.RowNumberVar, int]] = []
    sub_query_outputs: list[OutputVar] = []
    rank_result_field_name = None

    for position, arg in enumerate(update.args):
        if arg in seen_args:
            continue
        column = relation.fields[position]
        column_type = self._convert_type(column.type)
        column_name = self._var_name(relation.id, column)
        if isinstance(arg, ir.Var) and arg == rank.result:
            # The rank result is computed by the outer select only.
            rank_result_field_name = column_name
            outputs.append(sql.RowNumberVar(order_by_vars, partition_by_vars, column_name, column_type))
        else:
            outputs.append(sql.VarRef(column_name, alias=column_name, type=column_type))
            sub_query_outputs.append(OutputVar(arg, column_name))
        seen_args.add(arg)

    # Remaining projection variables are only needed inside the DISTINCT subquery.
    for arg in rank.projection:
        if arg in seen_args:
            continue
        sub_query_outputs.append(OutputVar(value=arg))
        seen_args.add(arg)

    sub_select = self._make_select(lookups, sub_query_outputs, nots, unions, constructs, True)

    assert rank_result_field_name is not None, "Rank result variable not found in update.args."
    ordering = [sql.VarRef(rank_result_field_name)]
    return sql.Select(False, outputs, sub_select, order_by=ordering, limit=rank.limit)
|
|
596
|
+
|
|
597
|
+
def _make_match_select(self, lookups: list[ir.Lookup], outputs: list[OutputVar], unions: list[ir.Union],
                       nots: Optional[list[ir.Not]] = None, constructs: Optional[list[ir.Construct]] = None,
                       distinct: bool = False, is_output: bool = False):
    """
    Build the SELECT for a match: table lookups combined with the lookups of one or more unions.

    Example output:
        SELECT
            COALESCE(v2.v0, v3.v0) as v0, v0.name, COALESCE(v4.v0, v5.v0) as v02
        FROM
            person_name AS v0
            JOIN Person AS v1 ON v0.person = v1.person
            LEFT OUTER JOIN _match_3 AS v2 ON v1.person = v2.person
            LEFT OUTER JOIN _match_4 AS v3 ON v1.person = v3.person
            LEFT OUTER JOIN _match_5 AS v4 ON v1.person = v4.person
            LEFT OUTER JOIN _match_6 AS v5 ON v1.person = v5.person
        WHERE
            ( v2.person IS NOT NULL OR v3.person IS NOT NULL ) AND
            ( v4.person IS NOT NULL OR v5.person IS NOT NULL );

    Reading the example:
        The query joins on the shared key (`person`) to collect the matching records.
        - INNER JOINs cover the mandatory relations (e.g. `Person`, `person_name`).
        - LEFT OUTER JOINs bring in the optional match sets from the auxiliary `_match_*` tables.
        - `COALESCE(expr1, expr2, ...)` merges the values of several sources by returning the
          first non-null argument (or NULL if all are null). This flattens union-style matches
          and keeps partial matches in a single SELECT clause.

    Returns:
        A `sql.Select` built from the collected FROM / JOIN / WHERE parts.
    """
    construct_by_var = {c.id_var: c for c in constructs} if constructs else {}

    lookups_per_union: dict[ir.Union, OrderedSet[ir.Lookup]] = self._extract_all_lookups_per_union(unions)

    # Table lookups (together with the union lookups) define the FROM / JOIN skeleton.
    table_lookups = OrderedSet.from_iterable(
        lookup for lookup in lookups if not builtins.is_builtin(lookup.relation)
    )
    metadata = self._extract_match_lookups_metadata(table_lookups, lookups_per_union)
    froms, joins, wheres, sql_vars, var_column, var_lookups = metadata

    # Builtin lookups are resolved into variables, conditions and table expressions.
    builtin_lookups = OrderedSet.from_iterable(
        lookup for lookup in lookups if builtins.is_builtin(lookup.relation)
    )
    resolved = self._resolve_builtins(builtin_lookups, var_lookups, var_column, sql_vars, construct_by_var,
                                      outputs)
    builtin_vars, builtin_wheres, builtin_table_expressions = resolved

    froms.extend(self._process_builtin_table_expressions(builtin_table_expressions))
    wheres.extend(builtin_wheres)
    wheres.extend(
        self._process_constructs(table_lookups, var_lookups, var_column, sql_vars, builtin_vars,
                                 construct_by_var)
    )

    not_null_vars, select_vars = self._generate_select_output(outputs, builtin_vars, sql_vars, var_column,
                                                              var_lookups, construct_by_var)
    for not_null_var in not_null_vars or ():
        wheres.append(sql.NotNull(not_null_var))

    not_exists, _ignored = self._generate_select_nots(nots, var_lookups, sql_vars, var_column, len(sql_vars))
    wheres.extend(not_exists)

    where_clause = self._process_wheres_clauses(wheres)

    return sql.Select(distinct, select_vars, froms, where_clause, joins, is_output=is_output)
|
|
659
|
+
|
|
660
|
+
def _make_full_outer_join_select(self, outputs: list[OutputVar], unions: list[ir.Union],
                                 constructs: Optional[list[ir.Construct]] = None, distinct: bool = False,
                                 is_output: bool = False):
    """
    Build the SELECT for a match that combines several data sets with FULL OUTER JOINs and
    applies no further lookup filtering.

    It is used when the input IR has no table lookups and consists of `Union` operations grouped
    under a `Logical` node. All values of every union input are kept, and their corresponding
    fields are aligned through the outer joins.

    IR Example:
        Logical
            Logical ^[v0=None]
                Union ^[v0]
                    _match_10(v0)
                    _match_11(v0)
            Logical ^[v0_2=None]
                Union ^[v0_2]
                    _match_12(v0_2)
                    _match_13(v0_2)
            -> output(v0, v0_2 as 'v02')

    The output schema has two final fields:
        - `v0`, derived from `_match_10` and `_match_11`
        - `v02`, derived from `_match_12` and `_match_13`

    Example output:
        SELECT DISTINCT
            COALESCE(v0.v0, v1.v0) as v0, COALESCE(v2.v0, v3.v0) as v02
        FROM
            _match_10 AS v0
            FULL OUTER JOIN _match_11 AS v1 ON TRUE
            FULL OUTER JOIN _match_12 AS v2 ON TRUE
            FULL OUTER JOIN _match_13 AS v3 ON TRUE;

    Reading the example:
        - Each `Union` is compiled into one or more subqueries (e.g. `_match_10`, `_match_11`)
          that may represent disjoint subsets of data.
        - `FULL OUTER JOIN` keeps every possible value from each side, including `NULL`s.
        - `COALESCE()` merges the values of the joined tables into one column per output field.
        - Together this keeps the result complete when different subsets contain different keys
          or match results.

    Returns:
        A `sql.Select` built from the collected FROM / JOIN / WHERE parts.
    """
    construct_by_var = {c.id_var: c for c in constructs} if constructs else {}

    lookups_per_union: dict[ir.Union, OrderedSet[ir.Lookup]] = self._extract_all_lookups_per_union(unions)
    metadata = self._extract_union_lookups_metadata(lookups_per_union)
    froms, joins, wheres, sql_vars, var_column, var_lookups = metadata

    # There are no builtin lookups in this shape, hence the empty builtin-variable mapping.
    not_null_vars, select_vars = self._generate_select_output(outputs, {}, sql_vars, var_column, var_lookups,
                                                              construct_by_var)
    for not_null_var in not_null_vars or ():
        wheres.append(sql.NotNull(not_null_var))

    where_clause = self._process_wheres_clauses(wheres)

    return sql.Select(distinct, select_vars, froms, where_clause, joins, is_output=is_output)
|
|
719
|
+
|
|
720
|
+
def _make_left_outer_join_select(self, task: ir.Logical, lookups: list[ir.Lookup], outputs: list[OutputVar],
                                 nots: Optional[list[ir.Not]] = None, constructs: Optional[list[ir.Construct]] = None,
                                 distinct: bool = False) -> sql.Select:
    """
    Build the SELECT for an output query, mixing INNER JOIN and LEFT OUTER JOIN clauses
    according to the IR structure.

    ### JOIN Rules:

    1. **Top-level lookups** (direct children of the root `Logical`) always use **INNER JOIN**.

    2. A lookup uses **LEFT OUTER JOIN** if:
       - it appears inside a nested `Logical`, and
       - the corresponding variable is hoisted with a `None` value in that `Logical`.
       - Example: `id(student, id)` becomes a LEFT OUTER JOIN if the `Logical` hoists `id=None`.

    3. If a variable is hoisted with `None` in one `Logical` but is also used by a lookup that is
       hoisted without `None`, the corresponding join becomes an **INNER JOIN**.
       - This resolves the ambiguity when a lookup's output variable is reused meaningfully elsewhere.

    ---

    ### IR Example 1 (with LEFT OUTER JOIN):

    IR:
        Logical
            Logical
                Student(student)
                goes_at(student, school)
                subject(school, subject)
                desc(subject, desc)
                desc = "English"
            Logical ^[id=None]
                id(student, id)
            Logical ^[name=None, course=None]
                attends(student, course)
                instructor(course, instructor)
                name(instructor, name)
            -> output[student, course, subject](id, name, desc)

    SQL Output:
        SELECT
            v0.id, v3.name, v7.desc
        FROM
            Student AS v4
            JOIN student_goes_at AS v5 ON v4.student = v5.student
            JOIN school_subject AS v6 ON v5.school = v6.school
            JOIN subject_desc AS v7 ON v6.subject = v7.subject
            LEFT OUTER JOIN student_id AS v0 ON v5.student = v0.student
            LEFT OUTER JOIN student_attends AS v1 ON v5.student = v1.student
            LEFT OUTER JOIN course_instructor AS v2 ON v1.course = v2.course
            LEFT OUTER JOIN instructor_name AS v3 ON v2.instructor = v3.instructor
        WHERE
            v7.desc = 'English';

    ---

    ### IR Example 2 (with NOT EXISTS):

    IR:
        Logical
            Not
                Logical
                    Logical ^[person, age]
                        _union_1(person, age)
            Person(person)
            Logical ^[name=None]
                name(person, name)
            Logical ^[age=None]
                age(person, age)
            -> output[person](name, age)

    Note: `age` is hoisted with `None`, but it is also used in `_union_1`, which is hoisted
    without `None` (i.e., `^[person, age]`). Therefore `age(person, age)` is compiled as an
    INNER JOIN.

    SQL Output:
        SELECT
            v0.name,
            v1.age
        FROM
            Person AS v2
            JOIN person_name AS v0 ON v2.person = v0.person
            JOIN person_age AS v1 ON v2.person = v1.person
        WHERE
            NOT EXISTS ( SELECT 1 FROM _union_1 AS v3 WHERE v3.person = v0.person AND v3.age = v1.age );

    ---

    Returns:
        A `sql.Select` that is always flagged as an output select (`is_output=True`).
    """
    construct_by_var = {c.id_var: c for c in constructs} if constructs else {}

    # Table lookups define the FROM / JOIN skeleton.
    table_lookups = OrderedSet.from_iterable(
        lookup for lookup in lookups if not builtins.is_builtin(lookup.relation)
    )
    metadata = self._extract_left_outer_joins_lookups_metadata(task, table_lookups, nots)
    froms, joins, wheres, sql_vars, var_column, var_lookups = metadata

    # Builtin lookups are resolved into variables, conditions and table expressions.
    builtin_lookups = OrderedSet.from_iterable(
        lookup for lookup in lookups if builtins.is_builtin(lookup.relation)
    )
    resolved = self._resolve_builtins(builtin_lookups, var_lookups, var_column, sql_vars, construct_by_var,
                                      outputs)
    builtin_vars, builtin_wheres, builtin_table_expressions = resolved

    # With `LEFT OUTER JOIN` and `ARRAY_GENERATE_RANGE`, SF doesn't allow `ON TRUE`, while DuckDB
    # requires it. That is why there are 2 different join classes.
    join_cls = sql.Join if self._is_duck_db else sql.JoinWithoutCondition
    joins.extend(join_cls(expr, alias) for alias, expr in builtin_table_expressions.items())

    wheres.extend(builtin_wheres)
    wheres.extend(
        self._process_constructs(table_lookups, var_lookups, var_column, sql_vars, builtin_vars,
                                 construct_by_var)
    )

    # The not-null variables reported by the output generation are not used for this select.
    _ignored_not_null, select_vars = self._generate_select_output(outputs, builtin_vars, sql_vars, var_column,
                                                                  var_lookups, construct_by_var)

    not_exists, _ignored = self._generate_select_nots(nots, var_lookups, sql_vars, var_column, len(sql_vars))
    wheres.extend(not_exists)

    where_clause = self._process_wheres_clauses(wheres)

    return sql.Select(distinct, select_vars, froms, where_clause, joins, is_output=True)
|
|
839
|
+
|
|
840
|
+
def _make_select(self, lookups: list[ir.Lookup], outputs: list[OutputVar], nots: Optional[list[ir.Not]] = None,
                 unions: Optional[list[ir.Union]] = None, constructs: Optional[list[ir.Construct]] = None,
                 distinct: bool = False, is_output: bool = False) -> sql.Select:
    """
    Build a plain `SELECT ... FROM ... WHERE ...` without explicit JOIN clauses.

    The given lookups and the lookups found in `unions` are treated as one set: table lookups
    become FROM entries, builtin lookups are resolved into variables, conditions and table
    expressions, and the conditions from constructs, shared variables, not-null outputs and
    `nots` are combined into the WHERE clause.

    Returns:
        A `sql.Select` built from the collected FROM / WHERE parts.
    """
    construct_by_var = {c.id_var: c for c in constructs} if constructs else {}

    lookups_per_union: dict[ir.Union, OrderedSet[ir.Lookup]] = self._extract_all_lookups_per_union(unions)
    # Direct lookups first, followed by the lookups of every union.
    all_lookups = lookups + [
        union_lookup
        for lookups_of_union in lookups_per_union.values()
        for union_lookup in lookups_of_union
    ]

    # Table lookups define the FROM entries.
    table_lookups = OrderedSet.from_iterable(
        lookup for lookup in all_lookups if not builtins.is_builtin(lookup.relation)
    )
    froms, wheres, sql_vars, var_column, var_lookups = self._extract_lookups_metadata(table_lookups)

    # Builtin lookups are resolved into variables, conditions and table expressions.
    builtin_lookups = OrderedSet.from_iterable(
        lookup for lookup in all_lookups if builtins.is_builtin(lookup.relation)
    )
    resolved = self._resolve_builtins(builtin_lookups, var_lookups, var_column, sql_vars, construct_by_var,
                                      outputs)
    builtin_vars, builtin_wheres, builtin_table_expressions = resolved

    froms.extend(self._process_builtin_table_expressions(builtin_table_expressions))
    wheres.extend(builtin_wheres)
    wheres.extend(
        self._process_constructs(table_lookups, var_lookups, var_column, sql_vars, builtin_vars,
                                 construct_by_var)
    )
    wheres.extend(self._generate_where_clauses(var_lookups, var_column, sql_vars, lookups_per_union))

    not_null_vars, select_vars = self._generate_select_output(outputs, builtin_vars, sql_vars, var_column,
                                                              var_lookups, construct_by_var)
    for not_null_var in not_null_vars or ():
        wheres.append(sql.NotNull(not_null_var))

    not_exists, _ignored = self._generate_select_nots(nots, var_lookups, sql_vars, var_column, len(sql_vars))
    wheres.extend(not_exists)

    where_clause = self._process_wheres_clauses(wheres)

    return sql.Select(distinct, select_vars, froms, where_clause, is_output=is_output)
|
|
878
|
+
|
|
879
|
+
def _extract_lookups_metadata(self, lookups: OrderedSet[ir.Lookup], start_index: int = 0):
    """
    Assign a SQL alias to every table lookup and collect the helper mappings for it.

    Each lookup gets the alias `v<i>`, numbered consecutively from `start_index`, and a FROM
    entry for its relation. The lookup arguments are then processed by `_process_lookup_args`,
    which receives the mappings and the WHERE list collected here.

    Returns:
        A tuple `(froms, wheres, sql_vars, var_column, var_lookups)`.
    """
    froms: list[sql.From] = []
    wheres: list[sql.Expr] = []
    sql_vars: dict[ir.Lookup, str] = {}  # one var per table lookup
    var_column: dict[Tuple[ir.Var, ir.Lookup], ir.Field] = {}
    var_lookups: dict[ir.Var, OrderedSet[ir.Lookup]] = defaultdict(OrderedSet)

    for index, lookup in enumerate(lookups, start_index):
        alias = f"v{index}"
        froms.append(sql.From(self._relation_name(lookup.relation), alias))
        # The alias has to be registered before the arguments of the lookup are processed.
        sql_vars[lookup] = alias
        self._process_lookup_args(lookup, sql_vars, var_column, var_lookups, wheres)

    return froms, wheres, sql_vars, var_column, var_lookups
|
|
895
|
+
|
|
896
|
+
def _extract_match_lookups_metadata(self, lookups: OrderedSet[ir.Lookup],
                                    union_lookups: dict[ir.Union, OrderedSet[ir.Lookup]], start_index: int = 0):
    """
    Build the FROM / JOIN / WHERE metadata for a match.

    The first element of `lookups` becomes the root FROM table, the remaining
    `lookups` are attached with inner joins, and the lookups of every union in
    `union_lookups` are attached with left outer joins. For each union one
    `sql.Or` over the NOT NULL conditions of its left-joined lookups is added
    to the WHERE list.

    Table aliases are `v{start_index}`, `v{start_index + 1}`, ... assigned first
    to `lookups` and then to the union lookups, in iteration order.

    `lookups` must be non-empty: the root table is taken with `next(iter(lookups))`.

    Returns the tuple `(froms, joins, wheres, sql_vars, var_column, var_lookups)`.

    Raises `ValueError` when a lookup shares no variable with any already
    joined lookup, so no join condition can be produced for it.
    """
    wheres: list[sql.Expr] = []
    sql_vars: dict[ir.Lookup, str] = {}  # one alias per table lookup
    var_column: dict[Tuple[ir.Var, ir.Lookup], ir.Field] = {}
    var_lookups: dict[ir.Var, OrderedSet[ir.Lookup]] = defaultdict(OrderedSet)

    # Step 1: hand out aliases and fill the helper mappings, plain lookups first.
    alias_index = start_index
    for lookup_set in (lookups, *union_lookups.values()):
        for lookup in lookup_set:
            sql_vars[lookup] = f"v{alias_index}"
            self._process_lookup_args(lookup, sql_vars, var_column, var_lookups, wheres)
            alias_index += 1

    # Step 2: the first lookup is the root FROM; everything else is joined onto it.
    used_lookups = ordered_set()
    root_lookup = next(iter(lookups))
    used_lookups.add(root_lookup)
    froms: list[sql.From] = [sql.From(self._relation_name(root_lookup.relation), sql_vars[root_lookup])]
    joins: list[sql.Join] = []

    def _join_lookup(lookup: ir.Lookup, is_left_join: bool = False):
        # `used_lookups` does not change while the args are scanned, so the
        # "most recently joined first" ordering is computed once per call.
        recent_first = list(used_lookups)[::-1]
        join_conditions = []
        not_null_conditions = []

        for arg in lookup.args:
            if not isinstance(arg, ir.Var) or arg not in var_lookups:
                continue
            bound_in = var_lookups[arg]
            for partner in recent_first:
                if partner not in bound_in:
                    continue
                partner_alias = sql_vars[partner]
                own_alias = sql_vars[lookup]

                partner_field = self._var_name(partner.relation.id, var_column[(arg, partner)])
                own_field = self._var_name(lookup.relation.id, var_column[(arg, lookup)])

                left_var = f"{partner_alias}.{partner_field}"
                right_var = f"{own_alias}.{own_field}"

                join_conditions.append(sql.Terminal(f"{left_var} = {right_var}"))
                if is_left_join:
                    not_null_conditions.append(sql.NotNull(right_var))
                # Only the most recent lookup sharing this variable is used.
                break

        if not join_conditions:
            raise ValueError(f"No join condition found for lookup: {lookup}")

        on = join_conditions[0] if len(join_conditions) == 1 else sql.And(join_conditions)
        join_type = sql.LeftOuterJoin if is_left_join else sql.Join
        joins.append(join_type(self._relation_name(lookup.relation), sql_vars[lookup], on))

        if not is_left_join:
            # Only inner-joined lookups may serve as join partners later on.
            used_lookups.add(lookup)
            return None
        if len(not_null_conditions) == 1:
            return not_null_conditions[0]
        return sql.And(not_null_conditions)

    # Inner JOINs for the plain lookups.
    for lookup in lookups:
        if lookup not in used_lookups:
            _join_lookup(lookup)

    # LEFT JOINs for the union lookups; a union contributes one OR of its branches.
    for union_set in union_lookups.values():
        branch_conditions = [
            _join_lookup(lookup, is_left_join=True)
            for lookup in union_set
            if lookup not in used_lookups
        ]
        if branch_conditions:
            wheres.append(sql.Or(branch_conditions))

    return froms, joins, wheres, sql_vars, var_column, var_lookups
|
|
982
|
+
|
|
983
|
+
def _extract_union_lookups_metadata(self, lookups: dict[ir.Union, OrderedSet[ir.Lookup]], start_index: int = 0):
    """
    Build the FROM / JOIN metadata for the lookups of a set of unions.

    Lookups are visited union by union, in iteration order. Each one receives
    the alias `v{start_index + position}` and has its arguments registered via
    `_process_lookup_args`. The very first lookup becomes the FROM table; every
    later lookup is appended as a `sql.FullOuterJoin`, constructed here without
    an ON condition.

    Returns the tuple `(froms, joins, wheres, sql_vars, var_column, var_lookups)`.
    """
    wheres: list[sql.Expr] = []
    sql_vars: dict[ir.Lookup, str] = dict()
    var_column: dict[Tuple[ir.Var, ir.Lookup], ir.Field] = dict()
    var_lookups: dict[ir.Var, OrderedSet[ir.Lookup]] = defaultdict(OrderedSet)
    froms: list[sql.From] = []
    joins: list[sql.Join] = []

    alias_index = start_index
    for union_set in lookups.values():
        for lookup in union_set:
            alias = f"v{alias_index}"
            sql_vars[lookup] = alias
            self._process_lookup_args(lookup, sql_vars, var_column, var_lookups, wheres)
            alias_index += 1

            table_name = self._relation_name(lookup.relation)
            if froms:
                # A base table already exists: join the rest onto it.
                joins.append(sql.FullOuterJoin(table_name, alias))
            else:
                # Nothing emitted yet: this lookup is the base FROM.
                froms.append(sql.From(table_name, alias))

    return froms, joins, wheres, sql_vars, var_column, var_lookups
|
|
1011
|
+
|
|
1012
|
+
def _extract_left_outer_joins_lookups_metadata(self, task: ir.Logical, lookups: OrderedSet[ir.Lookup],
                                               nots: Optional[list[ir.Not]] = None, start_index: int = 0):
    """
    Build the FROM / JOIN metadata for `lookups`, choosing per lookup between an
    inner join and a left outer join by walking the structure of `task`.

    The first element of `lookups` is the root FROM table (so `lookups` must be
    non-empty: it is taken with `next(iter(lookups))`). Every other non-builtin
    lookup found in `task` (recursively through nested `ir.Logical` bodies) gets
    one join entry:

    * left outer join when the directly enclosing `ir.Logical` hoists at least
      one `ir.Default` whose value is `None`;
    * inner join otherwise, or when any of the lookup's arguments is in the set
      returned by `_extract_all_not_null_vars_from_nots(nots)`.

    Aliases are `v{start_index}`, `v{start_index + 1}`, ... in iteration order
    of `lookups`.

    Returns the tuple `(froms, joins, wheres, sql_vars, var_column, var_lookups)`.
    """
    wheres: list[sql.Expr] = []
    sql_vars: dict[ir.Lookup, str] = dict() # one var per table lookup
    var_column: dict[Tuple[ir.Var, ir.Lookup], ir.Field] = dict()
    var_lookups: dict[ir.Var, OrderedSet[ir.Lookup]] = defaultdict(OrderedSet)

    # Step 1: assign aliases and populate helper mappings
    i = start_index
    for lookup in lookups:
        sql_vars[lookup] = f"v{i}"
        self._process_lookup_args(lookup, sql_vars, var_column, var_lookups, wheres)
        i += 1

    froms: list[sql.From] = []
    joins: list[sql.Join] = []
    # Every lookup joined so far, across all nesting levels, in join order.
    full_context = ordered_set()

    # Choose a root FROM table
    first_lookup = next(iter(lookups))
    froms.append(sql.From(self._relation_name(first_lookup.relation), sql_vars[first_lookup]))
    full_context.add(first_lookup)

    # Join decision recorded per lookup; turned into sql join objects at the end.
    @dataclass(frozen=True)
    class JoinMetadata:
        on: Optional[sql.Expr] = None
        inner_join: bool = False

    joins_metadata: dict[ir.Lookup, JoinMetadata] = {}
    # Variables that force an inner join for any lookup using them as an argument.
    not_null_vars: set[ir.Var] = self._extract_all_not_null_vars_from_nots(nots)

    def _process_joins(lookup: ir.Lookup, context: OrderedSet[ir.Lookup], inner_join: bool = True):
        # Compute the ON condition for `lookup` and record (or upgrade) its entry
        # in `joins_metadata`.
        join_conditions = []
        # (left alias, left field, right alias, right field) tuples already emitted,
        # so the same equality is not added twice.
        seen_pairs = set()

        # We want most recent joins first from context, then from full_context
        search_context = list(reversed(context)) + [
            lk for lk in reversed(full_context) if lk not in context
        ]

        for arg in lookup.args:
            # Once any argument is a not-null variable the join stays an inner join.
            inner_join = arg in not_null_vars or inner_join
            if isinstance(arg, ir.Var) and arg in var_lookups:
                for other_lookup in search_context:
                    if other_lookup in var_lookups[arg]:
                        right_alias = sql_vars[lookup]
                        left_alias = sql_vars[other_lookup]

                        right_field = self._var_name(lookup.relation.id, var_column[(arg, lookup)])
                        left_field = self._var_name(other_lookup.relation.id, var_column[(arg, other_lookup)])

                        pair = (left_alias, left_field, right_alias, right_field)
                        if pair not in seen_pairs:
                            seen_pairs.add(pair)
                            join_conditions.append(sql.Terminal(f"{left_alias}.{left_field} = {right_alias}.{right_field}"))
                        break # stop at first matching lookup

        # `on` stays None when the lookup shares no variable with the search context.
        on = None
        if join_conditions:
            on = sql.And(join_conditions) if len(join_conditions) > 1 else join_conditions[0]

        join_metadata = joins_metadata.get(lookup)

        if join_metadata:
            # Upgrade to inner join only if previously marked as left outer join
            # NOTE(review): the upgrade also replaces the stored ON condition with
            # the one computed in this call - confirm that this is intended.
            if inner_join and not join_metadata.inner_join:
                joins_metadata[lookup] = JoinMetadata(on, inner_join)
        else:
            joins_metadata[lookup] = JoinMetadata(on, inner_join)

    def _process_logical(logical: ir.Logical, parent_context: Optional[OrderedSet[ir.Lookup]] = None):
        # Step 1: Prepare null variables from hoisted defaults
        null_vars = {
            v.var for v in (logical.hoisted or [])
            if isinstance(v, ir.Default) and v.value is None
        }

        # Step 2: Create a working context from parent_context
        # (presumably `OrderedSet.from_iterable` builds a new set, so lookups added
        # here do not leak into the parent's context - TODO confirm)
        context = OrderedSet.from_iterable(parent_context) if parent_context else ordered_set()

        # Step 3: Process all sub-tasks
        for sub_task in logical.body:
            if isinstance(sub_task, ir.Logical):
                _process_logical(sub_task, context)
            elif isinstance(sub_task, ir.Lookup):
                lookup = cast(ir.Lookup, sub_task)
                # The root lookup is already the FROM table; builtins are not tables.
                if lookup != first_lookup and not builtins.is_builtin(lookup.relation):
                    # A None default hoisted by this Logical makes its lookups optional,
                    # hence a left outer join unless `_process_joins` upgrades it.
                    inner_join = False if null_vars else True
                    _process_joins(lookup, context, inner_join)
                    context.add(lookup)
                    full_context.add(lookup)

    _process_logical(task, full_context)

    # Emit the joins in the order the lookups were first recorded.
    for lookup, metadata in joins_metadata.items():
        if metadata.inner_join:
            joins.append(sql.Join(self._relation_name(lookup.relation), sql_vars[lookup], metadata.on))
        else:
            joins.append(sql.LeftOuterJoin(self._relation_name(lookup.relation), sql_vars[lookup], metadata.on))

    return froms, joins, wheres, sql_vars, var_column, var_lookups
|
|
1113
|
+
|
|
1114
|
+
def _process_lookup_args(self, lookup: ir.Lookup, sql_vars: dict[ir.Lookup, str],
                         var_column: dict[Tuple[ir.Var, ir.Lookup], ir.Field],
                         var_lookups: dict[ir.Var, OrderedSet[ir.Lookup]], wheres: list[sql.Expr]):
    """
    Register the arguments of `lookup` in the shared helper mappings.

    Arguments are paired positionally with `lookup.relation.fields`:

    * an `ir.Var` records its column in `var_column[(var, lookup)]` and adds
      `lookup` to `var_lookups[var]`;
    * a literal (`int`, `str`, `float`, `bool` or `ir.Literal`) appends an
      equality filter `<alias>.<column> = <value>` to `wheres`, the value being
      rendered by `self._convert_value`;
    * any other argument is ignored.

    `sql_vars[lookup]` must already hold the alias of `lookup`. All mappings and
    `wheres` are mutated in place; nothing is returned.
    """
    relation = lookup.relation
    for position, arg in enumerate(lookup.args):
        field = relation.fields[position]

        if isinstance(arg, ir.Var):
            var_column[arg, lookup] = field
            var_lookups[arg].add(lookup)
            continue

        if not isinstance(arg, (int, str, float, bool, ir.Literal)):
            continue

        # A literal used as a relation argument, e.g. `test(1, x)`, filters the column.
        column_ref = f"{sql_vars[lookup]}.{self._var_name(relation.id, field)}"
        wheres.append(sql.Terminal(f"{column_ref} = {self._convert_value(arg)}"))
|
|
1127
|
+
|
|
1128
|
+
def _var_reference(self, var_lookups: dict[ir.Var, OrderedSet[ir.Lookup]], sql_vars: dict[ir.Lookup, str],
                   var_column: dict[Tuple[ir.Var, ir.Lookup], ir.Field], v):
    """
    Render `v` as SQL text.

    * `ir.Var`: returns `<alias>.<column>` for the lookup returned by
      `var_lookups[v].some()`.
    * `str`: returns a single-quoted SQL string literal. Embedded single quotes
      are doubled (`O'Brien` -> `'O''Brien'`) so the value cannot terminate the
      literal early and produce broken or injectable SQL.
    * anything else: returns `str(v)`.
    """
    if isinstance(v, ir.Var):
        # TODO - assuming the built-in reference was grounded elsewhere
        lookup = var_lookups[v].some()
        return f"{sql_vars[lookup]}.{self._var_name(lookup.relation.id, var_column[(v, lookup)])}"
    if isinstance(v, str):
        # Standard SQL escaping: a quote inside a quoted literal is written twice.
        escaped = v.replace("'", "''")
        return f"'{escaped}'"
    return str(v)
|
|
1135
|
+
|
|
1136
|
+
def _resolve_builtin_var(self, builtin_vars: dict[ir.Var, ir.Value|str|int], var):
    """
    Follow `var` through `builtin_vars` until something other than a mapped
    variable is reached, and return that.

    Anything that is not an `ir.Var` present in `builtin_vars` is returned
    unchanged.

    The resolution is recursive because a value may be produced by a chain of
    builtin lookups. For example the QB expression `a != decimal(0)` looks like
    this in IR:

        Logical ^[res]
            Exists(vDecimal128)
                Logical
                    cast(Decimal128, 0, vDecimal128)
                    decimal128(vDecimal128, res)
        a != res

    whereas the SQL has to read `a != 0`.
    """
    if not isinstance(var, ir.Var) or var not in builtin_vars:
        return var

    target = builtin_vars[var]
    if isinstance(target, ir.Var):
        # The mapped value is itself a variable: keep following the chain.
        return self._resolve_builtin_var(builtin_vars, target)
    return target
|
|
1150
|
+
|
|
1151
|
+
def _build_hash_expression(self, reference, resolve_builtin_var, var_to_construct, values):
|
|
1152
|
+
"""Generate hash expression like hash(`x`, `y`, TABLE_ALIAS.COLUMN_NAME)."""
|
|
1153
|
+
elements = []
|
|
1154
|
+
for val in values:
|
|
1155
|
+
resolved_val = resolve_builtin_var(val)
|
|
1156
|
+
if val != resolved_val and isinstance(resolved_val, str):
|
|
1157
|
+
# In case we parsed builtin into some expression, we may add it as an element.
|
|
1158
|
+
# For example, `TO_DATE('1990-1-1', 'Y-m-d')` or `(v1.value + 5)`.
|
|
1159
|
+
elements.append(f"{resolved_val}")
|
|
1160
|
+
continue
|
|
1161
|
+
if isinstance(resolved_val, ir.Var):
|
|
1162
|
+
if resolved_val in var_to_construct:
|
|
1163
|
+
elements.append(self._resolve_construct_var(reference, resolve_builtin_var, var_to_construct, var_to_construct[resolved_val]))
|
|
1164
|
+
else:
|
|
1165
|
+
elements.append(reference(resolved_val))
|
|
1166
|
+
else:
|
|
1167
|
+
elements.append(str(self._convert_value(resolved_val)))
|
|
1168
|
+
return f"hash({', '.join(elements)})"
|
|
1169
|
+
|
|
1170
|
+
def _resolve_construct_var(self, reference, resolve_builtin_var, var_to_construct, construct: ir.Construct):
    """Return the hash expression built from the values of `construct`."""
    construct_values = construct.values
    return self._build_hash_expression(reference, resolve_builtin_var, var_to_construct, construct_values)
|
|
1172
|
+
|
|
1173
|
+
def _resolve_hash_var(self, reference, resolve_builtin_var, var_to_construct, arg: Union[ir.ListType, ir.Value]):
    """
    Return the hash expression for `arg`.

    A tuple is hashed element by element; any other value is hashed as a
    single-element list.
    """
    hashed_values = arg if isinstance(arg, tuple) else [arg]
    return self._build_hash_expression(reference, resolve_builtin_var, var_to_construct, hashed_values)
|
|
1177
|
+
|
|
1178
|
+
def _resolve_builtins(self, builtin_lookups: OrderedSet[ir.Lookup], var_lookups: dict[ir.Var, OrderedSet[ir.Lookup]],
|
|
1179
|
+
var_column: dict[Tuple[ir.Var, ir.Lookup], ir.Field], sql_vars: dict[ir.Lookup, str],
|
|
1180
|
+
var_to_construct: dict[ir.Var, ir.Construct],
|
|
1181
|
+
outputs: Optional[list[OutputVar]] = None):
|
|
1182
|
+
|
|
1183
|
+
wheres: list[sql.Expr] = []
|
|
1184
|
+
# We need to maintain a mapping of these builtin expressions because they generate a new table, which must be
|
|
1185
|
+
# referenced in the FROM clause as part of a JOIN. Structure is `SQL table variable` -> `generated expression`
|
|
1186
|
+
table_expressions: dict[str, str] = {}
|
|
1187
|
+
builtin_vars: dict[ir.Var, ir.Value|str|int] = {}
|
|
1188
|
+
# TODO: remove this when we introduce date periods in builtins
|
|
1189
|
+
date_period_var_type: dict[ir.Var, str] = {}
|
|
1190
|
+
|
|
1191
|
+
output_vars = {
|
|
1192
|
+
output.value
|
|
1193
|
+
for output in outputs or []
|
|
1194
|
+
if isinstance(output.value, ir.Var)
|
|
1195
|
+
}
|
|
1196
|
+
|
|
1197
|
+
intermediate_builtin_vars: set[ir.Var] = {
|
|
1198
|
+
arg for lookup in builtin_lookups
|
|
1199
|
+
for arg in lookup.args
|
|
1200
|
+
if isinstance(arg, ir.Var) and arg not in var_lookups
|
|
1201
|
+
}
|
|
1202
|
+
|
|
1203
|
+
reference = partial(self._var_reference, var_lookups, sql_vars, var_column)
|
|
1204
|
+
resolve_builtin_var = partial(self._resolve_builtin_var, builtin_vars)
|
|
1205
|
+
|
|
1206
|
+
for lookup in self._sort_builtin_lookups(list(builtin_lookups), output_vars):
|
|
1207
|
+
args = lookup.args
|
|
1208
|
+
relation = lookup.relation
|
|
1209
|
+
relation_name = self._relation_name(relation)
|
|
1210
|
+
|
|
1211
|
+
if relation == builtins.substring:
|
|
1212
|
+
assert len(args) == 4, f"Expected 4 args for `strings.substring`, got {len(args)}: {args}"
|
|
1213
|
+
|
|
1214
|
+
# Unpack and process arguments
|
|
1215
|
+
lhs_raw, from_idx_raw, to_idx_raw, output = args
|
|
1216
|
+
assert isinstance(output, ir.Var), "Fourth argument (output) must be a variable"
|
|
1217
|
+
from_idx = self._convert_value(from_idx_raw)
|
|
1218
|
+
to_idx = self._convert_value(to_idx_raw)
|
|
1219
|
+
|
|
1220
|
+
# Resolve the left-hand side expression
|
|
1221
|
+
left = self._var_to_expr(lhs_raw, reference, resolve_builtin_var, var_to_construct)
|
|
1222
|
+
|
|
1223
|
+
# Calculate substring length: SQL is 1-based and end-inclusive
|
|
1224
|
+
substring_len = int(to_idx) - int(from_idx) + 1
|
|
1225
|
+
assert substring_len >= 0, f"Invalid substring range: from {from_idx} to {to_idx}"
|
|
1226
|
+
|
|
1227
|
+
expr = f"substring({left}, {from_idx}, {substring_len})"
|
|
1228
|
+
builtin_vars[output] = expr
|
|
1229
|
+
elif relation == builtins.replace:
|
|
1230
|
+
assert len(args) == 4, f"Expected 4 args for `replace`, got {len(args)}: {args}"
|
|
1231
|
+
subject_raw, pattern_raw, replacement_raw, output = args
|
|
1232
|
+
subject = self._var_to_expr(subject_raw, reference, resolve_builtin_var, var_to_construct)
|
|
1233
|
+
pattern = self._var_to_expr(pattern_raw, reference, resolve_builtin_var, var_to_construct)
|
|
1234
|
+
replacement = self._var_to_expr(replacement_raw, reference, resolve_builtin_var, var_to_construct)
|
|
1235
|
+
assert isinstance(output, ir.Var), "Fourth argument (output) must be a variable"
|
|
1236
|
+
builtin_vars[output] = f"replace({subject}, {pattern}, {replacement})"
|
|
1237
|
+
elif relation == builtins.split_part:
|
|
1238
|
+
assert len(args) == 4, f"Expected 4 args for `split_part`, got {len(args)}: {args}"
|
|
1239
|
+
separator_raw, s_raw, idx_raw, output = args
|
|
1240
|
+
separator = self._var_to_expr(separator_raw, reference, resolve_builtin_var, var_to_construct)
|
|
1241
|
+
s = self._var_to_expr(s_raw, reference, resolve_builtin_var, var_to_construct)
|
|
1242
|
+
idx = self._var_to_expr(idx_raw, reference, resolve_builtin_var, var_to_construct)
|
|
1243
|
+
assert isinstance(output, ir.Var)
|
|
1244
|
+
builtin_vars[output] = f"split_part({s}, {separator}, {idx})"
|
|
1245
|
+
elif relation == builtins.split:
|
|
1246
|
+
assert len(args) == 4, f"Expected 4 args for `split`, got {len(args)}: {args}"
|
|
1247
|
+
separator_raw, value_raw, index, part = args
|
|
1248
|
+
value = self._var_to_expr(value_raw, reference, resolve_builtin_var, var_to_construct)
|
|
1249
|
+
separator = self._var_to_expr(separator_raw, reference, resolve_builtin_var, var_to_construct)
|
|
1250
|
+
table_sql_var = f"v{len(sql_vars)}"
|
|
1251
|
+
sql_vars[lookup] = table_sql_var
|
|
1252
|
+
if self._is_duck_db:
|
|
1253
|
+
table_alias = f"{table_sql_var}(data)"
|
|
1254
|
+
table_expressions[table_alias] = f"VALUES(string_split({value}, {separator}))"
|
|
1255
|
+
|
|
1256
|
+
part_expr = f"unnest({table_sql_var}.data)"
|
|
1257
|
+
index_expr = f"generate_subscripts({table_sql_var}.data, 1)"
|
|
1258
|
+
else:
|
|
1259
|
+
table_expressions[table_sql_var] = f"LATERAL FLATTEN(input => SPLIT({value}, {separator}))"
|
|
1260
|
+
|
|
1261
|
+
# SF returns values in `""` and to avoid this, we need to cast it to `TEXT` type
|
|
1262
|
+
part_expr = f"cast({table_sql_var}.value as TEXT)"
|
|
1263
|
+
index_expr = f"({table_sql_var}.index + 1)" # SF is 0-based internally, adjust to it back
|
|
1264
|
+
assert isinstance(index, ir.Var) and isinstance(part, ir.Var), "Third and fourth arguments (index, part) must be variables"
|
|
1265
|
+
builtin_vars[part] = part_expr
|
|
1266
|
+
builtin_vars[index] = index_expr
|
|
1267
|
+
elif relation == builtins.range:
|
|
1268
|
+
assert len(args) == 4, f"Expected 4 args for `range`, got {len(args)}: {args}"
|
|
1269
|
+
start_raw, stop_raw, step_raw, result = args
|
|
1270
|
+
start = self._var_to_expr(start_raw, reference, resolve_builtin_var, var_to_construct)
|
|
1271
|
+
stop = self._var_to_expr(stop_raw, reference, resolve_builtin_var, var_to_construct)
|
|
1272
|
+
step = self._var_to_expr(step_raw, reference, resolve_builtin_var, var_to_construct)
|
|
1273
|
+
table_sql_var = f"v{len(sql_vars)}"
|
|
1274
|
+
sql_vars[lookup] = table_sql_var
|
|
1275
|
+
# In SQL range is 1...stop exclusive, and because we did `-1` in PyRel v1 we need to return it here
|
|
1276
|
+
if self._is_duck_db:
|
|
1277
|
+
table_expr = f"LATERAL range(cast({start} as bigint), cast(({stop} + 1) as bigint), cast({step} as bigint))"
|
|
1278
|
+
expr = f"{table_sql_var}.range"
|
|
1279
|
+
else:
|
|
1280
|
+
table_expr = f"LATERAL FLATTEN(input => ARRAY_GENERATE_RANGE({start}, ({stop} + 1), {step}))"
|
|
1281
|
+
expr = f"{table_sql_var}.value"
|
|
1282
|
+
table_expressions[table_sql_var] = table_expr
|
|
1283
|
+
assert isinstance(result, ir.Var), "Fourth argument (result) must be a variable"
|
|
1284
|
+
builtin_vars[result] = f"{expr}"
|
|
1285
|
+
elif relation == builtins.cast:
|
|
1286
|
+
assert len(args) == 3, f"Expected 3 args for `cast`, got {len(args)}: {args}"
|
|
1287
|
+
|
|
1288
|
+
_, original_raw, result = args
|
|
1289
|
+
assert isinstance(result, ir.Var), "Third argument (result) must be a variable"
|
|
1290
|
+
|
|
1291
|
+
builtin_vars[result] = original_raw
|
|
1292
|
+
elif relation in {builtins.isnan, builtins.isinf}:
|
|
1293
|
+
arg_expr = self._var_to_expr(args[0], reference, resolve_builtin_var, var_to_construct)
|
|
1294
|
+
expr = "cast('NaN' AS DOUBLE)" if relation == builtins.isnan else "cast('Infinity' AS DOUBLE)"
|
|
1295
|
+
wheres.append(sql.Terminal(f"{arg_expr} = {expr}"))
|
|
1296
|
+
elif relation == builtins.construct_date:
|
|
1297
|
+
assert len(args) == 4, f"Expected 4 args for `construct_date`, got {len(args)}: {args}"
|
|
1298
|
+
year_raw, month_raw, day_raw, result = args
|
|
1299
|
+
year = self._var_to_expr(year_raw, reference, resolve_builtin_var, var_to_construct)
|
|
1300
|
+
month = self._var_to_expr(month_raw, reference, resolve_builtin_var, var_to_construct)
|
|
1301
|
+
day = self._var_to_expr(day_raw, reference, resolve_builtin_var, var_to_construct)
|
|
1302
|
+
|
|
1303
|
+
assert isinstance(result, ir.Var), "Fourth argument (result) must be a variable."
|
|
1304
|
+
if self._is_duck_db:
|
|
1305
|
+
expr = f"make_date(cast({year} as bigint), cast({month} as bigint), cast({day} as bigint))"
|
|
1306
|
+
else:
|
|
1307
|
+
expr = f"date_from_parts({year}, {month}, {day})"
|
|
1308
|
+
builtin_vars[result] = expr
|
|
1309
|
+
elif relation == builtins.construct_datetime_ms_tz:
|
|
1310
|
+
assert len(args) == 9, f"Expected 9 args for `construct_datetime_ms_tz`, got {len(args)}: {args}"
|
|
1311
|
+
|
|
1312
|
+
year_raw, month_raw, day_raw, hour_raw, minute_raw, second_raw, millisecond_raw, tz_raw, result = args
|
|
1313
|
+
assert isinstance(result, ir.Var), "Ninth argument (result) must be a variable."
|
|
1314
|
+
|
|
1315
|
+
year = self._var_to_expr(year_raw, reference, resolve_builtin_var, var_to_construct)
|
|
1316
|
+
month = self._var_to_expr(month_raw, reference, resolve_builtin_var, var_to_construct)
|
|
1317
|
+
day = self._var_to_expr(day_raw, reference, resolve_builtin_var, var_to_construct)
|
|
1318
|
+
hour = self._var_to_expr(hour_raw, reference, resolve_builtin_var, var_to_construct)
|
|
1319
|
+
minute = self._var_to_expr(minute_raw, reference, resolve_builtin_var, var_to_construct)
|
|
1320
|
+
second = self._var_to_expr(second_raw, reference, resolve_builtin_var, var_to_construct)
|
|
1321
|
+
millisecond = self._var_to_expr(millisecond_raw, reference, resolve_builtin_var, var_to_construct)
|
|
1322
|
+
tz = self._var_to_expr(tz_raw, reference, resolve_builtin_var, var_to_construct)
|
|
1323
|
+
|
|
1324
|
+
if self._is_duck_db:
|
|
1325
|
+
sub_expr = (f"make_timestamp(cast({year} as bigint), cast({month} as bigint), cast({day} as bigint), "
|
|
1326
|
+
f"cast({hour} as bigint), cast({minute} as bigint), cast({second} as bigint) + {millisecond}/1000.0)")
|
|
1327
|
+
if tz.lower() != "'utc'":
|
|
1328
|
+
sub_expr = f"(({sub_expr} at time zone {tz}) at time zone 'UTC')"
|
|
1329
|
+
else:
|
|
1330
|
+
sub_expr = (f"to_timestamp_ntz(lpad({year}, 4, '0') || '-' || lpad({month}, 2, '0') || '-' || "
|
|
1331
|
+
f"lpad({day}, 2, '0') || ' ' || lpad({hour}, 2, '0') || ':' || "
|
|
1332
|
+
f"lpad({minute}, 2, '0') || ':' || lpad({second}, 2, '0') || '.' || "
|
|
1333
|
+
f"lpad({millisecond}, 3, '0'), 'YYYY-MM-DD HH24:MI:SS.FF3')")
|
|
1334
|
+
if tz.lower() != "'utc'":
|
|
1335
|
+
sub_expr = f"convert_timezone({tz}, 'UTC', {sub_expr})"
|
|
1336
|
+
builtin_vars[result] = f"cast({sub_expr} as DATETIME)"
|
|
1337
|
+
elif relation == builtins.infomap:
|
|
1338
|
+
raise NotImplementedError("`infomap` is not supported in SQL")
|
|
1339
|
+
elif relation == builtins.louvain:
|
|
1340
|
+
raise NotImplementedError("`louvain` is not supported in SQL")
|
|
1341
|
+
elif relation == builtins.label_propagation:
|
|
1342
|
+
raise NotImplementedError("`label_propagation` is not supported in SQL")
|
|
1343
|
+
else:
|
|
1344
|
+
# Assuming infix binary or ternary operators here
|
|
1345
|
+
lhs, rhs = args[0], args[1]
|
|
1346
|
+
if relation in builtins.string_binary_builtins:
|
|
1347
|
+
left = self._var_to_expr(lhs, reference, resolve_builtin_var, var_to_construct)
|
|
1348
|
+
if relation == builtins.num_chars and isinstance(rhs, ir.Var):
|
|
1349
|
+
builtin_vars[rhs] = f"length({left})"
|
|
1350
|
+
elif relation == builtins.lower and isinstance(rhs, ir.Var):
|
|
1351
|
+
builtin_vars[rhs] = f"lower({left})"
|
|
1352
|
+
elif relation == builtins.upper and isinstance(rhs, ir.Var):
|
|
1353
|
+
builtin_vars[rhs] = f"upper({left})"
|
|
1354
|
+
elif relation == builtins.strip and isinstance(rhs, ir.Var):
|
|
1355
|
+
builtin_vars[rhs] = f"trim({left})"
|
|
1356
|
+
elif relation == builtins.regex_match:
|
|
1357
|
+
right = self._var_to_expr(rhs, reference, resolve_builtin_var, var_to_construct)
|
|
1358
|
+
# swap left and right for SQL
|
|
1359
|
+
wheres.append(sql.RegexLike(right, left))
|
|
1360
|
+
else:
|
|
1361
|
+
right = self._var_to_expr(rhs, reference, resolve_builtin_var, var_to_construct, False)
|
|
1362
|
+
if relation == builtins.starts_with:
|
|
1363
|
+
expr = f"concat({right}, '%')" if isinstance(rhs, ir.Var) else f"'{right}%'"
|
|
1364
|
+
elif relation == builtins.ends_with:
|
|
1365
|
+
expr = f"concat('%', {right})" if isinstance(rhs, ir.Var) else f"'%{right}'"
|
|
1366
|
+
elif relation == builtins.like_match:
|
|
1367
|
+
expr = right if isinstance(rhs, ir.Var) else f"'{right}'"
|
|
1368
|
+
elif relation == builtins.contains:
|
|
1369
|
+
expr = f"concat('%', {right}, '%')" if isinstance(rhs, ir.Var) else f"'%{right}%'"
|
|
1370
|
+
else:
|
|
1371
|
+
raise Exception(f"Unsupported string builtin relation: {relation}")
|
|
1372
|
+
wheres.append(sql.Like(left, expr))
|
|
1373
|
+
elif relation == builtins.levenshtein:
|
|
1374
|
+
assert len(args) == 3, f"Expected 3 args for `levenshtein`, got {len(args)}: {args}"
|
|
1375
|
+
left = self._var_to_expr(lhs, reference, resolve_builtin_var, var_to_construct)
|
|
1376
|
+
right = self._var_to_expr(rhs, reference, resolve_builtin_var, var_to_construct)
|
|
1377
|
+
function = "levenshtein" if self._is_duck_db else "editdistance"
|
|
1378
|
+
assert isinstance(args[2], ir.Var)
|
|
1379
|
+
builtin_vars[args[2]] = f"{function}({left}, {right})"
|
|
1380
|
+
elif relation == builtins.concat:
|
|
1381
|
+
assert len(args) == 3, f"Expected 3 args for `concat`, got {len(args)}: {args}"
|
|
1382
|
+
left = self._var_to_expr(lhs, reference, resolve_builtin_var, var_to_construct)
|
|
1383
|
+
right = self._var_to_expr(rhs, reference, resolve_builtin_var, var_to_construct)
|
|
1384
|
+
assert isinstance(args[2], ir.Var)
|
|
1385
|
+
builtin_vars[args[2]] = f"concat({left}, {right})"
|
|
1386
|
+
elif relation == builtins.join:
|
|
1387
|
+
assert len(args) == 3, f"Expected 3 args for `join`, got {len(args)}: {args}"
|
|
1388
|
+
assert isinstance(lhs, tuple)
|
|
1389
|
+
f_args = [
|
|
1390
|
+
self._var_to_expr(item, reference, resolve_builtin_var, var_to_construct)
|
|
1391
|
+
for item in lhs
|
|
1392
|
+
]
|
|
1393
|
+
right = self._var_to_expr(rhs, reference, resolve_builtin_var, var_to_construct)
|
|
1394
|
+
assert isinstance(args[2], ir.Var)
|
|
1395
|
+
builtin_vars[args[2]] = f"concat_ws({right}, {', '.join(f_args)})"
|
|
1396
|
+
elif relation == builtins.hash and isinstance(rhs, ir.Var):
|
|
1397
|
+
builtin_vars[rhs] = self._resolve_hash_var(reference, resolve_builtin_var, var_to_construct, lhs)
|
|
1398
|
+
elif relation == builtins.string and isinstance(rhs, ir.Var):
|
|
1399
|
+
if isinstance(lhs, ir.Var) and typer.to_base_primitive(lhs.type) == DateTime:
|
|
1400
|
+
lhs = self._var_to_expr(lhs, reference, resolve_builtin_var, var_to_construct)
|
|
1401
|
+
# Convert DateTime to string in the ISO 8601 format.
|
|
1402
|
+
if self._is_duck_db:
|
|
1403
|
+
builtin_vars[rhs] = f"""strftime({lhs}, '%Y-%m-%dT%H:%M:%S.%f')"""
|
|
1404
|
+
else:
|
|
1405
|
+
builtin_vars[rhs] = f"""to_varchar({lhs}, 'YYYY-MM-DD"T"HH24:MI:SS.FF3')"""
|
|
1406
|
+
else:
|
|
1407
|
+
builtin_vars[rhs] = lhs
|
|
1408
|
+
elif relation == builtins.parse_float and isinstance(rhs, ir.Var):
|
|
1409
|
+
left = self._var_to_expr(lhs, reference, resolve_builtin_var, var_to_construct)
|
|
1410
|
+
builtin_vars[rhs] = f"cast({left} AS DOUBLE)"
|
|
1411
|
+
elif relation == builtins.parse_date:
|
|
1412
|
+
if self._is_duck_db:
|
|
1413
|
+
raise Exception("DuckDB: unsupported builtin relation 'parse_date'.")
|
|
1414
|
+
assert len(args) == 3, f"Expected 3 args for `parse_date`, got {len(args)}: {args}"
|
|
1415
|
+
left = self._var_to_expr(lhs, reference, resolve_builtin_var, var_to_construct)
|
|
1416
|
+
right = self._var_to_expr(rhs, reference, resolve_builtin_var, var_to_construct)
|
|
1417
|
+
assert isinstance(args[2], ir.Var)
|
|
1418
|
+
builtin_vars[args[2]] = f"to_date({left}, {right})"
|
|
1419
|
+
elif relation == builtins.parse_datetime:
|
|
1420
|
+
assert len(args) == 3, f"Expected 3 args for `parse_datetime`, got {len(args)}: {args}"
|
|
1421
|
+
left = self._var_to_expr(lhs, reference, resolve_builtin_var, var_to_construct)
|
|
1422
|
+
right = self._var_to_expr(rhs, reference, resolve_builtin_var, var_to_construct)
|
|
1423
|
+
sub_expr = left
|
|
1424
|
+
if 'z' in right: # this means that out datetime formatter includes timezone, and we need to convert first.
|
|
1425
|
+
if self._is_duck_db:
|
|
1426
|
+
sub_expr = f"({left} AT TIME ZONE 'UTC')"
|
|
1427
|
+
else:
|
|
1428
|
+
sub_expr = f"convert_timezone('UTC', to_timestamp_tz({left}))"
|
|
1429
|
+
assert isinstance(args[2], ir.Var)
|
|
1430
|
+
builtin_vars[args[2]] = f"cast({sub_expr} as DATETIME)"
|
|
1431
|
+
elif relation in builtins.date_periods and isinstance(rhs, ir.Var):
|
|
1432
|
+
builtin_vars[rhs] = lhs
|
|
1433
|
+
date_period_var_type[rhs] = relation.name
|
|
1434
|
+
elif relation in builtins.date_builtins:
|
|
1435
|
+
if relation in {builtins.date_add, builtins.date_subtract, builtins.datetime_add,
|
|
1436
|
+
builtins.datetime_subtract}:
|
|
1437
|
+
assert len(args) == 3, f"Expected 3 args for {relation}, got {len(args)}: {args}"
|
|
1438
|
+
assert isinstance(rhs, ir.Var), f"Period variable must be `ir.Var`, got: {rhs}"
|
|
1439
|
+
period = date_period_var_type[rhs]
|
|
1440
|
+
period_val = self._var_to_expr(rhs, reference, resolve_builtin_var, var_to_construct)
|
|
1441
|
+
|
|
1442
|
+
left = self._var_to_expr(lhs, reference, resolve_builtin_var, var_to_construct)
|
|
1443
|
+
|
|
1444
|
+
if self._is_duck_db:
|
|
1445
|
+
op = "+" if relation in {builtins.date_add, builtins.datetime_add} else "-"
|
|
1446
|
+
expr = f"({left} {op} {period_val} * interval 1 {period})"
|
|
1447
|
+
else:
|
|
1448
|
+
sign = 1 if relation in {builtins.date_add, builtins.datetime_add} else -1
|
|
1449
|
+
expr = f"dateadd({period}, ({sign} * {period_val}), {left})"
|
|
1450
|
+
|
|
1451
|
+
result_var = args[2]
|
|
1452
|
+
assert isinstance(result_var, ir.Var), (
|
|
1453
|
+
f"Expected `ir.Var` type for the result of `{relation}`, "
|
|
1454
|
+
f"but got `{type(result_var).__name__}`: {result_var}"
|
|
1455
|
+
)
|
|
1456
|
+
builtin_vars[result_var] = expr
|
|
1457
|
+
# handle binary cases
|
|
1458
|
+
elif len(args) == 2:
|
|
1459
|
+
assert isinstance(rhs, ir.Var), f"Resulting variable must be `ir.Var`, got: {rhs}"
|
|
1460
|
+
expr_map = {
|
|
1461
|
+
builtins.date_year: "year",
|
|
1462
|
+
builtins.date_quarter: "quarter",
|
|
1463
|
+
builtins.date_month: "month",
|
|
1464
|
+
builtins.date_week: "week",
|
|
1465
|
+
builtins.date_day: "day",
|
|
1466
|
+
builtins.date_dayofyear: "dayofyear",
|
|
1467
|
+
builtins.date_weekday: "isodow" if self._is_duck_db else "dayofweekiso",
|
|
1468
|
+
builtins.datetime_second: "second",
|
|
1469
|
+
}
|
|
1470
|
+
expr = expr_map.get(relation)
|
|
1471
|
+
lhs = self._var_to_expr(lhs, reference, resolve_builtin_var, var_to_construct)
|
|
1472
|
+
builtin_vars[rhs] = f"{expr}({lhs})"
|
|
1473
|
+
elif len(args) == 3:
|
|
1474
|
+
result_var = args[2]
|
|
1475
|
+
assert isinstance(result_var, ir.Var), f"Resulting variable must be `ir.Var`, got: {result_var}"
|
|
1476
|
+
expr_map = {
|
|
1477
|
+
builtins.datetime_year: "year",
|
|
1478
|
+
builtins.datetime_quarter: "quarter",
|
|
1479
|
+
builtins.datetime_month: "month",
|
|
1480
|
+
builtins.datetime_week: "week",
|
|
1481
|
+
builtins.datetime_day: "day",
|
|
1482
|
+
builtins.datetime_dayofyear: "dayofyear",
|
|
1483
|
+
builtins.datetime_hour: "hour",
|
|
1484
|
+
builtins.datetime_minute: "minute",
|
|
1485
|
+
builtins.datetime_weekday: "isodow" if self._is_duck_db else "dayofweekiso",
|
|
1486
|
+
builtins.dates_period_days: "date_diff" if self._is_duck_db else "datediff",
|
|
1487
|
+
builtins.datetimes_period_milliseconds: "date_diff" if self._is_duck_db else "datediff"
|
|
1488
|
+
}
|
|
1489
|
+
expr = expr_map.get(relation)
|
|
1490
|
+
lhs = self._var_to_expr(lhs, reference, resolve_builtin_var, var_to_construct)
|
|
1491
|
+
rhs = self._var_to_expr(rhs, reference, resolve_builtin_var, var_to_construct)
|
|
1492
|
+
if relation == builtins.dates_period_days:
|
|
1493
|
+
sub_expr = f"'day', {lhs}, {rhs}" if self._is_duck_db else f"day, {lhs}, {rhs}"
|
|
1494
|
+
elif relation == builtins.datetimes_period_milliseconds:
|
|
1495
|
+
sub_expr = f"'millisecond', {lhs}, {rhs}" if self._is_duck_db else f"millisecond, {lhs}, {rhs}"
|
|
1496
|
+
else:
|
|
1497
|
+
sub_expr = self._convert_timezone(lhs, rhs)
|
|
1498
|
+
builtin_vars[result_var] = f"{expr}({sub_expr})"
|
|
1499
|
+
else:
|
|
1500
|
+
raise NotImplementedError("Unsupported number of arguments for date builtin (3+).")
|
|
1501
|
+
elif relation == builtins.construct_date_from_datetime:
|
|
1502
|
+
assert len(args) == 3, f"Expected 3 args for `construct_date_from_datetime`, got {len(args)}: {args}"
|
|
1503
|
+
dt_raw, tz, result = args
|
|
1504
|
+
tz = self._convert_value(tz)
|
|
1505
|
+
|
|
1506
|
+
assert isinstance(tz, str), "Timezone argument (tz) must be a string."
|
|
1507
|
+
assert isinstance(result, ir.Var), "Third argument (result) must be a variable."
|
|
1508
|
+
|
|
1509
|
+
# Note that the order of utc and dt is swapped in construct_date_from_datetime and construct_datetime,
|
|
1510
|
+
# because datetime->date (this case) ensures "the datetime is converted to the specified
|
|
1511
|
+
# timezone or offset string before extracting the date", while date->datetime (next case below)
|
|
1512
|
+
# ensures "the datetime is converted to UTC from the specified timezone or offset string."
|
|
1513
|
+
# (quotes are from pyrel0 docs for fromdate and fromdatetime).
|
|
1514
|
+
dt = self._var_to_expr(dt_raw, reference, resolve_builtin_var, var_to_construct)
|
|
1515
|
+
sub_expr = self._convert_timezone(dt, tz)
|
|
1516
|
+
sub_expr = f"cast({sub_expr} AS DATE)"
|
|
1517
|
+
builtin_vars[result] = sub_expr
|
|
1518
|
+
elif relation in builtins.math_builtins:
|
|
1519
|
+
result_var = rhs
|
|
1520
|
+
rel_name = relation.name
|
|
1521
|
+
left = self._var_to_expr(lhs, reference, resolve_builtin_var, var_to_construct)
|
|
1522
|
+
if relation in builtins.math_unary_builtins:
|
|
1523
|
+
method = "ln" if rel_name == builtins.natural_log.name else rel_name
|
|
1524
|
+
sub_expr = left
|
|
1525
|
+
if rel_name == builtins.factorial.name and self._is_duck_db:
|
|
1526
|
+
# Factorial requires an integer operand in DuckDB
|
|
1527
|
+
sub_expr = f"{left}::INTEGER"
|
|
1528
|
+
elif rel_name == builtins.log10.name:
|
|
1529
|
+
# log10 is not supported, so we use log with base 10
|
|
1530
|
+
sub_expr = f"10, {left}"
|
|
1531
|
+
method = "log"
|
|
1532
|
+
expr = f"{method}({sub_expr})"
|
|
1533
|
+
elif rel_name in {builtins.minimum.name, builtins.maximum.name, builtins.trunc_div.name,
|
|
1534
|
+
builtins.power.name, builtins.mod.name, builtins.pow.name,
|
|
1535
|
+
builtins.log.name}:
|
|
1536
|
+
assert len(args) == 3, f"Expected 3 args for {relation}, got {len(args)}: {args}"
|
|
1537
|
+
|
|
1538
|
+
result_var = args[2]
|
|
1539
|
+
right = self._var_to_expr(rhs, reference, resolve_builtin_var, var_to_construct)
|
|
1540
|
+
|
|
1541
|
+
if rel_name == builtins.minimum.name:
|
|
1542
|
+
expr = f"least({left}, {right})"
|
|
1543
|
+
elif rel_name == builtins.maximum.name:
|
|
1544
|
+
expr = f"greatest({left}, {right})"
|
|
1545
|
+
elif rel_name == builtins.trunc_div.name:
|
|
1546
|
+
expr = f"trunc({left} / {right})"
|
|
1547
|
+
elif rel_name == builtins.power.name or rel_name == builtins.pow.name:
|
|
1548
|
+
expr = f"power({left}, {right})"
|
|
1549
|
+
elif rel_name == builtins.log.name:
|
|
1550
|
+
expr = f"log({left}, {right})"
|
|
1551
|
+
else:
|
|
1552
|
+
expr = f"mod({left}, {right})"
|
|
1553
|
+
else:
|
|
1554
|
+
raise Exception(f"Unsupported math builtin relation: {relation}")
|
|
1555
|
+
assert isinstance(result_var, ir.Var), (
|
|
1556
|
+
f"Expected `ir.Var` type for the result of `{relation}`, "
|
|
1557
|
+
f"but got `{type(result_var).__name__}`: {result_var}"
|
|
1558
|
+
)
|
|
1559
|
+
builtin_vars[result_var] = expr
|
|
1560
|
+
elif relation in {builtins.parse_int64, builtins.parse_int128} and isinstance(rhs, ir.Var):
|
|
1561
|
+
builtin_vars[rhs] = self._var_to_expr(lhs, reference, resolve_builtin_var, var_to_construct, False)
|
|
1562
|
+
elif helpers.is_from_cast(lookup) and isinstance(rhs, ir.Var):
|
|
1563
|
+
# For the `from cast` relations we keep the raw var, and we will ground it later.
|
|
1564
|
+
builtin_vars[rhs] = lhs
|
|
1565
|
+
elif isinstance(lhs, ir.Var) and lhs in intermediate_builtin_vars and lhs not in (builtin_vars | var_to_construct):
|
|
1566
|
+
# Example IR:
|
|
1567
|
+
# Logical
|
|
1568
|
+
# Logical ^[v0]
|
|
1569
|
+
# int = 2
|
|
1570
|
+
# Logical ^[res=None]
|
|
1571
|
+
# Logical ^[res]
|
|
1572
|
+
# cast(Float, int, int_Float)
|
|
1573
|
+
# res = 2.1 * int_Float
|
|
1574
|
+
# v0 = res
|
|
1575
|
+
# -> derive _match_1(v0)
|
|
1576
|
+
#
|
|
1577
|
+
# In this example, the `int` variable is an intermediate result produced by the `=` (assignment) builtin.
|
|
1578
|
+
# We must retain this value in the `builtin_vars` mapping so it can be used when compiling the `cast`.
|
|
1579
|
+
# Ultimately, this allows us to fully resolve the final expression: `v0 = 2.1 * 2`.
|
|
1580
|
+
builtin_vars[lhs] = self._var_to_expr(rhs, reference, resolve_builtin_var, var_to_construct)
|
|
1581
|
+
elif isinstance(rhs, ir.Var) and rhs in intermediate_builtin_vars and rhs not in (builtin_vars | var_to_construct):
|
|
1582
|
+
# Please see the example above but in this case it will be `2 = int` builtin lookup instead of `int = 2`.
|
|
1583
|
+
builtin_vars[rhs] = self._var_to_expr(lhs, reference, resolve_builtin_var, var_to_construct)
|
|
1584
|
+
else:
|
|
1585
|
+
left = self._var_to_expr(lhs, reference, resolve_builtin_var, var_to_construct)
|
|
1586
|
+
right = self._var_to_expr(rhs, reference, resolve_builtin_var, var_to_construct)
|
|
1587
|
+
|
|
1588
|
+
if len(args) == 3:
|
|
1589
|
+
out_var = args[2]
|
|
1590
|
+
if isinstance(out_var, ir.Var):
|
|
1591
|
+
out_var = resolve_builtin_var(out_var)
|
|
1592
|
+
expr = f"({left} {relation_name} {right})"
|
|
1593
|
+
if isinstance(out_var, ir.Var):
|
|
1594
|
+
# For example, when this is an intermediate result
|
|
1595
|
+
# example: c = a - b in the IR is (a - b = d) and (d = c)
|
|
1596
|
+
builtin_vars[out_var] = expr
|
|
1597
|
+
else:
|
|
1598
|
+
# This means that var was already grounded, and we can add a WHERE clause.
|
|
1599
|
+
wheres.append(sql.Terminal(f"{expr} = {out_var}"))
|
|
1600
|
+
else:
|
|
1601
|
+
raise Exception(
|
|
1602
|
+
f"Expected `ir.Var` type for the relation `{relation}` output but got `{type(out_var).__name__}`: {out_var}"
|
|
1603
|
+
)
|
|
1604
|
+
else:
|
|
1605
|
+
# Replace intermediate vars with disjoined expressions
|
|
1606
|
+
expr = f"{left} {relation_name} {right}"
|
|
1607
|
+
wheres.append(sql.Terminal(expr))
|
|
1608
|
+
|
|
1609
|
+
# After handling all builtins we need to generate where statements for args with single lookup.
|
|
1610
|
+
for arg, lookup_set in var_lookups.items():
|
|
1611
|
+
if len(lookup_set) == 1:
|
|
1612
|
+
lookup = lookup_set[0]
|
|
1613
|
+
column = var_column[cast(ir.Var, arg), lookup]
|
|
1614
|
+
column_name = self._var_name(lookup.relation.id, column)
|
|
1615
|
+
ref = f"{sql_vars[lookup]}.{column_name}"
|
|
1616
|
+
# case when we have a builtin operation as a relation argument
|
|
1617
|
+
# example: `test(a - 1, b)` and we are handling here `a - 1` arg.
|
|
1618
|
+
if arg in builtin_vars:
|
|
1619
|
+
rhs_ref = resolve_builtin_var(arg)
|
|
1620
|
+
if isinstance(rhs_ref, ir.Var):
|
|
1621
|
+
rhs = reference(rhs_ref) if rhs_ref in var_lookups else rhs_ref.name
|
|
1622
|
+
elif isinstance(rhs_ref, ir.Literal):
|
|
1623
|
+
rhs = self._convert_value(rhs_ref.value)
|
|
1624
|
+
else:
|
|
1625
|
+
rhs = str(rhs_ref)
|
|
1626
|
+
wheres.append(sql.Terminal(f"{ref} = {rhs}"))
|
|
1627
|
+
|
|
1628
|
+
return builtin_vars, wheres, table_expressions
|
|
1629
|
+
|
|
1630
|
+
def _convert_timezone(self, dt: str, tz: str) -> str:
|
|
1631
|
+
if tz.lower() != "'utc'":
|
|
1632
|
+
if self._is_duck_db:
|
|
1633
|
+
return f"({dt} at time zone 'UTC') at time zone {tz}"
|
|
1634
|
+
else:
|
|
1635
|
+
return f"convert_timezone('UTC', {tz}, {dt})"
|
|
1636
|
+
return dt
|
|
1637
|
+
|
|
1638
|
+
def _process_builtin_table_expressions(self, builtin_table_expressions: dict[str, str]):
|
|
1639
|
+
"""Convert builtin table expressions into SQL FROM clauses."""
|
|
1640
|
+
return [
|
|
1641
|
+
sql.From(expr, alias)
|
|
1642
|
+
for alias, expr in builtin_table_expressions.items()
|
|
1643
|
+
]
|
|
1644
|
+
|
|
1645
|
+
def _process_constructs(self, lookups: OrderedSet[ir.Lookup], var_lookups: dict[ir.Var, OrderedSet[ir.Lookup]],
|
|
1646
|
+
var_column: dict[Tuple[ir.Var, ir.Lookup], ir.Field], sql_vars: dict[ir.Lookup, str],
|
|
1647
|
+
builtin_vars: dict[ir.Var, ir.Value|str|int], var_to_construct: dict[ir.Var, ir.Construct]) -> list[sql.Expr]:
|
|
1648
|
+
"""
|
|
1649
|
+
Handles `filter_by` constructs that require generating SQL `WHERE` conditions.
|
|
1650
|
+
|
|
1651
|
+
Example:
|
|
1652
|
+
|
|
1653
|
+
QB:
|
|
1654
|
+
Name = m.Concept('Name', extends=[str])
|
|
1655
|
+
Bank = m.Concept('Bank', identify_by={'name': Name})
|
|
1656
|
+
|
|
1657
|
+
where(Bank.filter_by(name="Chase")).select(Bank)
|
|
1658
|
+
|
|
1659
|
+
IR:
|
|
1660
|
+
construct(Bank, "name"::String, "Chase"::String, bank::Bank)
|
|
1661
|
+
Bank(bank::Bank)
|
|
1662
|
+
|
|
1663
|
+
SQL:
|
|
1664
|
+
... FROM Bank v0
|
|
1665
|
+
WHERE v0.bank = hash('Bank', 'name', 'Chase')
|
|
1666
|
+
"""
|
|
1667
|
+
|
|
1668
|
+
wheres: list[sql.Expr] = []
|
|
1669
|
+
|
|
1670
|
+
reference = partial(self._var_reference, var_lookups, sql_vars, var_column)
|
|
1671
|
+
resolve_builtin_var = partial(self._resolve_builtin_var, builtin_vars)
|
|
1672
|
+
|
|
1673
|
+
seen_vars: set[ir.Var] = set()
|
|
1674
|
+
|
|
1675
|
+
for lookup in lookups:
|
|
1676
|
+
relation = lookup.relation
|
|
1677
|
+
for j, arg in enumerate(lookup.args):
|
|
1678
|
+
if isinstance(arg, ir.Var) and arg in var_to_construct and arg not in seen_vars:
|
|
1679
|
+
seen_vars.add(arg)
|
|
1680
|
+
|
|
1681
|
+
rel_field = relation.fields[j]
|
|
1682
|
+
ref = f"{sql_vars[lookup]}.{self._var_name(relation.id, rel_field)}"
|
|
1683
|
+
|
|
1684
|
+
construct = var_to_construct[arg]
|
|
1685
|
+
construct_expr = self._resolve_construct_var(
|
|
1686
|
+
reference, resolve_builtin_var, var_to_construct, construct
|
|
1687
|
+
)
|
|
1688
|
+
|
|
1689
|
+
wheres.append(sql.Terminal(f"{ref} = {construct_expr}"))
|
|
1690
|
+
|
|
1691
|
+
return wheres
|
|
1692
|
+
|
|
1693
|
+
def _generate_where_clauses(self, var_lookups: dict[ir.Var, OrderedSet[ir.Lookup]],
                            var_column: dict[Tuple[ir.Var, ir.Lookup], ir.Field], sql_vars: dict[ir.Lookup, str],
                            union_lookups: dict[ir.Union, OrderedSet[ir.Lookup]]):
    """
    Build the join conditions for variables that appear in more than one lookup.

    Lookups that are not part of any union ("plain" lookups) are chained with
    equality conditions; references coming from union members are compared to
    the last plain reference of the same variable and combined in one `sql.Or`.

    Returns the list of generated `sql.Expr` conditions (possibly empty).
    """
    # Reverse mapping: lookup -> union
    lookup_to_union: dict[ir.Lookup, ir.Union] = {}
    for union, lookups in union_lookups.items():
        for lu in lookups:
            lookup_to_union[lu] = union

    wheres: list[sql.Expr] = []
    # var -> column references ("alias.column") from lookups outside any union
    plain_refs_by_var: dict[ir.Var, list[str]] = defaultdict(list)
    # union member table alias -> {var -> column reference in that member}
    all_union_members: dict[str, dict[ir.Var, str]] = defaultdict(dict)
    for arg, lookup_set in var_lookups.items():
        # if there are 2 lookups for the same variable, we need a join
        if len(lookup_set) > 1:
            # Step 1: Collect all lookups by union member or plain
            for lu in lookup_set:
                col = var_column[arg, lu]
                col_name = self._var_name(lu.relation.id, col)

                matched_union = lookup_to_union.get(lu)
                if matched_union:
                    # NOTE(review): every member of the union is referenced with the column
                    # name computed for `lu` — presumably members expose the same column; confirm.
                    for u_lu in union_lookups[matched_union]:
                        u_ref = f"{sql_vars[u_lu]}.{col_name}"
                        all_union_members[sql_vars[u_lu]][arg] = u_ref
                else:
                    ref = f"{sql_vars[lu]}.{col_name}"
                    plain_refs_by_var[arg].append(ref)

    # Step 2: Build AND chain of plain lookups
    and_clauses = []
    for refs in plain_refs_by_var.values():
        # join variable references pairwise (e.g. "x.id = y.id AND y.id = z.id")
        for lhs, rhs in zip(refs, refs[1:]):
            and_clauses.append(sql.Terminal(f"{lhs} = {rhs}"))

    # Step 3: Build one OR clause across union members
    or_groups: list[sql.Expr] = []
    for member_ref_map in all_union_members.values():
        expressions = []
        for arg_var, rhs in member_ref_map.items():
            plain_refs = plain_refs_by_var.get(arg_var)
            # Union references without any plain counterpart produce no condition.
            if plain_refs:
                lhs = plain_refs[-1]  # last plain ref for that var
                expressions.append(sql.Terminal(f"{lhs} = {rhs}"))
        if expressions:
            or_groups.append(sql.And(expressions) if len(expressions) > 1 else expressions[0])

    wheres.extend(and_clauses)
    if or_groups:
        wheres.append(sql.Or(or_groups))

    return wheres
|
|
1746
|
+
|
|
1747
|
+
def _process_wheres_clauses(self, wheres: list[sql.Expr]) -> Optional[sql.Where]:
|
|
1748
|
+
# conjunction of not_wheres
|
|
1749
|
+
if len(wheres) == 0:
|
|
1750
|
+
where = None
|
|
1751
|
+
elif len(wheres) == 1:
|
|
1752
|
+
where = sql.Where(wheres[0])
|
|
1753
|
+
else:
|
|
1754
|
+
where = sql.Where(sql.And(wheres))
|
|
1755
|
+
return where
|
|
1756
|
+
|
|
1757
|
+
def _generate_select_output(self, outputs: list[OutputVar], builtin_vars: dict[ir.Var, ir.Value|str|int],
                            sql_vars: dict[ir.Lookup, str], var_column: dict[Tuple[ir.Var, ir.Lookup], ir.Field],
                            var_lookups: dict[ir.Var, OrderedSet[ir.Lookup]],
                            var_to_construct: dict[ir.Var, ir.Construct]):
    """
    Compute the expressions returned by a SELECT for the given outputs.

    Returns a pair `(not_null_vars, vars)`: `vars` is the list of `sql.VarRef`
    built for the outputs, and `not_null_vars` is an ordered set of
    "alias.column" references that were read from relations carrying
    `from_cdc_annotation`.
    """

    reference = partial(self._var_reference, var_lookups, sql_vars, var_column)
    resolve_builtin_var = partial(self._resolve_builtin_var, builtin_vars)

    def handle_lookup_var(var, var_type, alias):
        # Emit a plain column reference for a variable bound by a lookup.
        lookup = var_lookups[var].some()
        relation = lookup.relation
        var_name = sql_vars[lookup]
        column_name = self._var_name(relation.id, var_column[var, lookup])
        vars.append(sql.VarRef(var_name, column_name, alias, var_type))
        if from_cdc_annotation in relation.annotations:
            not_null_vars.add(f"{var_name}.{column_name}")

    def handle_construct(construct):
        # Generate constructions like hash(`x`, `y`, TABLE_ALIAS.COLUMN_NAME)
        elements = []
        for val in construct.values:
            if val in builtin_vars:
                val = resolve_builtin_var(val)
                if isinstance(val, str):
                    # In case we parsed builtin into some expression, we may add it as an element.
                    # For example, `TO_DATE('1990-1-1', 'Y-m-d')` or `(v1.value + 5)`.
                    elements.append(f"{val}")
                    continue
            if isinstance(val, ir.Var):
                if val in var_to_construct:
                    # Nested construct: recurse to build the inner hash(...) expression.
                    elements.append(handle_construct(var_to_construct[val]))
                else:
                    lookup = var_lookups[val].some()
                    column_name = self._var_name(lookup.relation.id, var_column[val, lookup])
                    lookup_var = f"{sql_vars[lookup]}.{column_name}"
                    elements.append(lookup_var)
                    if from_cdc_annotation in lookup.relation.annotations:
                        not_null_vars.add(lookup_var)
            else:
                elements.append(str(self._convert_value(val)))
        return f"hash({', '.join(elements)})"

    # finally, compute what the select will return
    vars = []
    not_null_vars = ordered_set()
    for output in outputs:
        alias, var, var_type, task = output.alias, output.value, output.value_type, output.task
        if isinstance(var, ir.Var):
            if var in var_lookups and not task:
                handle_lookup_var(var, var_type, alias)
            elif var in builtin_vars:
                var_ref = resolve_builtin_var(var)
                if var_ref in var_lookups:
                    # Case: result of `cast` variable
                    handle_lookup_var(var_ref, var_type, alias)
                elif isinstance(var_ref, ir.Literal):
                    # Case: literal value from `cast` relation, e.g. `decimal(0)`
                    vars.append(sql.VarRef(str(self._convert_value(var_ref.value)), alias=alias, type=var_type))
                else:
                    # Example: We may have `decimal(0)` in QB which turns in IR into:
                    #   (cast(Decimal128, 0, vDecimal128) and decimal128(vDecimal128, res_3))
                    #   and we need to make it `0` in SQL.
                    var_ref = var_ref.name if isinstance(var_ref, ir.Var) else str(var_ref)
                    vars.append(sql.VarRef(var_ref, alias=alias, type=var_type))
            elif task:
                if isinstance(task, ir.Construct):
                    # Generate constructions like hash(`x`, `y`, TABLE_ALIAS.COLUMN_NAME) as `alias`
                    vars.append(sql.VarRef(handle_construct(task), alias=alias, type=var_type))
                elif isinstance(task, ir.Aggregate):
                    # `count` takes its argument from the projection, other aggregations from args.
                    result_arg = task.projection[-1] if task.aggregation == builtins.count else task.args[0]
                    result_arg = resolve_builtin_var(result_arg)
                    ref = reference(result_arg) if isinstance(result_arg, ir.Var) else str(result_arg)
                    vars.append(sql.VarRef(str(ref), alias=alias, type=var_type))
                elif isinstance(task, ir.Union):
                    # Handle `COALESCE` of all lookups of this var from the union
                    lookups = self._extract_all_lookups_from_union(task)
                    elements = []

                    for lu in lookups:
                        if any(isinstance(arg, ir.Var) and arg == var for arg in lu.args):
                            column_name = self._var_name(lu.relation.id, var_column[var, lu])
                            elements.append(f"{sql_vars[lu]}.{column_name}")

                    # NOTE(review): if no lookup of the union mentions `var`, this yields `COALESCE()`.
                    expr = "COALESCE(" + ", ".join(elements) + ")"
                    vars.append(sql.VarRef(expr, alias=alias, type=var_type))
        else:
            # TODO - abusing even more here, because var is a value!
            vars.append(sql.VarRef(str(self._convert_value(var)), alias=alias, type=var_type))
    return not_null_vars, vars
|
|
1846
|
+
|
|
1847
|
+
def _generate_select_nots(self, nots: Optional[list[ir.Not]], var_lookups: dict[ir.Var, OrderedSet[ir.Lookup]],
                          sql_vars: dict[ir.Lookup, str], var_column:dict[Tuple[ir.Var, ir.Lookup], ir.Field],
                          index: int) -> tuple[list[sql.NotExists], int]:
    """
    Translate `ir.Not` tasks into `NOT EXISTS (SELECT 1 ...)` sub-queries.

    `var_lookups`, `sql_vars` and `var_column` describe the enclosing query and are
    used to correlate each sub-query with it. `index` is advanced by the number of
    SQL variables created for each sub-query; nested `ir.Not` tasks are handled
    recursively.

    Returns the list of `sql.NotExists` expressions and the updated index.
    """
    not_exists = []
    if nots:
        for not_expr in nots:
            unions = []
            inner_nots = []
            constructs = []
            if isinstance(not_expr.task, ir.Lookup):
                all_lookups = [not_expr.task]
            else:
                logical = cast(ir.Logical, not_expr.task)
                all_lookups = cast(list[ir.Lookup], filter_by_type(logical.body, ir.Lookup))
                logicals = cast(list[ir.Logical], filter_by_type(logical.body, ir.Logical))
                inner_nots = cast(list[ir.Not], filter_by_type(logical.body, ir.Not))
                unions = cast(list[ir.Union], filter_by_type(logical.body, ir.Union))
                constructs = cast(list[ir.Construct], filter_by_type(logical.body, ir.Construct))

                # Some of the lookup relations we wrap into logical and we need to get them out for the SQL compilation.
                #    For example QB `decimal(0)` in IR will look like this:
                #        Logical ^[res]
                #            Exists(vDecimal128)
                #                Logical
                #                    cast(Decimal128, 0, vDecimal128)
                #                    decimal128(vDecimal128, res)
                if logicals:
                    unions = self._extract_all_of_type_from_logicals(logicals, ir.Union) + unions
                    all_lookups = self._extract_all_of_type_from_logicals(logicals, ir.Lookup) + all_lookups

            union_lookups: dict[ir.Union, OrderedSet[ir.Lookup]] = self._extract_all_lookups_per_union(unions)
            all_lookups.extend(list(chain.from_iterable(union_lookups.values())))

            # Non-builtin lookups become the FROM tables of the sub-query.
            lookups = OrderedSet.from_iterable(t for t in all_lookups if not builtins.is_builtin(t.relation))
            froms, wheres, not_sql_vars, not_var_column, not_var_lookups = self._extract_lookups_metadata(lookups, index)
            index += len(not_sql_vars)

            # Builtins may refer to variables of both the outer query and the sub-query.
            all_sql_vars = {**sql_vars, **not_sql_vars}
            all_var_column = {**var_column, **not_var_column}
            all_var_lookups = {**var_lookups, **not_var_lookups}

            var_to_construct = {c.id_var: c for c in constructs} if constructs else {}
            builtin_lookups = OrderedSet.from_iterable(t for t in all_lookups if builtins.is_builtin(t.relation))
            builtin_vars, builtin_wheres, builtin_table_expressions = (
                self._resolve_builtins(builtin_lookups, all_var_lookups, all_var_column, all_sql_vars, var_to_construct))

            froms.extend(self._process_builtin_table_expressions(builtin_table_expressions))

            wheres.extend(builtin_wheres)

            # NOTE(review): this passes the outer-context `var_lookups`/`var_column`/`sql_vars`
            # rather than the merged `all_*` mappings used for the builtins above, while `lookups`
            # come from the NOT body — confirm this is intended.
            construct_wheres = self._process_constructs(lookups, var_lookups, var_column, sql_vars, builtin_vars,
                                                        var_to_construct)
            wheres.extend(construct_wheres)

            # We need to join the not exists select with the outside select query context
            for arg, lookup_set in not_var_lookups.items():
                if len(lookup_set) > 0:
                    lu = lookup_set[0]
                    column = not_var_column[cast(ir.Var, arg), lu]
                    column_name = self._var_name(lu.relation.id, column)
                    lhs = f"{not_sql_vars[lu]}.{column_name}"

                    # lookup the same var from the outside context to make the join
                    matching_lookup = next(
                        (lookup for (var, lookup) in var_column if var == arg),
                        None
                    )

                    if matching_lookup is not None:
                        matching_column = var_column[(arg, matching_lookup)]
                        matching_column_name = self._var_name(matching_lookup.relation.id, matching_column)
                        rhs = f"{sql_vars[matching_lookup]}.{matching_column_name}"
                        wheres.append(sql.Terminal(f"{lhs} = {rhs}"))

            # Joins between the lookups inside the sub-query itself.
            wheres.extend(self._generate_where_clauses(not_var_lookups, not_var_column, not_sql_vars, union_lookups))

            # Nested NOTs are correlated against this sub-query's context.
            inner_not_exists, index = self._generate_select_nots(inner_nots, not_var_lookups, not_sql_vars, not_var_column, index)
            wheres.extend(inner_not_exists)

            where = self._process_wheres_clauses(wheres)
            not_exists.append(sql.NotExists(sql.Select(False, [1], froms, where)))

    return not_exists, index
|
|
1930
|
+
|
|
1931
|
+
def _extract_all_of_type_from_logical(self, task: ir.Logical, target_type: type) -> list:
|
|
1932
|
+
"""Recursively extract all instances of `target_type` from a Logical task."""
|
|
1933
|
+
return self._extract_all_of_type_from_logicals([task], target_type)
|
|
1934
|
+
|
|
1935
|
+
def _extract_all_of_type_from_logicals(self, logicals: list[ir.Logical], target_type: type) -> list:
    """
    Collect every `target_type` instance found in the given Logical tasks.

    Nested Logical tasks are descended into (depth-first, in body order) and are
    never collected themselves. Duplicates are dropped while keeping first-seen order.
    """
    def matches(logical: ir.Logical):
        # Yield matching nodes of `logical`, descending into nested logicals.
        for node in logical.body:
            if isinstance(node, ir.Logical):
                yield from matches(node)
            elif isinstance(node, target_type):
                yield node

    collected = ordered_set()
    for root in logicals or []:
        for hit in matches(root):
            collected.add(hit)

    items = collected.list
    return items if items else []
|
|
1950
|
+
|
|
1951
|
+
def _extract_all_lookups_per_union(self, unions: Optional[list[ir.Union]]) -> dict[ir.Union, OrderedSet[ir.Lookup]]:
|
|
1952
|
+
return {
|
|
1953
|
+
union: self._extract_all_lookups_from_union(union)
|
|
1954
|
+
for union in unions or []
|
|
1955
|
+
}
|
|
1956
|
+
|
|
1957
|
+
def _extract_all_lookups_from_union(self, union: ir.Union) -> OrderedSet[ir.Lookup]:
    """
    Gather the lookups of a union: direct Lookup members plus all lookups
    nested inside Logical members. Other kinds of members are ignored.
    """
    collected: OrderedSet[ir.Lookup] = OrderedSet()
    for member in union.tasks:
        if isinstance(member, ir.Logical):
            nested_lookups = self._extract_all_of_type_from_logicals([member], ir.Lookup)
            collected.update(nested_lookups)
        elif isinstance(member, ir.Lookup):
            collected.add(cast(ir.Lookup, member))
    return collected
|
|
1965
|
+
|
|
1966
|
+
def _extract_all_not_null_vars_from_nots(self, nots: Optional[list[ir.Not]]) -> set[ir.Var]:
|
|
1967
|
+
vars: set[ir.Var] = set()
|
|
1968
|
+
null_vars: set[ir.Var] = set()
|
|
1969
|
+
|
|
1970
|
+
def visit(task):
|
|
1971
|
+
if isinstance(task, ir.Not):
|
|
1972
|
+
visit(task.task)
|
|
1973
|
+
elif isinstance(task, ir.Logical):
|
|
1974
|
+
for var in task.hoisted:
|
|
1975
|
+
if isinstance(var, ir.Var):
|
|
1976
|
+
vars.add(var)
|
|
1977
|
+
elif isinstance(var, ir.Default):
|
|
1978
|
+
(vars if var.value is not None else null_vars).add(var.var)
|
|
1979
|
+
for subtask in task.body:
|
|
1980
|
+
visit(subtask)
|
|
1981
|
+
elif isinstance(task, ir.Lookup):
|
|
1982
|
+
vars.update(arg for arg in task.args if isinstance(arg, ir.Var))
|
|
1983
|
+
|
|
1984
|
+
for not_task in nots or []:
|
|
1985
|
+
visit(not_task)
|
|
1986
|
+
|
|
1987
|
+
return vars - null_vars
|
|
1988
|
+
|
|
1989
|
+
def _var_to_expr(self, var, reference, resolve_builtin_var, var_to_construct: dict[ir.Var, ir.Construct],
                 quote_strings: bool = True):
    """
    Render `var` (a variable or a plain value) as a SQL expression string.

    Variables bound to a construct are rendered through `_resolve_construct_var`.
    Otherwise the value is first passed through `resolve_builtin_var`; a resulting
    variable is rendered with `reference`, a literal with `_convert_value`, and an
    int as its decimal text. Anything else is stringified, and wrapped in single
    quotes only when `var` itself is not a variable and `quote_strings` is set.
    """
    if isinstance(var, ir.Var) and var in var_to_construct:
        construct = var_to_construct[var]
        return self._resolve_construct_var(reference, resolve_builtin_var, var_to_construct, construct)

    resolved = resolve_builtin_var(var)
    if isinstance(resolved, ir.Var):
        return reference(resolved)
    if isinstance(resolved, ir.Literal):
        return str(self._convert_value(resolved, quote_strings=quote_strings))
    if isinstance(resolved, int):
        return str(resolved)

    # An expression resolved from a variable is already SQL and must not be quoted.
    if not isinstance(var, ir.Var) and quote_strings:
        return f"'{resolved}'"
    return str(resolved)
|
|
2005
|
+
|
|
2006
|
+
def _get_update_aliases(self, update: ir.Update, var_to_construct, var_to_union, skip_type:bool=False):
|
|
2007
|
+
relation = update.relation
|
|
2008
|
+
return [
|
|
2009
|
+
self._get_alias(
|
|
2010
|
+
self._var_name(relation.id, f),
|
|
2011
|
+
arg,
|
|
2012
|
+
self._convert_type(f.type) if not skip_type else None,
|
|
2013
|
+
var_to_construct,
|
|
2014
|
+
var_to_union,
|
|
2015
|
+
)
|
|
2016
|
+
for f, arg in zip(relation.fields, update.args)
|
|
2017
|
+
]
|
|
2018
|
+
|
|
2019
|
+
def _get_alias(self, key, arg, arg_type, var_to_construct, var_to_union):
    """
    Wrap `arg` in an `OutputVar` aliased as `key`.

    For a variable, the construct (or, failing that, the union) registered for
    it is attached as the output's task; plain values get no task.
    """
    if isinstance(arg, ir.Var):
        task = var_to_construct.get(arg) or var_to_union.get(arg)
        return OutputVar(arg, key, arg_type, task)
    return OutputVar(arg, key, arg_type)
|
|
2024
|
+
|
|
2025
|
+
def _get_tuples(self, logical: ir.Logical, u: ir.Update):
|
|
2026
|
+
"""
|
|
2027
|
+
Get a list of tuples to perform this update.
|
|
2028
|
+
|
|
2029
|
+
This function traverses the update args, assuming they contain only static values or
|
|
2030
|
+
variables bound to a construct task, and generates a list of tuples to insert. There
|
|
2031
|
+
may be multiple tuples because arguments can be lists of values bound to a field
|
|
2032
|
+
whose role is multi.
|
|
2033
|
+
"""
|
|
2034
|
+
# TODO - this only works if the variable is bound to a Construct task, we need a more general approach.
|
|
2035
|
+
|
|
2036
|
+
def find_construct(var):
|
|
2037
|
+
for stmt in logical.body:
|
|
2038
|
+
if isinstance(stmt, ir.Construct) and stmt.id_var == var:
|
|
2039
|
+
return stmt
|
|
2040
|
+
return None
|
|
2041
|
+
|
|
2042
|
+
def resolve_value(arg):
|
|
2043
|
+
if isinstance(arg, ir.Var):
|
|
2044
|
+
construct = find_construct(arg)
|
|
2045
|
+
if not construct:
|
|
2046
|
+
return self._convert_value(arg)
|
|
2047
|
+
|
|
2048
|
+
resolved = []
|
|
2049
|
+
for val in construct.values:
|
|
2050
|
+
if isinstance(val, ir.Var):
|
|
2051
|
+
inner_construct = find_construct(val)
|
|
2052
|
+
if inner_construct:
|
|
2053
|
+
nested = [str(self._convert_value(x)) for x in inner_construct.values]
|
|
2054
|
+
resolved.append(f"hash({', '.join(nested)})")
|
|
2055
|
+
else:
|
|
2056
|
+
resolved.append(str(self._convert_value(val)))
|
|
2057
|
+
else:
|
|
2058
|
+
resolved.append(str(self._convert_value(val)))
|
|
2059
|
+
|
|
2060
|
+
return f"hash({', '.join(resolved)})"
|
|
2061
|
+
elif isinstance(arg, FrozenOrderedSet):
|
|
2062
|
+
return frozen(*[self._convert_value(v) for v in arg])
|
|
2063
|
+
else:
|
|
2064
|
+
return self._convert_value(arg)
|
|
2065
|
+
|
|
2066
|
+
values = [resolve_value(a) for a in u.args]
|
|
2067
|
+
return self._product(values)
|
|
2068
|
+
|
|
2069
|
+
def _product(self, values):
|
|
2070
|
+
""" Compute a cartesian product of values when the value is a FrozenOrderedSet. """
|
|
2071
|
+
# TODO - some pass needs to check that this is correct, i.e. that we are using a
|
|
2072
|
+
# FrozenOrderedSet only if the field is of role multi.
|
|
2073
|
+
tuples = [[]]
|
|
2074
|
+
for value in values:
|
|
2075
|
+
if isinstance(value, FrozenOrderedSet):
|
|
2076
|
+
tuples = [prev + [element] for prev in tuples for element in value]
|
|
2077
|
+
else:
|
|
2078
|
+
tuples = [prev + [value] for prev in tuples]
|
|
2079
|
+
return [tuple(t) for t in tuples]
|
|
2080
|
+
|
|
2081
|
+
def _convert_value(self, v, quote_strings:bool=True) -> str|int:
|
|
2082
|
+
""" Convert the literal value in v to a SQL value."""
|
|
2083
|
+
if isinstance(v, str):
|
|
2084
|
+
return f"'{v}'" if quote_strings else v
|
|
2085
|
+
if isinstance(v, PyDecimal):
|
|
2086
|
+
return str(v)
|
|
2087
|
+
if isinstance(v, ir.ScalarType):
|
|
2088
|
+
return f"'{v.name}'"
|
|
2089
|
+
if isinstance(v, ir.Literal):
|
|
2090
|
+
if v.type == types.Date:
|
|
2091
|
+
return f"cast('{v.value}' as date)"
|
|
2092
|
+
if v.type == types.DateTime:
|
|
2093
|
+
return f"cast('{v.value}' as datetime)"
|
|
2094
|
+
return self._convert_value(v.value, quote_strings)
|
|
2095
|
+
if isinstance(v, float):
|
|
2096
|
+
if math.isnan(v):
|
|
2097
|
+
return "cast('NaN' as DOUBLE)"
|
|
2098
|
+
elif v == float("inf"):
|
|
2099
|
+
return "cast('Infinity' as DOUBLE)"
|
|
2100
|
+
elif v == float("-inf"):
|
|
2101
|
+
return "cast('-Infinity' as DOUBLE)"
|
|
2102
|
+
return str(v)
|
|
2103
|
+
if isinstance(v, datetime.datetime):
|
|
2104
|
+
return f"cast('{v}' as datetime)"
|
|
2105
|
+
if isinstance(v, datetime.date):
|
|
2106
|
+
return f"cast('{v}' as date)"
|
|
2107
|
+
if isinstance(v, bool):
|
|
2108
|
+
return str(v).lower()
|
|
2109
|
+
if isinstance(v, int):
|
|
2110
|
+
return v
|
|
2111
|
+
return str(v)
|
|
2112
|
+
|
|
2113
|
+
# SQL column type for each builtin scalar type, shared by all backends.
COMMON_CONVERSION = {
    Hash: "DECIMAL(38, 0)",
    String: "TEXT",
    Number: "DOUBLE",
    Bool: "BOOLEAN",
    Date: "DATE",
    DateTime: "DATETIME",
    Float: "FLOAT(53)",
    # NUMBER(38,0) cannot hold the full UInt128 range — it can only go up to
    # about 2¹²⁶. We need to find something better for the two entries below.
    RowId: "NUMBER(38, 0)",
    UInt128: "NUMBER(38, 0)",
}

# Integer types whose SQL spelling differs between backends.
SNOWFLAKE_OVERRIDES = {Int64: "NUMBER(19, 0)", Int128: "NUMBER(38, 0)"}
DUCKDB_OVERRIDES = {Int64: "BIGINT", Int128: "HUGEINT"}

# Complete per-backend tables: the shared entries followed by the overrides.
SF_BUILTIN_CONVERSION = dict(COMMON_CONVERSION)
SF_BUILTIN_CONVERSION.update(SNOWFLAKE_OVERRIDES)
DUCKDB_BUILTIN_CONVERSION = dict(COMMON_CONVERSION)
DUCKDB_BUILTIN_CONVERSION.update(DUCKDB_OVERRIDES)
|
|
2134
|
+
def _convert_type(self, t: ir.Type) -> str:
    """Return the SQL type that corresponds to the type ``t``.

    A type that is neither builtin nor a value type (an entity) becomes
    ``DECIMAL(38, 0)``. Otherwise the type is reduced to its base primitive:
    scalar primitives are looked up in the conversion tables (the DuckDB table
    first when compiling for DuckDB, then the Snowflake table) and decimal
    primitives are rendered from their precision and scale.

    Raises:
        Exception: if no SQL type is known for ``t``.
    """
    # entities become DECIMAL(38, 0)
    if not (types.is_builtin(t) or types.is_value_type(t)):
        return "DECIMAL(38, 0)"

    # convert known builtins
    primitive = typer.to_base_primitive(t)
    if isinstance(primitive, ir.ScalarType):
        lookup_order = []
        if self._is_duck_db:
            lookup_order.append(self.DUCKDB_BUILTIN_CONVERSION)
        lookup_order.append(self.SF_BUILTIN_CONVERSION)
        for table in lookup_order:
            if primitive in table:
                return table[primitive]
    if isinstance(primitive, ir.DecimalType):
        return f"DECIMAL({primitive.precision},{primitive.scale})"
    raise Exception(f"Unknown built-in type: {t}")
|
|
2150
|
+
|
|
2151
|
+
def _get_relations(self, model: ir.Model) -> Tuple[list[ir.Relation], list[ir.Relation]]:
    """Collect the relations of ``model`` that matter for SQL generation.

    Returns:
        A pair ``(table_relations, used_builtins)``: the used relations for
        which ``_is_table_creation_required`` holds, and the relations read
        from the root that are builtins.
    """
    visitor = ReadWriteVisitor()
    model.accept(visitor)

    logical_root = cast(ir.Logical, model.root)

    # For query compilation exclude read-only tables because we do not need to
    # declare `CREATE TABLE` statements for them.
    if self._query_compilation:
        candidates = visitor.writes(logical_root)
    else:
        candidates = visitor.writes(logical_root) | visitor.reads(logical_root)

    # Keep only the relations that require table creation.
    needs_table = list(filter(self._is_table_creation_required, candidates))

    builtin_reads = list(filter(builtins.is_builtin, visitor.reads(logical_root)))

    return needs_table, builtin_reads
|
|
2172
|
+
|
|
2173
|
+
def _is_table_creation_required(self, r: ir.Relation) -> bool:
    """
    Decide whether the relation ``r`` should produce a SQL table.

    No table is created for:
    - Built-in relations or annotations
    - Relations carrying the CDC annotation
    - The relation named "rank"
    - Error relations (see ``_is_error_relation``)
    - Relations with an unresolved field type (``types.Any``) that have overloads
    - ValueType population relations

    Raises:
        ValueError: if a field type is ``types.Any`` and ``r`` has no overloads.
    """
    if builtins.is_builtin(r) or builtins.is_annotation(r):
        return False
    if from_cdc_annotation in r.annotations:
        return False
    # TODO: revisit this during `RAI-39124`. For now we filter out all error relations.
    if r.name == "rank" or self._is_error_relation(r):
        return False

    for relation_field in r.fields:
        if relation_field.type == types.Any:
            # An unresolved field is only acceptable when overloads exist.
            if r.overloads:
                return False
            raise ValueError(f"Relation '{r.name}' has unresolved field types (`types.Any`) and no overloads.")

    return not self._is_value_type_population_relation(r)
|
|
2200
|
+
|
|
2201
|
+
def _is_error_relation(self, r: ir.Relation) -> bool:
    """Tell whether ``r`` is an error relation.

    True when the raw name is one of the known error relation names, or when
    the compiled relation name starts with ``error_``.
    """
    if r.name in self._error_relation_names:
        return True
    return self._relation_name(r).startswith('error_')
|
|
2203
|
+
|
|
2204
|
+
@staticmethod
def _is_value_type_population_relation(r: ir.Relation) -> bool:
    """
    Tell whether ``r`` is a ValueType population relation, i.e. it:
    - has exactly one field,
    - whose type is a value type,
    - and carries ``concept_relation_annotation``.
    """
    fields = r.fields
    if not fields:
        return False
    if len(fields) != 1:
        return False
    return types.is_value_type(fields[0].type) and concept_relation_annotation in r.annotations
|
|
2215
|
+
|
|
2216
|
+
def _relation_name(self, relation: ir.Relation):
    """Return the name used for ``relation`` in generated SQL.

    External and builtin relations keep their own name. Every other relation
    gets a name from the relation name cache, requested with its sanitized
    name and its name prefix.
    """
    keeps_own_name = helpers.is_external(relation) or helpers.builtins.is_builtin(relation)
    if keeps_own_name:
        return relation.name

    return self.relation_name_cache.get_name(
        relation.id,
        helpers.sanitize(relation.name),
        helpers.relation_name_prefix(relation),
    )
|
|
2220
|
+
|
|
2221
|
+
def _register_external_relations(self, model: ir.Model):
    """Reserve the names of all external relations of ``model`` in the name cache."""
    # Requesting a name for every external relation up front puts those names
    # in the cache, so that internal relations cannot use them in _relation_name.
    for external in filter(helpers.is_external, model.relations):
        self.relation_name_cache.get_name(external.id, external.name)
|
|
2227
|
+
|
|
2228
|
+
def _get_relation_info(self, relation: ir.Relation) -> RelationInfo:
    """Return the ``RelationInfo`` recorded for ``relation``, creating it on first use."""
    registry = self.relation_infos
    if relation in registry:
        return registry[relation]

    info = RelationInfo()
    registry[relation] = info
    return info
|
|
2232
|
+
|
|
2233
|
+
def mark_used(self, relation: ir.Relation):
    """Set the ``used`` flag on the info record of ``relation``."""
    info = self._get_relation_info(relation)
    info.used = True
|
|
2235
|
+
|
|
2236
|
+
def add_table_select(self, relation: ir.Relation, select: sql.Select):
    """Append ``select`` to the table selects recorded for ``relation``."""
    info = self._get_relation_info(relation)
    info.table_selects.append(select)
|
|
2238
|
+
|
|
2239
|
+
def add_view_select(self, relation: ir.Relation, select: sql.Select):
    """Append ``select`` to the view selects recorded for ``relation``."""
    info = self._get_relation_info(relation)
    info.view_selects.append(select)
|
|
2241
|
+
|
|
2242
|
+
def add_dynamic_table_select(self, relation: ir.Relation, select: sql.Select):
    """Append ``select`` to the dynamic table selects recorded for ``relation``."""
    info = self._get_relation_info(relation)
    info.dynamic_table_selects.append(select)
|
|
2244
|
+
|
|
2245
|
+
def _var_name(self, relation_id: int, arg: Union[ir.Var, ir.Field]):
    """Return the SQL column name for ``arg`` within the relation ``relation_id``.

    The name comes from the relation argument name cache and is sanitized.
    Names equal to "any" or "order" (case-insensitively) are wrapped in
    double quotes.
    """
    cached = self.relation_arg_name_cache.get_name((relation_id, arg.id), arg.name)
    name = helpers.sanitize(cached)
    if name.lower() in {"any", "order"}:
        return f'"{name}"'
    return name
|
|
2248
|
+
|
|
2249
|
+
def _register_relation_args(self, relations: list[ir.Relation]):
    """
    Start a fresh argument name cache and register every field of ``relations`` in it.

    Registering all fields ensures they have unique names, which SQL
    compilation needs in order to avoid name collisions.
    """
    cache = NameCache()
    self.relation_arg_name_cache = cache
    for relation in relations:
        for rel_field in relation.fields:
            cache.get_name((relation.id, rel_field.id), rel_field.name)
|
|
2258
|
+
|
|
2259
|
+
def _sort_builtin_lookups(self, lookups: list[ir.Lookup], output_vars: set[ir.Var]) -> list[ir.Lookup]:
    """Order builtin lookups so that each one follows the lookups it depends on.

    ``eq`` lookups that mention one of ``output_vars`` are kept in their
    original relative order and placed last, because they depend on the other
    builtin lookups. All remaining lookups are topologically sorted using the
    edges from ``_build_builtin_lookups_dependencies``.
    """
    deferred = []
    sortable = []
    for candidate in lookups:
        binds_output = builtins.is_eq(candidate.relation) and any(
            arg in output_vars for arg in candidate.args
        )
        (deferred if binds_output else sortable).append(candidate)

    ordered = topological_sort(sortable, self._build_builtin_lookups_dependencies(sortable))

    return ordered + deferred
|
|
2268
|
+
|
|
2269
|
+
@staticmethod
def _build_builtin_lookups_dependencies(lookups: list[ir.Lookup]) -> list[Tuple[ir.Lookup, ir.Lookup]]:
    """
    Build the ``(before, after)`` edges that topological_sort uses to order lookups:
    1. Terminal comparisons (neq, gt, lt, gte, lte) come after every non-terminal lookup.
    2. Conditionals (starts_with, ends_with, contains, like_match) come after
       every lookup that is neither terminal nor conditional.
    3. An eq with exactly one variable argument comes before any lookup that
       uses that variable in a non-last position.
    4. An eq with two variable arguments comes after any lookup whose last
       argument is one of those variables.
    5. A lookup comes before any other lookup that uses one of its output
       arguments in a non-last position.
    6. A lookup with input arguments (like range, concat, substring, etc.)
       comes after any non-terminal, non-conditional lookup whose last
       argument is one of those inputs.

    The trailing argument is treated as the output, except for ``split``
    where the last two arguments are outputs.
    """
    terminal_names = {
        builtins.neq.name, builtins.gt.name, builtins.lt.name, builtins.gte.name, builtins.lte.name
    }
    conditional_names = {
        builtins.starts_with.name, builtins.ends_with.name, builtins.contains.name, builtins.like_match.name
    }

    # Index every argument by the lookups that mention it and at which position.
    usages = defaultdict(list)  # arg -> List[(lookup, position)]
    for lookup in lookups:
        for position, arg in enumerate(lookup.args):
            usages[arg].append((lookup, position))

    dependencies = []
    for current in lookups:
        name = current.relation.name
        current_args = current.args

        # Rule 1: terminal relations depend on everything that is not terminal.
        if name in terminal_names:
            for other in lookups:
                if other is current:
                    continue
                if other.relation.name not in terminal_names:
                    dependencies.append((other, current))
            continue  # no further rules apply to terminal lookups

        # Rule 2: conditional relations go before terminals, but after the rest.
        if name in conditional_names:
            for other in lookups:
                if other is current:
                    continue
                other_name = other.relation.name
                if other_name in terminal_names or other_name in conditional_names:
                    continue
                dependencies.append((other, current))
            continue

        if name == builtins.eq.name:
            variables = [arg for arg in current_args if isinstance(arg, ir.Var)]

            # Rule 3: this eq defines its single variable, so it goes before
            # any lookup that uses the variable non-terminally.
            if len(variables) == 1:
                defined = variables[0]
                for other, position in usages[defined]:
                    if other is not current and position != len(other.args) - 1:
                        dependencies.append((current, other))
                continue  # skip adding other edges among assignments like a=2, b=2

            # Rule 4: eq(x, y) with two variables waits for the lookups that
            # ground either of them.
            elif len(variables) == 2:
                for variable in variables:
                    for other, _ in usages[variable]:
                        if other is not current and other.args[-1] == variable:
                            dependencies.append((other, current))
                continue

        # In general builtins have a single output var but `split` returns `index` and `part`.
        output_count = 2 if current.relation == builtins.split else 1

        # Rule 5: the producer of an output goes before lookups that consume it.
        for produced in current_args[-output_count:]:
            for other, position in usages.get(produced, []):
                if other is not current and position != len(other.args) - 1:
                    dependencies.append((current, other))

        # Rule 6: builtins with input args must wait until those inputs are
        # grounded, for example range(start, end, step, result).
        if len(current_args) > output_count:
            for consumed in current_args[:-output_count]:
                for other, _ in usages.get(consumed, []):
                    if other is current:
                        continue
                    other_name = other.relation.name
                    if other_name in terminal_names or other_name in conditional_names:
                        continue
                    # Any lookup that defines this input (as its last arg) comes first.
                    if other.args[-1] == consumed:
                        dependencies.append((other, current))

    return dependencies
|
|
2365
|
+
|
|
2366
|
+
def _union_output_selects(self, statements: list[sql.Node]) -> list[sql.Node]:
    """Move all sql.Select nodes to the end, merged into one sql.UnionAllSelect when there are several.

    Non-select statements keep their relative order. A single select is
    appended unchanged; two or more are wrapped in one ``sql.UnionAllSelect``.
    """
    others: list[sql.Node] = []
    selects: list[sql.Select] = []

    for statement in statements:
        bucket = selects if isinstance(statement, sql.Select) else others
        bucket.append(statement)

    if len(selects) > 1:
        others.append(sql.UnionAllSelect(selects))
    else:
        # Zero or one select: nothing to merge.
        others.extend(selects)

    return others
|
|
2384
|
+
|
|
2385
|
+
def _sort_dependencies(self, statements: list[sql.Node]) -> list[sql.Node]:
    """
    Reorder SQL statements into execution order:
        1. CREATE FUNCTION statements
        2. CREATE TABLE statements
        3. INSERT, CREATE VIEW and CREATE DYNAMIC TABLE statements
           (topologically sorted by dependencies)
        4. UPDATE statements
        5. Other statements except SELECT queries
        6. SELECT queries
    """
    udfs = []
    create_tables = []
    need_sort = defaultdict(list)  # target table/view name -> statements writing it
    updates = []
    miscellaneous_statements = []
    selects = []

    def by_table(statement):
        need_sort[statement.table].append(statement)

    def by_name(statement):
        need_sort[statement.name].append(statement)

    # Checked in this order; the first matching kind wins.
    routes = (
        (sql.CreateTable, create_tables.append),
        (sql.Insert, by_table),
        ((sql.CreateView, sql.CreateDynamicTable), by_name),
        (sql.Update, updates.append),
        (sql.Select, selects.append),
        (sql.CreateFunction, udfs.append),
    )

    for statement in statements:
        for kind, sink in routes:
            if isinstance(statement, kind):
                sink(statement)
                break
        else:
            miscellaneous_statements.append(statement)

    sorted_statements = self._sort_statements_dependency_graph(need_sort)

    return udfs + create_tables + sorted_statements + updates + miscellaneous_statements + selects
|
|
2422
|
+
|
|
2423
|
+
@staticmethod
def _sort_statements_dependency_graph(statements: dict[str, list[Union[sql.Insert, sql.CreateView, sql.CreateDynamicTable]]]) -> list[sql.Insert]:
    """Topologically sort the statements by the tables their queries read.

    ``statements`` maps a target table (or view) name to the statements that
    populate it. A target depends on every table named in the FROM and JOIN
    clauses of its queries, including sub-selects and the selects found under
    NOT EXISTS / AND / OR in WHERE clauses. Statements are returned grouped by
    target, in dependency order.
    """
    dependency_edges = ordered_set()
    tables = OrderedSet.from_iterable(statements.keys())

    def collect(select, target):
        """Recursively record the tables that ``select`` reads as dependencies of ``target``."""
        if not select:
            return

        def depends_on(source):
            dependency_edges.add((source, target))
            tables.add(source)

        def walk_condition(expr):
            if isinstance(expr, sql.NotExists):
                collect(expr.expr, target)
            elif isinstance(expr, (sql.And, sql.Or)):
                for operand in expr.expr:
                    walk_condition(operand)

        # FROM clause: either a single sub-select or a collection of table references.
        sources = select.froms
        if sources:
            if isinstance(sources, sql.Select):
                collect(sources, target)
            else:
                for source in sources:
                    depends_on(source.table)

        # JOIN clause
        for join in select.joins or ():
            depends_on(join.table)

        # WHERE clause, walked recursively
        where = select.where
        if where and where.expression:
            walk_condition(where.expression)

    for target_name, group in statements.items():
        for statement in group:
            query = statement.query
            if not query:
                continue
            if isinstance(query, list):
                for part in query:
                    collect(part, target_name)
            elif isinstance(query, sql.Select):
                collect(query, target_name)
            elif isinstance(query, sql.CTE):
                for part in query.selects:
                    collect(part, target_name)

    ordered_tables = topological_sort(list(tables), list(dependency_edges))

    # Tables that are only read (never a target) have no statements to emit.
    return [
        statement
        for table in ordered_tables
        if table in statements
        for statement in statements.get(table, [])
    ]
|
|
2483
|
+
|
|
2484
|
+
class RecursiveLookupsRewriter(v.Rewriter):
    """Rewriter that retargets lookups of one relation to another relation."""

    def __init__(self, recursive_relation: ir.Relation, new_recursive_relation: ir.Relation):
        """Remember the relation to replace and its replacement."""
        super().__init__()
        self._recursive_relation: ir.Relation = recursive_relation
        self._new_recursive_relation: ir.Relation = new_recursive_relation

    def handle_lookup(self, node: ir.Lookup, parent: ir.Node):
        """Return ``node`` rebuilt against the new relation when it looks up the old one."""
        targets_old_relation = node.relation == self._recursive_relation
        if not targets_old_relation:
            return node
        # Engine, args and annotations are carried over; only the relation changes.
        return node.reconstruct(node.engine, self._new_recursive_relation, node.args, node.annotations)
|
|
2494
|
+
|
|
2495
|
+
class DerivedRelationsVisitor(v.Visitor):
|
|
2496
|
+
_is_derived: bool = True
|
|
2497
|
+
|
|
2498
|
+
def is_derived(self) -> bool:
|
|
2499
|
+
return self._is_derived
|
|
2500
|
+
|
|
2501
|
+
def visit_relation(self, node: ir.Relation, parent: Optional[ir.Node]):
|
|
2502
|
+
if self._is_derived and from_cdc_annotation in node.annotations:
|
|
2503
|
+
self._is_derived = False
|