dvt_core-0.58.6-cp311-cp311-macosx_10_9_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dbt/__init__.py +7 -0
- dbt/_pydantic_shim.py +26 -0
- dbt/artifacts/__init__.py +0 -0
- dbt/artifacts/exceptions/__init__.py +1 -0
- dbt/artifacts/exceptions/schemas.py +31 -0
- dbt/artifacts/resources/__init__.py +116 -0
- dbt/artifacts/resources/base.py +67 -0
- dbt/artifacts/resources/types.py +93 -0
- dbt/artifacts/resources/v1/analysis.py +10 -0
- dbt/artifacts/resources/v1/catalog.py +23 -0
- dbt/artifacts/resources/v1/components.py +274 -0
- dbt/artifacts/resources/v1/config.py +277 -0
- dbt/artifacts/resources/v1/documentation.py +11 -0
- dbt/artifacts/resources/v1/exposure.py +51 -0
- dbt/artifacts/resources/v1/function.py +52 -0
- dbt/artifacts/resources/v1/generic_test.py +31 -0
- dbt/artifacts/resources/v1/group.py +21 -0
- dbt/artifacts/resources/v1/hook.py +11 -0
- dbt/artifacts/resources/v1/macro.py +29 -0
- dbt/artifacts/resources/v1/metric.py +172 -0
- dbt/artifacts/resources/v1/model.py +145 -0
- dbt/artifacts/resources/v1/owner.py +10 -0
- dbt/artifacts/resources/v1/saved_query.py +111 -0
- dbt/artifacts/resources/v1/seed.py +41 -0
- dbt/artifacts/resources/v1/semantic_layer_components.py +72 -0
- dbt/artifacts/resources/v1/semantic_model.py +314 -0
- dbt/artifacts/resources/v1/singular_test.py +14 -0
- dbt/artifacts/resources/v1/snapshot.py +91 -0
- dbt/artifacts/resources/v1/source_definition.py +84 -0
- dbt/artifacts/resources/v1/sql_operation.py +10 -0
- dbt/artifacts/resources/v1/unit_test_definition.py +77 -0
- dbt/artifacts/schemas/__init__.py +0 -0
- dbt/artifacts/schemas/base.py +191 -0
- dbt/artifacts/schemas/batch_results.py +24 -0
- dbt/artifacts/schemas/catalog/__init__.py +11 -0
- dbt/artifacts/schemas/catalog/v1/__init__.py +0 -0
- dbt/artifacts/schemas/catalog/v1/catalog.py +59 -0
- dbt/artifacts/schemas/freshness/__init__.py +1 -0
- dbt/artifacts/schemas/freshness/v3/__init__.py +0 -0
- dbt/artifacts/schemas/freshness/v3/freshness.py +158 -0
- dbt/artifacts/schemas/manifest/__init__.py +2 -0
- dbt/artifacts/schemas/manifest/v12/__init__.py +0 -0
- dbt/artifacts/schemas/manifest/v12/manifest.py +211 -0
- dbt/artifacts/schemas/results.py +147 -0
- dbt/artifacts/schemas/run/__init__.py +2 -0
- dbt/artifacts/schemas/run/v5/__init__.py +0 -0
- dbt/artifacts/schemas/run/v5/run.py +184 -0
- dbt/artifacts/schemas/upgrades/__init__.py +4 -0
- dbt/artifacts/schemas/upgrades/upgrade_manifest.py +174 -0
- dbt/artifacts/schemas/upgrades/upgrade_manifest_dbt_version.py +2 -0
- dbt/artifacts/utils/validation.py +153 -0
- dbt/cli/__init__.py +1 -0
- dbt/cli/context.py +17 -0
- dbt/cli/exceptions.py +57 -0
- dbt/cli/flags.py +560 -0
- dbt/cli/main.py +2403 -0
- dbt/cli/option_types.py +121 -0
- dbt/cli/options.py +80 -0
- dbt/cli/params.py +844 -0
- dbt/cli/requires.py +490 -0
- dbt/cli/resolvers.py +50 -0
- dbt/cli/types.py +40 -0
- dbt/clients/__init__.py +0 -0
- dbt/clients/checked_load.py +83 -0
- dbt/clients/git.py +164 -0
- dbt/clients/jinja.py +206 -0
- dbt/clients/jinja_static.py +245 -0
- dbt/clients/registry.py +192 -0
- dbt/clients/yaml_helper.py +68 -0
- dbt/compilation.py +876 -0
- dbt/compute/__init__.py +14 -0
- dbt/compute/engines/__init__.py +12 -0
- dbt/compute/engines/spark_engine.cpython-311-darwin.so +0 -0
- dbt/compute/engines/spark_engine.py +642 -0
- dbt/compute/federated_executor.cpython-311-darwin.so +0 -0
- dbt/compute/federated_executor.py +1080 -0
- dbt/compute/filter_pushdown.cpython-311-darwin.so +0 -0
- dbt/compute/filter_pushdown.py +273 -0
- dbt/compute/jar_provisioning.cpython-311-darwin.so +0 -0
- dbt/compute/jar_provisioning.py +255 -0
- dbt/compute/java_compat.cpython-311-darwin.so +0 -0
- dbt/compute/java_compat.py +689 -0
- dbt/compute/jdbc_utils.cpython-311-darwin.so +0 -0
- dbt/compute/jdbc_utils.py +678 -0
- dbt/compute/metadata/__init__.py +40 -0
- dbt/compute/metadata/adapters_registry.cpython-311-darwin.so +0 -0
- dbt/compute/metadata/adapters_registry.py +370 -0
- dbt/compute/metadata/registry.cpython-311-darwin.so +0 -0
- dbt/compute/metadata/registry.py +674 -0
- dbt/compute/metadata/store.cpython-311-darwin.so +0 -0
- dbt/compute/metadata/store.py +1499 -0
- dbt/compute/smart_selector.cpython-311-darwin.so +0 -0
- dbt/compute/smart_selector.py +377 -0
- dbt/compute/strategies/__init__.py +55 -0
- dbt/compute/strategies/base.cpython-311-darwin.so +0 -0
- dbt/compute/strategies/base.py +165 -0
- dbt/compute/strategies/dataproc.cpython-311-darwin.so +0 -0
- dbt/compute/strategies/dataproc.py +207 -0
- dbt/compute/strategies/emr.cpython-311-darwin.so +0 -0
- dbt/compute/strategies/emr.py +203 -0
- dbt/compute/strategies/local.cpython-311-darwin.so +0 -0
- dbt/compute/strategies/local.py +443 -0
- dbt/compute/strategies/standalone.cpython-311-darwin.so +0 -0
- dbt/compute/strategies/standalone.py +262 -0
- dbt/config/__init__.py +4 -0
- dbt/config/catalogs.py +94 -0
- dbt/config/compute.cpython-311-darwin.so +0 -0
- dbt/config/compute.py +513 -0
- dbt/config/dvt_profile.cpython-311-darwin.so +0 -0
- dbt/config/dvt_profile.py +342 -0
- dbt/config/profile.py +422 -0
- dbt/config/project.py +873 -0
- dbt/config/project_utils.py +28 -0
- dbt/config/renderer.py +231 -0
- dbt/config/runtime.py +553 -0
- dbt/config/selectors.py +208 -0
- dbt/config/utils.py +77 -0
- dbt/constants.py +28 -0
- dbt/context/__init__.py +0 -0
- dbt/context/base.py +745 -0
- dbt/context/configured.py +135 -0
- dbt/context/context_config.py +382 -0
- dbt/context/docs.py +82 -0
- dbt/context/exceptions_jinja.py +178 -0
- dbt/context/macro_resolver.py +195 -0
- dbt/context/macros.py +171 -0
- dbt/context/manifest.py +72 -0
- dbt/context/providers.py +2249 -0
- dbt/context/query_header.py +13 -0
- dbt/context/secret.py +58 -0
- dbt/context/target.py +74 -0
- dbt/contracts/__init__.py +0 -0
- dbt/contracts/files.py +413 -0
- dbt/contracts/graph/__init__.py +0 -0
- dbt/contracts/graph/manifest.py +1904 -0
- dbt/contracts/graph/metrics.py +97 -0
- dbt/contracts/graph/model_config.py +70 -0
- dbt/contracts/graph/node_args.py +42 -0
- dbt/contracts/graph/nodes.py +1806 -0
- dbt/contracts/graph/semantic_manifest.py +232 -0
- dbt/contracts/graph/unparsed.py +811 -0
- dbt/contracts/project.py +417 -0
- dbt/contracts/results.py +53 -0
- dbt/contracts/selection.py +23 -0
- dbt/contracts/sql.py +85 -0
- dbt/contracts/state.py +68 -0
- dbt/contracts/util.py +46 -0
- dbt/deprecations.py +348 -0
- dbt/deps/__init__.py +0 -0
- dbt/deps/base.py +152 -0
- dbt/deps/git.py +195 -0
- dbt/deps/local.py +79 -0
- dbt/deps/registry.py +130 -0
- dbt/deps/resolver.py +149 -0
- dbt/deps/tarball.py +120 -0
- dbt/docs/source/_ext/dbt_click.py +119 -0
- dbt/docs/source/conf.py +32 -0
- dbt/env_vars.py +64 -0
- dbt/event_time/event_time.py +40 -0
- dbt/event_time/sample_window.py +60 -0
- dbt/events/__init__.py +15 -0
- dbt/events/base_types.py +36 -0
- dbt/events/core_types_pb2.py +2 -0
- dbt/events/logging.py +108 -0
- dbt/events/types.py +2516 -0
- dbt/exceptions.py +1486 -0
- dbt/flags.py +89 -0
- dbt/graph/__init__.py +11 -0
- dbt/graph/cli.py +249 -0
- dbt/graph/graph.py +172 -0
- dbt/graph/queue.py +214 -0
- dbt/graph/selector.py +374 -0
- dbt/graph/selector_methods.py +975 -0
- dbt/graph/selector_spec.py +222 -0
- dbt/graph/thread_pool.py +18 -0
- dbt/hooks.py +21 -0
- dbt/include/README.md +49 -0
- dbt/include/__init__.py +3 -0
- dbt/include/data/adapters_registry.duckdb +0 -0
- dbt/include/data/build_registry.py +242 -0
- dbt/include/data/csv/adapter_queries.csv +33 -0
- dbt/include/data/csv/syntax_rules.csv +9 -0
- dbt/include/data/csv/type_mappings_bigquery.csv +28 -0
- dbt/include/data/csv/type_mappings_databricks.csv +30 -0
- dbt/include/data/csv/type_mappings_mysql.csv +40 -0
- dbt/include/data/csv/type_mappings_oracle.csv +30 -0
- dbt/include/data/csv/type_mappings_postgres.csv +56 -0
- dbt/include/data/csv/type_mappings_redshift.csv +33 -0
- dbt/include/data/csv/type_mappings_snowflake.csv +38 -0
- dbt/include/data/csv/type_mappings_sqlserver.csv +35 -0
- dbt/include/starter_project/.gitignore +4 -0
- dbt/include/starter_project/README.md +15 -0
- dbt/include/starter_project/__init__.py +3 -0
- dbt/include/starter_project/analyses/.gitkeep +0 -0
- dbt/include/starter_project/dbt_project.yml +36 -0
- dbt/include/starter_project/macros/.gitkeep +0 -0
- dbt/include/starter_project/models/example/my_first_dbt_model.sql +27 -0
- dbt/include/starter_project/models/example/my_second_dbt_model.sql +6 -0
- dbt/include/starter_project/models/example/schema.yml +21 -0
- dbt/include/starter_project/seeds/.gitkeep +0 -0
- dbt/include/starter_project/snapshots/.gitkeep +0 -0
- dbt/include/starter_project/tests/.gitkeep +0 -0
- dbt/internal_deprecations.py +26 -0
- dbt/jsonschemas/__init__.py +3 -0
- dbt/jsonschemas/jsonschemas.py +309 -0
- dbt/jsonschemas/project/0.0.110.json +4717 -0
- dbt/jsonschemas/project/0.0.85.json +2015 -0
- dbt/jsonschemas/resources/0.0.110.json +2636 -0
- dbt/jsonschemas/resources/0.0.85.json +2536 -0
- dbt/jsonschemas/resources/latest.json +6773 -0
- dbt/links.py +4 -0
- dbt/materializations/__init__.py +0 -0
- dbt/materializations/incremental/__init__.py +0 -0
- dbt/materializations/incremental/microbatch.py +236 -0
- dbt/mp_context.py +8 -0
- dbt/node_types.py +37 -0
- dbt/parser/__init__.py +23 -0
- dbt/parser/analysis.py +21 -0
- dbt/parser/base.py +548 -0
- dbt/parser/common.py +266 -0
- dbt/parser/docs.py +52 -0
- dbt/parser/fixtures.py +51 -0
- dbt/parser/functions.py +30 -0
- dbt/parser/generic_test.py +100 -0
- dbt/parser/generic_test_builders.py +333 -0
- dbt/parser/hooks.py +118 -0
- dbt/parser/macros.py +137 -0
- dbt/parser/manifest.py +2204 -0
- dbt/parser/models.py +573 -0
- dbt/parser/partial.py +1178 -0
- dbt/parser/read_files.py +445 -0
- dbt/parser/schema_generic_tests.py +422 -0
- dbt/parser/schema_renderer.py +111 -0
- dbt/parser/schema_yaml_readers.py +935 -0
- dbt/parser/schemas.py +1466 -0
- dbt/parser/search.py +149 -0
- dbt/parser/seeds.py +28 -0
- dbt/parser/singular_test.py +20 -0
- dbt/parser/snapshots.py +44 -0
- dbt/parser/sources.py +558 -0
- dbt/parser/sql.py +62 -0
- dbt/parser/unit_tests.py +621 -0
- dbt/plugins/__init__.py +20 -0
- dbt/plugins/contracts.py +9 -0
- dbt/plugins/exceptions.py +2 -0
- dbt/plugins/manager.py +163 -0
- dbt/plugins/manifest.py +21 -0
- dbt/profiler.py +20 -0
- dbt/py.typed +1 -0
- dbt/query_analyzer.cpython-311-darwin.so +0 -0
- dbt/query_analyzer.py +410 -0
- dbt/runners/__init__.py +2 -0
- dbt/runners/exposure_runner.py +7 -0
- dbt/runners/no_op_runner.py +45 -0
- dbt/runners/saved_query_runner.py +7 -0
- dbt/selected_resources.py +8 -0
- dbt/task/__init__.py +0 -0
- dbt/task/base.py +503 -0
- dbt/task/build.py +197 -0
- dbt/task/clean.py +56 -0
- dbt/task/clone.py +161 -0
- dbt/task/compile.py +150 -0
- dbt/task/compute.cpython-311-darwin.so +0 -0
- dbt/task/compute.py +458 -0
- dbt/task/debug.py +505 -0
- dbt/task/deps.py +280 -0
- dbt/task/docs/__init__.py +3 -0
- dbt/task/docs/api/__init__.py +23 -0
- dbt/task/docs/api/catalog.cpython-311-darwin.so +0 -0
- dbt/task/docs/api/catalog.py +204 -0
- dbt/task/docs/api/lineage.cpython-311-darwin.so +0 -0
- dbt/task/docs/api/lineage.py +234 -0
- dbt/task/docs/api/profile.cpython-311-darwin.so +0 -0
- dbt/task/docs/api/profile.py +204 -0
- dbt/task/docs/api/spark.cpython-311-darwin.so +0 -0
- dbt/task/docs/api/spark.py +186 -0
- dbt/task/docs/generate.py +947 -0
- dbt/task/docs/index.html +250 -0
- dbt/task/docs/serve.cpython-311-darwin.so +0 -0
- dbt/task/docs/serve.py +174 -0
- dbt/task/dvt_output.py +362 -0
- dbt/task/dvt_run.py +204 -0
- dbt/task/freshness.py +322 -0
- dbt/task/function.py +121 -0
- dbt/task/group_lookup.py +46 -0
- dbt/task/init.cpython-311-darwin.so +0 -0
- dbt/task/init.py +604 -0
- dbt/task/java.cpython-311-darwin.so +0 -0
- dbt/task/java.py +316 -0
- dbt/task/list.py +236 -0
- dbt/task/metadata.cpython-311-darwin.so +0 -0
- dbt/task/metadata.py +804 -0
- dbt/task/printer.py +175 -0
- dbt/task/profile.cpython-311-darwin.so +0 -0
- dbt/task/profile.py +1307 -0
- dbt/task/profile_serve.py +615 -0
- dbt/task/retract.py +438 -0
- dbt/task/retry.py +175 -0
- dbt/task/run.py +1387 -0
- dbt/task/run_operation.py +141 -0
- dbt/task/runnable.py +758 -0
- dbt/task/seed.py +103 -0
- dbt/task/show.py +149 -0
- dbt/task/snapshot.py +56 -0
- dbt/task/spark.cpython-311-darwin.so +0 -0
- dbt/task/spark.py +414 -0
- dbt/task/sql.py +110 -0
- dbt/task/target_sync.cpython-311-darwin.so +0 -0
- dbt/task/target_sync.py +766 -0
- dbt/task/test.py +464 -0
- dbt/tests/fixtures/__init__.py +1 -0
- dbt/tests/fixtures/project.py +620 -0
- dbt/tests/util.py +651 -0
- dbt/tracking.py +529 -0
- dbt/utils/__init__.py +3 -0
- dbt/utils/artifact_upload.py +151 -0
- dbt/utils/utils.py +408 -0
- dbt/version.py +270 -0
- dvt_cli/__init__.py +72 -0
- dvt_core-0.58.6.dist-info/METADATA +288 -0
- dvt_core-0.58.6.dist-info/RECORD +324 -0
- dvt_core-0.58.6.dist-info/WHEEL +5 -0
- dvt_core-0.58.6.dist-info/entry_points.txt +2 -0
- dvt_core-0.58.6.dist-info/top_level.txt +2 -0
dbt/compute/metadata/__init__.py
@@ -0,0 +1,40 @@
+# =============================================================================
+# DVT Metadata Layer
+# =============================================================================
+# Project-level metadata store using DuckDB for:
+# - Type registry (adapter types → Spark types)
+# - Syntax registry (quoting, case sensitivity per adapter)
+# - Metadata snapshot (cached table/column info)
+# - Profile results (v0.56.0 - dvt profile)
+# - Catalog nodes (v0.56.0 - dvt docs generate)
+# - Lineage edges (v0.56.0 - dvt docs generate)
+#
+# DVT v0.54.0: Initial implementation
+# DVT v0.55.0: Added AdaptersRegistry for shipped registry database
+# DVT v0.56.0: Added profile_results, catalog_nodes, lineage_edges tables
+# =============================================================================
+
+from dbt.compute.metadata.store import (
+    ProjectMetadataStore,
+    ColumnMetadata,
+    TableMetadata,
+    RowCountInfo,
+    ColumnProfileResult,
+    CatalogNode,
+    LineageEdge,
+)
+from dbt.compute.metadata.registry import TypeRegistry, SyntaxRegistry
+from dbt.compute.metadata.adapters_registry import AdaptersRegistry
+
+__all__ = [
+    "ProjectMetadataStore",
+    "ColumnMetadata",
+    "TableMetadata",
+    "RowCountInfo",
+    "ColumnProfileResult",
+    "CatalogNode",
+    "LineageEdge",
+    "TypeRegistry",
+    "SyntaxRegistry",
+    "AdaptersRegistry",
+]

Binary file
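For context, the sketch below shows how downstream code might pull in the names that this __init__.py re-exports. It is a minimal, hypothetical usage example, not part of the package: it assumes dvt-core is installed, and the constructor signatures of these classes live in store.py and registry.py, which are not shown in this section.

# Hypothetical usage sketch: import the entry points re-exported by
# dbt.compute.metadata (names taken from the __all__ list above).
from dbt.compute.metadata import (
    ProjectMetadataStore,  # project-level DuckDB-backed metadata store
    TypeRegistry,          # adapter type -> Spark type lookups
    SyntaxRegistry,        # quoting / case-sensitivity rules per adapter
    AdaptersRegistry,      # read-only shipped registry (see adapters_registry.py below)
)

# Inspect what is available without assuming constructor arguments.
print(ProjectMetadataStore.__doc__)
print(AdaptersRegistry.__doc__)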
dbt/compute/metadata/adapters_registry.py
@@ -0,0 +1,370 @@
+# =============================================================================
+# DVT Adapters Registry
+# =============================================================================
+# Read-only access to the shipped adapters_registry.duckdb database containing:
+# - Type mappings (adapter -> Spark types)
+# - Syntax rules (quoting, case sensitivity)
+# - Adapter metadata queries (SQL templates)
+#
+# DVT v0.54.0: DuckDB-backed registry
+# =============================================================================
+
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+import re
+
+try:
+    import duckdb
+except ImportError:
+    duckdb = None  # Will raise helpful error on first use
+
+
+@dataclass
+class TypeMapping:
+    """A single type mapping entry."""
+    adapter_name: str
+    adapter_type: str
+    spark_type: str
+    spark_version: str = "all"
+    is_complex: bool = False
+    cast_expression: Optional[str] = None
+    notes: Optional[str] = None
+
+
+@dataclass
+class SyntaxRule:
+    """Syntax rules for a specific adapter."""
+    adapter_name: str
+    quote_start: str
+    quote_end: str
+    case_sensitivity: str  # 'lowercase', 'uppercase', 'case_insensitive'
+    reserved_keywords: List[str]
+
+
+@dataclass
+class AdapterQuery:
+    """SQL query template for adapter metadata extraction."""
+    adapter_name: str
+    query_type: str  # 'columns', 'tables', 'row_count', 'primary_key'
+    query_template: str
+    notes: Optional[str] = None
+
+
+class AdaptersRegistry:
+    """
+    Read-only access to the shipped adapters registry database.
+
+    This registry is shipped with DVT and provides:
+    - Type mappings between adapter native types and Spark types
+    - Syntax rules for SQL generation (quoting, case sensitivity)
+    - Query templates for metadata extraction
+
+    The registry is stored as a DuckDB database in the package's include/data directory.
+    """
+
+    _instance: Optional['AdaptersRegistry'] = None
+    _registry_path: Optional[Path] = None
+
+    def __new__(cls) -> 'AdaptersRegistry':
+        """Singleton pattern for registry access."""
+        if cls._instance is None:
+            cls._instance = super().__new__(cls)
+            cls._instance._conn = None
+        return cls._instance
+
+    @classmethod
+    def get_registry_path(cls) -> Path:
+        """Return path to the shipped adapters_registry.duckdb."""
+        if cls._registry_path is not None:
+            return cls._registry_path
+
+        # Find the package's include/data directory
+        # This file is at: core/dbt/compute/metadata/adapters_registry.py
+        # Registry is at: core/dbt/include/data/adapters_registry.duckdb
+        this_file = Path(__file__)
+        package_root = this_file.parent.parent.parent  # -> core/dbt
+        registry_path = package_root / "include" / "data" / "adapters_registry.duckdb"
+
+        if not registry_path.exists():
+            raise FileNotFoundError(
+                f"Adapters registry not found at: {registry_path}\n"
+                "This file should be shipped with the DVT package. "
+                "Please reinstall DVT or rebuild the registry with build_registry.py"
+            )
+
+        cls._registry_path = registry_path
+        return registry_path
+
+    def _get_connection(self) -> 'duckdb.DuckDBPyConnection':
+        """Get or create a read-only connection to the registry."""
+        if duckdb is None:
+            raise ImportError(
+                "duckdb is required for the adapters registry. "
+                "Install with: pip install duckdb"
+            )
+
+        if self._conn is None:
+            registry_path = self.get_registry_path()
+            self._conn = duckdb.connect(str(registry_path), read_only=True)
+
+        return self._conn
+
+    def close(self) -> None:
+        """Close the registry connection."""
+        if self._conn is not None:
+            self._conn.close()
+            self._conn = None
+
+    # =========================================================================
+    # Type Mappings
+    # =========================================================================
+
+    def get_spark_type(
+        self,
+        adapter_name: str,
+        adapter_type: str,
+        spark_version: str = "all"
+    ) -> Optional[TypeMapping]:
+        """
+        Look up the Spark type mapping for a given adapter type.
+
+        :param adapter_name: Source adapter (e.g., 'postgres', 'snowflake')
+        :param adapter_type: Adapter's native type (e.g., 'INTEGER', 'VARCHAR')
+        :param spark_version: Target Spark version (default 'all')
+        :returns: TypeMapping or None if not found
+        """
+        conn = self._get_connection()
+
+        # Normalize inputs
+        adapter_name = adapter_name.lower()
+        adapter_type = adapter_type.upper().strip()
+
+        # Remove size specifiers: VARCHAR(255) -> VARCHAR
+        adapter_type_normalized = re.sub(r'\([^)]*\)', '', adapter_type).strip()
+
+        result = conn.execute("""
+            SELECT adapter_name, adapter_type, spark_type, spark_version,
+                   is_complex, cast_expression, notes
+            FROM datatype_mappings
+            WHERE adapter_name = ?
+              AND adapter_type = ?
+              AND (spark_version = 'all' OR spark_version = ?)
+            ORDER BY
+                CASE WHEN spark_version = ? THEN 0 ELSE 1 END
+            LIMIT 1
+        """, [adapter_name, adapter_type_normalized, spark_version, spark_version]).fetchone()

+        if result:
+            return TypeMapping(
+                adapter_name=result[0],
+                adapter_type=result[1],
+                spark_type=result[2],
+                spark_version=result[3],
+                is_complex=result[4],
+                cast_expression=result[5],
+                notes=result[6]
+            )
+        return None
+
+    def get_all_mappings_for_adapter(self, adapter_name: str) -> List[TypeMapping]:
+        """Get all type mappings for a specific adapter."""
+        conn = self._get_connection()
+        adapter_name = adapter_name.lower()
+
+        results = conn.execute("""
+            SELECT adapter_name, adapter_type, spark_type, spark_version,
+                   is_complex, cast_expression, notes
+            FROM datatype_mappings
+            WHERE adapter_name = ?
+            ORDER BY adapter_type
+        """, [adapter_name]).fetchall()
+
+        return [
+            TypeMapping(
+                adapter_name=row[0],
+                adapter_type=row[1],
+                spark_type=row[2],
+                spark_version=row[3],
+                is_complex=row[4],
+                cast_expression=row[5],
+                notes=row[6]
+            )
+            for row in results
+        ]
+
+    def get_supported_adapters(self) -> List[str]:
+        """Get list of all supported adapter names."""
+        conn = self._get_connection()
+        results = conn.execute("""
+            SELECT DISTINCT adapter_name FROM datatype_mappings ORDER BY adapter_name
+        """).fetchall()
+        return [row[0] for row in results]
+
+    # =========================================================================
+    # Syntax Rules
+    # =========================================================================
+
+    def get_syntax_rule(self, adapter_name: str) -> Optional[SyntaxRule]:
+        """Get syntax rules for a specific adapter."""
+        conn = self._get_connection()
+        adapter_name = adapter_name.lower()
+
+        result = conn.execute("""
+            SELECT adapter_name, quote_start, quote_end, case_sensitivity, reserved_keywords
+            FROM syntax_registry
+            WHERE adapter_name = ?
+        """, [adapter_name]).fetchone()
+
+        if result:
+            # Parse reserved keywords from comma-separated string
+            keywords = []
+            if result[4]:
+                keywords = [kw.strip() for kw in result[4].split(',') if kw.strip()]
+
+            return SyntaxRule(
+                adapter_name=result[0],
+                quote_start=result[1],
+                quote_end=result[2],
+                case_sensitivity=result[3],
+                reserved_keywords=keywords
+            )
+        return None
+
+    def quote_identifier(self, adapter_name: str, identifier: str) -> str:
+        """Quote an identifier using the adapter's quoting rules."""
+        rule = self.get_syntax_rule(adapter_name)
+        if not rule:
+            return f'"{identifier}"'  # Default to double quotes
+        return f'{rule.quote_start}{identifier}{rule.quote_end}'
+
+    def needs_quoting(self, adapter_name: str, identifier: str) -> bool:
+        """Check if an identifier needs quoting (reserved keyword or special chars)."""
+        rule = self.get_syntax_rule(adapter_name)
+        if not rule:
+            return False
+
+        # Check if it's a reserved keyword
+        upper_id = identifier.upper()
+        if upper_id in [kw.upper() for kw in rule.reserved_keywords]:
+            return True
+
+        # Check for special characters or spaces
+        if not identifier.replace('_', '').isalnum() or ' ' in identifier or '-' in identifier:
+            return True
+
+        return False
+
+    def normalize_identifier(self, adapter_name: str, identifier: str) -> str:
+        """Normalize an identifier based on the adapter's case sensitivity rules."""
+        rule = self.get_syntax_rule(adapter_name)
+        if not rule:
+            return identifier
+
+        case_rule = rule.case_sensitivity.lower()
+        if case_rule == "uppercase":
+            return identifier.upper()
+        elif case_rule == "lowercase":
+            return identifier.lower()
+        return identifier  # case_insensitive or preserve
+
+    # =========================================================================
+    # Adapter Queries
+    # =========================================================================
+
+    def get_metadata_query(
+        self,
+        adapter_name: str,
+        query_type: str
+    ) -> Optional[AdapterQuery]:
+        """
+        Get SQL template for metadata extraction.
+
+        :param adapter_name: Source adapter (e.g., 'postgres', 'snowflake')
+        :param query_type: Query type: 'columns', 'tables', 'row_count', 'primary_key'
+        :returns: AdapterQuery or None if not found
+        """
+        conn = self._get_connection()
+        adapter_name = adapter_name.lower()
+
+        result = conn.execute("""
+            SELECT adapter_name, query_type, query_template, notes
+            FROM adapter_queries
+            WHERE adapter_name = ? AND query_type = ?
+        """, [adapter_name, query_type]).fetchone()
+
+        if result:
+            return AdapterQuery(
+                adapter_name=result[0],
+                query_type=result[1],
+                query_template=result[2],
+                notes=result[3]
+            )
+        return None
+
+    def get_all_queries_for_adapter(self, adapter_name: str) -> List[AdapterQuery]:
+        """Get all query templates for a specific adapter."""
+        conn = self._get_connection()
+        adapter_name = adapter_name.lower()
+
+        results = conn.execute("""
+            SELECT adapter_name, query_type, query_template, notes
+            FROM adapter_queries
+            WHERE adapter_name = ?
+            ORDER BY query_type
+        """, [adapter_name]).fetchall()
+
+        return [
+            AdapterQuery(
+                adapter_name=row[0],
+                query_type=row[1],
+                query_template=row[2],
+                notes=row[3]
+            )
+            for row in results
+        ]
+
+
+# =============================================================================
+# Module-level convenience functions
+# =============================================================================
+
+def get_registry() -> AdaptersRegistry:
+    """Get the singleton AdaptersRegistry instance."""
+    return AdaptersRegistry()
+
+
+def get_spark_type(
+    adapter_name: str,
+    adapter_type: str,
+    spark_version: str = "all"
+) -> Optional[TypeMapping]:
+    """
+    Convenience function to look up Spark type mapping.
+
+    :param adapter_name: Source adapter (e.g., 'postgres', 'snowflake')
+    :param adapter_type: Adapter's native type (e.g., 'INTEGER', 'VARCHAR')
+    :param spark_version: Target Spark version (default 'all')
+    :returns: TypeMapping or None if not found
+    """
+    return get_registry().get_spark_type(adapter_name, adapter_type, spark_version)
+
+
+def get_syntax_rule(adapter_name: str) -> Optional[SyntaxRule]:
+    """Convenience function to get syntax rules for an adapter."""
+    return get_registry().get_syntax_rule(adapter_name)
+
+
+def get_metadata_query(adapter_name: str, query_type: str) -> Optional[AdapterQuery]:
+    """Convenience function to get a metadata query template."""
+    return get_registry().get_metadata_query(adapter_name, query_type)
+
+
+def quote_identifier(adapter_name: str, identifier: str) -> str:
+    """Convenience function to quote an identifier."""
+    return get_registry().quote_identifier(adapter_name, identifier)
+
+
+def normalize_identifier(adapter_name: str, identifier: str) -> str:
+    """Convenience function to normalize an identifier."""
+    return get_registry().normalize_identifier(adapter_name, identifier)

Binary file
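The module-level helpers at the end of adapters_registry.py wrap the singleton AdaptersRegistry so callers never manage the DuckDB connection directly. The sketch below exercises that public surface; it is hypothetical and assumes the shipped adapters_registry.duckdb contains rows for the adapters named here (the actual contents come from the CSV seed files under dbt/include/data/csv/, which are not reproduced in this section).

# Hypothetical usage sketch: resolve a Spark type and apply syntax rules
# via the convenience wrappers defined in adapters_registry.py.
from dbt.compute.metadata.adapters_registry import (
    get_registry,
    get_spark_type,
    get_syntax_rule,
    quote_identifier,
    normalize_identifier,
)

registry = get_registry()
print(registry.get_supported_adapters())  # e.g. ['bigquery', 'postgres', ...] (assumed contents)

# Size specifiers are stripped before lookup, so VARCHAR(255) resolves like VARCHAR.
mapping = get_spark_type("postgres", "VARCHAR(255)")
if mapping:
    print(mapping.spark_type, mapping.cast_expression)

rule = get_syntax_rule("postgres")
if rule:
    print(rule.quote_start, rule.case_sensitivity)

print(quote_identifier("postgres", "select"))       # wraps in the adapter's quote characters
print(normalize_identifier("snowflake", "my_col"))  # case-folded per the adapter's rule (assumed uppercase)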