dlt-saga 0.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dlt_saga/__init__.py +46 -0
- dlt_saga/ai_setup_command.py +126 -0
- dlt_saga/cli.py +2221 -0
- dlt_saga/defaults.py +114 -0
- dlt_saga/destinations/__init__.py +10 -0
- dlt_saga/destinations/base.py +677 -0
- dlt_saga/destinations/bigquery/__init__.py +11 -0
- dlt_saga/destinations/bigquery/access.py +287 -0
- dlt_saga/destinations/bigquery/base.py +643 -0
- dlt_saga/destinations/bigquery/config.py +134 -0
- dlt_saga/destinations/bigquery/destination.py +1473 -0
- dlt_saga/destinations/config.py +63 -0
- dlt_saga/destinations/databricks/__init__.py +0 -0
- dlt_saga/destinations/databricks/access.py +203 -0
- dlt_saga/destinations/databricks/config.py +155 -0
- dlt_saga/destinations/databricks/destination.py +843 -0
- dlt_saga/destinations/duckdb/__init__.py +6 -0
- dlt_saga/destinations/duckdb/config.py +66 -0
- dlt_saga/destinations/duckdb/destination.py +371 -0
- dlt_saga/destinations/factory.py +146 -0
- dlt_saga/historize/__init__.py +1 -0
- dlt_saga/historize/config.py +216 -0
- dlt_saga/historize/factory.py +133 -0
- dlt_saga/historize/runner.py +862 -0
- dlt_saga/historize/sql.py +585 -0
- dlt_saga/historize/state.py +315 -0
- dlt_saga/hooks/__init__.py +63 -0
- dlt_saga/hooks/loader.py +170 -0
- dlt_saga/hooks/registry.py +136 -0
- dlt_saga/init_command.py +660 -0
- dlt_saga/packages.py +204 -0
- dlt_saga/pipeline_config/__init__.py +29 -0
- dlt_saga/pipeline_config/base_config.py +515 -0
- dlt_saga/pipeline_config/file_config.py +649 -0
- dlt_saga/pipeline_config/naming.py +191 -0
- dlt_saga/pipelines/__init__.py +13 -0
- dlt_saga/pipelines/api/__init__.py +5 -0
- dlt_saga/pipelines/api/base.py +509 -0
- dlt_saga/pipelines/api/config.py +188 -0
- dlt_saga/pipelines/api/pipeline.py +24 -0
- dlt_saga/pipelines/base_client.py +16 -0
- dlt_saga/pipelines/base_config.py +106 -0
- dlt_saga/pipelines/base_pipeline.py +651 -0
- dlt_saga/pipelines/database/__init__.py +14 -0
- dlt_saga/pipelines/database/client.py +409 -0
- dlt_saga/pipelines/database/config.py +185 -0
- dlt_saga/pipelines/database/pipeline.py +120 -0
- dlt_saga/pipelines/executor.py +80 -0
- dlt_saga/pipelines/filesystem/__init__.py +12 -0
- dlt_saga/pipelines/filesystem/client.py +877 -0
- dlt_saga/pipelines/filesystem/config.py +221 -0
- dlt_saga/pipelines/filesystem/pipeline.py +440 -0
- dlt_saga/pipelines/google_sheets/__init__.py +12 -0
- dlt_saga/pipelines/google_sheets/client.py +137 -0
- dlt_saga/pipelines/google_sheets/config.py +64 -0
- dlt_saga/pipelines/google_sheets/pipeline.py +129 -0
- dlt_saga/pipelines/native_load/__init__.py +1 -0
- dlt_saga/pipelines/native_load/_sql.py +16 -0
- dlt_saga/pipelines/native_load/config.py +455 -0
- dlt_saga/pipelines/native_load/pipeline.py +851 -0
- dlt_saga/pipelines/native_load/state.py +371 -0
- dlt_saga/pipelines/native_load/storage/__init__.py +47 -0
- dlt_saga/pipelines/native_load/storage/adls.py +154 -0
- dlt_saga/pipelines/native_load/storage/base.py +42 -0
- dlt_saga/pipelines/native_load/storage/gcs.py +77 -0
- dlt_saga/pipelines/registry.py +369 -0
- dlt_saga/pipelines/sharepoint/__init__.py +12 -0
- dlt_saga/pipelines/sharepoint/client.py +207 -0
- dlt_saga/pipelines/sharepoint/config.py +128 -0
- dlt_saga/pipelines/sharepoint/pipeline.py +106 -0
- dlt_saga/pipelines/target/__init__.py +0 -0
- dlt_saga/pipelines/target/config.py +304 -0
- dlt_saga/pipelines/target/writer.py +83 -0
- dlt_saga/project_config.py +523 -0
- dlt_saga/report/__init__.py +5 -0
- dlt_saga/report/collector.py +460 -0
- dlt_saga/report/favicon.svg +13 -0
- dlt_saga/report/generator.py +211 -0
- dlt_saga/report/report.css +291 -0
- dlt_saga/report/report.js +1346 -0
- dlt_saga/report/uploader.py +109 -0
- dlt_saga/schemas/dlt_common.json +310 -0
- dlt_saga/session.py +769 -0
- dlt_saga/templates/ai_context.md +268 -0
- dlt_saga/testing/__init__.py +273 -0
- dlt_saga/testing/fixtures.py +111 -0
- dlt_saga/utility/__init__.py +10 -0
- dlt_saga/utility/auth/__init__.py +0 -0
- dlt_saga/utility/auth/databricks.py +220 -0
- dlt_saga/utility/auth/gcp.py +74 -0
- dlt_saga/utility/auth/providers.py +187 -0
- dlt_saga/utility/cli/__init__.py +10 -0
- dlt_saga/utility/cli/common.py +258 -0
- dlt_saga/utility/cli/context.py +225 -0
- dlt_saga/utility/cli/gcloud_auth.py +280 -0
- dlt_saga/utility/cli/logging.py +140 -0
- dlt_saga/utility/cli/profiles.py +523 -0
- dlt_saga/utility/cli/reporting.py +92 -0
- dlt_saga/utility/cli/selectors.py +320 -0
- dlt_saga/utility/env.py +17 -0
- dlt_saga/utility/gcp/__init__.py +7 -0
- dlt_saga/utility/gcp/client_pool.py +390 -0
- dlt_saga/utility/gcp/secrets.py +75 -0
- dlt_saga/utility/generate_schemas.py +844 -0
- dlt_saga/utility/naming.py +166 -0
- dlt_saga/utility/optional_deps.py +116 -0
- dlt_saga/utility/orchestration/__init__.py +7 -0
- dlt_saga/utility/orchestration/cloud_run_trigger.py +180 -0
- dlt_saga/utility/orchestration/execution_plan.py +601 -0
- dlt_saga/utility/orchestration/providers.py +196 -0
- dlt_saga/utility/secrets/__init__.py +27 -0
- dlt_saga/utility/secrets/azure.py +106 -0
- dlt_saga/utility/secrets/providers.py +94 -0
- dlt_saga/utility/secrets/resolver.py +254 -0
- dlt_saga/utility/secrets/secret_str.py +68 -0
- dlt_saga/validate.py +191 -0
- dlt_saga-0.2.2.dist-info/METADATA +230 -0
- dlt_saga-0.2.2.dist-info/RECORD +122 -0
- dlt_saga-0.2.2.dist-info/WHEEL +5 -0
- dlt_saga-0.2.2.dist-info/entry_points.txt +2 -0
- dlt_saga-0.2.2.dist-info/licenses/LICENSE +201 -0
- dlt_saga-0.2.2.dist-info/top_level.txt +1 -0
dlt_saga/__init__.py
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
"""dlt-saga: a config-driven data ingestion framework built on dlt."""
|
|
2
|
+
|
|
3
|
+
# Stable plugin API version. Increment when the contract for
|
|
4
|
+
# BasePipeline, Destination, or AccessManager changes in a way
|
|
5
|
+
# that existing plugins must adapt to.
|
|
6
|
+
PLUGIN_API_VERSION = 1
|
|
7
|
+
|
|
8
|
+
__all__ = [
|
|
9
|
+
"PLUGIN_API_VERSION",
|
|
10
|
+
"AuthenticationError",
|
|
11
|
+
"Session",
|
|
12
|
+
"SessionResult",
|
|
13
|
+
"PipelineResult",
|
|
14
|
+
"HookContext",
|
|
15
|
+
"get_hook_registry",
|
|
16
|
+
]
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def __getattr__(name: str):
    """Resolve public API symbols lazily, on first attribute access.

    Nothing heavy (DestinationFactory, BigQuery SDK, etc.) is imported at
    ``import dlt_saga`` time. This matters because dlt's normalize step forks
    worker processes that inherit the parent's memory — eager imports would
    bloat every worker.

    Each resolved symbol is stored in the module globals, so later lookups of
    the same name are served from there. Names that belong to the same
    submodule are cached together on the first request for any one of them.

    Raises:
        AttributeError: If *name* is not one of the lazily exported symbols.
    """
    module_namespace = globals()

    if name == "AuthenticationError":
        from dlt_saga.utility.auth.providers import AuthenticationError

        module_namespace["AuthenticationError"] = AuthenticationError
        return AuthenticationError

    if name in ("Session", "SessionResult", "PipelineResult"):
        from dlt_saga.session import PipelineResult, Session, SessionResult

        module_namespace["Session"] = Session
        module_namespace["SessionResult"] = SessionResult
        module_namespace["PipelineResult"] = PipelineResult
        return module_namespace[name]

    if name in ("HookContext", "get_hook_registry"):
        from dlt_saga.hooks.registry import HookContext, get_hook_registry

        module_namespace["HookContext"] = HookContext
        module_namespace["get_hook_registry"] = get_hook_registry
        return module_namespace[name]

    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
"""saga ai-setup — generate AI context file for the current project."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import hashlib
|
|
6
|
+
import logging
|
|
7
|
+
import re
|
|
8
|
+
from datetime import datetime, timezone
|
|
9
|
+
from importlib.metadata import version as pkg_version
|
|
10
|
+
from importlib.resources import files as pkg_files
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
|
|
13
|
+
logger = logging.getLogger(__name__)
|
|
14
|
+
|
|
15
|
+
# The generated file name in the user's project root.
|
|
16
|
+
AI_CONTEXT_FILENAME = "saga_ai_context.md"
|
|
17
|
+
|
|
18
|
+
# Embedded in the generated file to detect content changes across versions.
|
|
19
|
+
_TEMPLATE_HASH_PREFIX = "<!-- template-hash: "
|
|
20
|
+
_TEMPLATE_HASH_PATTERN = re.compile(r"<!-- template-hash: ([a-f0-9]+) -->")
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _get_saga_version() -> str:
|
|
24
|
+
"""Return the installed dlt-saga version, or 'unknown'."""
|
|
25
|
+
try:
|
|
26
|
+
return pkg_version("dlt-saga")
|
|
27
|
+
except Exception:
|
|
28
|
+
return "unknown"
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _read_template() -> str:
    """Load the AI context template bundled inside the ``dlt_saga`` package.

    Returns:
        The UTF-8 decoded text of ``templates/ai_context.md``.
    """
    package_root = pkg_files("dlt_saga")
    template_resource = package_root.joinpath("templates").joinpath("ai_context.md")
    return template_resource.read_text(encoding="utf-8")
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _template_hash(template: str) -> str:
|
|
38
|
+
"""Return a short hash of the raw template (before rendering)."""
|
|
39
|
+
return hashlib.sha256(template.encode("utf-8")).hexdigest()[:12]
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _render_template(template: str, version: str) -> str:
    """Substitute the placeholders and append the template-hash marker.

    ``{version}`` is replaced with *version* and ``{date}`` with the current
    UTC date formatted as ``YYYY-MM-DD``. The hash is taken from the raw
    template, before any substitution.

    Returns:
        The rendered text, ending with an HTML comment that carries the hash.
    """
    today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
    digest = _template_hash(template)

    body = template.replace("{version}", version)
    body = body.replace("{date}", today)

    # The marker goes last, as an HTML comment on its own line.
    marker = f"\n{_TEMPLATE_HASH_PREFIX}{digest} -->\n"
    return body + marker
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _get_generated_hash(project_dir: Path) -> str | None:
    """Extract the template hash recorded in an existing generated file.

    Args:
        project_dir: Directory expected to contain ``AI_CONTEXT_FILENAME``.

    Returns:
        The hex digest captured from the last ``template-hash`` marker in the
        file, or ``None`` when the file is missing, cannot be read or decoded,
        or contains no marker.
    """
    context_file = project_dir / AI_CONTEXT_FILENAME
    try:
        # Read directly instead of checking exists() first: one filesystem
        # access, and no window for the file to vanish between check and read.
        content = context_file.read_text(encoding="utf-8")
    except FileNotFoundError:
        # No generated file yet is the normal case, not worth logging.
        return None
    except (OSError, ValueError) as exc:
        # OSError: unreadable path (permissions, directory, ...).
        # ValueError: undecodable bytes (UnicodeDecodeError) or an invalid path.
        logger.debug("Could not read %s: %s", context_file, exc)
        return None

    # The marker is appended at the very end of the generated file, so the
    # last match is authoritative even if the body contains a look-alike.
    markers = _TEMPLATE_HASH_PATTERN.findall(content)
    return markers[-1] if markers else None
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def check_staleness(project_dir: Path) -> str | None:
    """Report whether the generated AI context file lags behind the template.

    The hash recorded in the generated file is compared with the hash of the
    template currently shipped with the package. Only a change in template
    content triggers a warning — a version bump that leaves the template
    untouched stays silent.

    Returns:
        A warning message when the hashes differ; ``None`` when they match,
        when no recorded hash is found, or when the packaged template cannot
        be read.
    """
    recorded = _get_generated_hash(project_dir)
    if recorded is None:
        return None

    try:
        packaged = _template_hash(_read_template())
    except Exception:
        # Any failure to load or hash the template means nothing to report.
        return None

    if recorded == packaged:
        return None

    installed = _get_saga_version()
    return (
        f"saga_ai_context.md is outdated (template changed in v{installed}). "
        "Run `saga ai-setup` to update."
    )
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def run_ai_setup(project_dir: Path | None = None) -> None:
    """Write the AI context file and print follow-up instructions.

    Args:
        project_dir: Directory that receives ``AI_CONTEXT_FILENAME``. Defaults
            to the current working directory when ``None``.
    """
    import typer

    if project_dir is None:
        project_dir = Path.cwd()

    version = _get_saga_version()
    rendered = _render_template(_read_template(), version)

    destination = project_dir / AI_CONTEXT_FILENAME
    destination.write_text(rendered, encoding="utf-8")

    # One entry per echoed line, in output order.
    messages = (
        f"Generated {AI_CONTEXT_FILENAME} (dlt-saga v{version})",
        "",
        "Add this to your AI assistant's context file:",
        "",
        " When working with dlt-saga pipelines, pipeline configs, or the saga CLI,",
        " read ./saga_ai_context.md for framework patterns and guidance.",
        "",
        "Where to place it depends on your tool:",
        " - AGENTS.md (recommended — works with Claude Code, Copilot, Cursor, Windsurf, and 20+ others)",
        " - Claude Code only: .claude/CLAUDE.md",
        " - Cursor only: .cursorrules",
        " - GitHub Copilot only: .github/copilot-instructions.md",
        " - Windsurf only: .windsurfrules",
        "",
        "Re-run `saga ai-setup` after upgrading dlt-saga to keep the context current.",
    )
    for message in messages:
        typer.echo(message)
|