dlt-saga 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122)
  1. dlt_saga/__init__.py +46 -0
  2. dlt_saga/ai_setup_command.py +126 -0
  3. dlt_saga/cli.py +2221 -0
  4. dlt_saga/defaults.py +114 -0
  5. dlt_saga/destinations/__init__.py +10 -0
  6. dlt_saga/destinations/base.py +677 -0
  7. dlt_saga/destinations/bigquery/__init__.py +11 -0
  8. dlt_saga/destinations/bigquery/access.py +287 -0
  9. dlt_saga/destinations/bigquery/base.py +643 -0
  10. dlt_saga/destinations/bigquery/config.py +134 -0
  11. dlt_saga/destinations/bigquery/destination.py +1473 -0
  12. dlt_saga/destinations/config.py +63 -0
  13. dlt_saga/destinations/databricks/__init__.py +0 -0
  14. dlt_saga/destinations/databricks/access.py +203 -0
  15. dlt_saga/destinations/databricks/config.py +155 -0
  16. dlt_saga/destinations/databricks/destination.py +843 -0
  17. dlt_saga/destinations/duckdb/__init__.py +6 -0
  18. dlt_saga/destinations/duckdb/config.py +66 -0
  19. dlt_saga/destinations/duckdb/destination.py +371 -0
  20. dlt_saga/destinations/factory.py +146 -0
  21. dlt_saga/historize/__init__.py +1 -0
  22. dlt_saga/historize/config.py +216 -0
  23. dlt_saga/historize/factory.py +133 -0
  24. dlt_saga/historize/runner.py +862 -0
  25. dlt_saga/historize/sql.py +585 -0
  26. dlt_saga/historize/state.py +315 -0
  27. dlt_saga/hooks/__init__.py +63 -0
  28. dlt_saga/hooks/loader.py +170 -0
  29. dlt_saga/hooks/registry.py +136 -0
  30. dlt_saga/init_command.py +660 -0
  31. dlt_saga/packages.py +204 -0
  32. dlt_saga/pipeline_config/__init__.py +29 -0
  33. dlt_saga/pipeline_config/base_config.py +515 -0
  34. dlt_saga/pipeline_config/file_config.py +649 -0
  35. dlt_saga/pipeline_config/naming.py +191 -0
  36. dlt_saga/pipelines/__init__.py +13 -0
  37. dlt_saga/pipelines/api/__init__.py +5 -0
  38. dlt_saga/pipelines/api/base.py +509 -0
  39. dlt_saga/pipelines/api/config.py +188 -0
  40. dlt_saga/pipelines/api/pipeline.py +24 -0
  41. dlt_saga/pipelines/base_client.py +16 -0
  42. dlt_saga/pipelines/base_config.py +106 -0
  43. dlt_saga/pipelines/base_pipeline.py +651 -0
  44. dlt_saga/pipelines/database/__init__.py +14 -0
  45. dlt_saga/pipelines/database/client.py +409 -0
  46. dlt_saga/pipelines/database/config.py +185 -0
  47. dlt_saga/pipelines/database/pipeline.py +120 -0
  48. dlt_saga/pipelines/executor.py +80 -0
  49. dlt_saga/pipelines/filesystem/__init__.py +12 -0
  50. dlt_saga/pipelines/filesystem/client.py +877 -0
  51. dlt_saga/pipelines/filesystem/config.py +221 -0
  52. dlt_saga/pipelines/filesystem/pipeline.py +440 -0
  53. dlt_saga/pipelines/google_sheets/__init__.py +12 -0
  54. dlt_saga/pipelines/google_sheets/client.py +137 -0
  55. dlt_saga/pipelines/google_sheets/config.py +64 -0
  56. dlt_saga/pipelines/google_sheets/pipeline.py +129 -0
  57. dlt_saga/pipelines/native_load/__init__.py +1 -0
  58. dlt_saga/pipelines/native_load/_sql.py +16 -0
  59. dlt_saga/pipelines/native_load/config.py +455 -0
  60. dlt_saga/pipelines/native_load/pipeline.py +851 -0
  61. dlt_saga/pipelines/native_load/state.py +371 -0
  62. dlt_saga/pipelines/native_load/storage/__init__.py +47 -0
  63. dlt_saga/pipelines/native_load/storage/adls.py +154 -0
  64. dlt_saga/pipelines/native_load/storage/base.py +42 -0
  65. dlt_saga/pipelines/native_load/storage/gcs.py +77 -0
  66. dlt_saga/pipelines/registry.py +369 -0
  67. dlt_saga/pipelines/sharepoint/__init__.py +12 -0
  68. dlt_saga/pipelines/sharepoint/client.py +207 -0
  69. dlt_saga/pipelines/sharepoint/config.py +128 -0
  70. dlt_saga/pipelines/sharepoint/pipeline.py +106 -0
  71. dlt_saga/pipelines/target/__init__.py +0 -0
  72. dlt_saga/pipelines/target/config.py +304 -0
  73. dlt_saga/pipelines/target/writer.py +83 -0
  74. dlt_saga/project_config.py +523 -0
  75. dlt_saga/report/__init__.py +5 -0
  76. dlt_saga/report/collector.py +460 -0
  77. dlt_saga/report/favicon.svg +13 -0
  78. dlt_saga/report/generator.py +211 -0
  79. dlt_saga/report/report.css +291 -0
  80. dlt_saga/report/report.js +1346 -0
  81. dlt_saga/report/uploader.py +109 -0
  82. dlt_saga/schemas/dlt_common.json +310 -0
  83. dlt_saga/session.py +769 -0
  84. dlt_saga/templates/ai_context.md +268 -0
  85. dlt_saga/testing/__init__.py +273 -0
  86. dlt_saga/testing/fixtures.py +111 -0
  87. dlt_saga/utility/__init__.py +10 -0
  88. dlt_saga/utility/auth/__init__.py +0 -0
  89. dlt_saga/utility/auth/databricks.py +220 -0
  90. dlt_saga/utility/auth/gcp.py +74 -0
  91. dlt_saga/utility/auth/providers.py +187 -0
  92. dlt_saga/utility/cli/__init__.py +10 -0
  93. dlt_saga/utility/cli/common.py +258 -0
  94. dlt_saga/utility/cli/context.py +225 -0
  95. dlt_saga/utility/cli/gcloud_auth.py +280 -0
  96. dlt_saga/utility/cli/logging.py +140 -0
  97. dlt_saga/utility/cli/profiles.py +523 -0
  98. dlt_saga/utility/cli/reporting.py +92 -0
  99. dlt_saga/utility/cli/selectors.py +320 -0
  100. dlt_saga/utility/env.py +17 -0
  101. dlt_saga/utility/gcp/__init__.py +7 -0
  102. dlt_saga/utility/gcp/client_pool.py +390 -0
  103. dlt_saga/utility/gcp/secrets.py +75 -0
  104. dlt_saga/utility/generate_schemas.py +844 -0
  105. dlt_saga/utility/naming.py +166 -0
  106. dlt_saga/utility/optional_deps.py +116 -0
  107. dlt_saga/utility/orchestration/__init__.py +7 -0
  108. dlt_saga/utility/orchestration/cloud_run_trigger.py +180 -0
  109. dlt_saga/utility/orchestration/execution_plan.py +601 -0
  110. dlt_saga/utility/orchestration/providers.py +196 -0
  111. dlt_saga/utility/secrets/__init__.py +27 -0
  112. dlt_saga/utility/secrets/azure.py +106 -0
  113. dlt_saga/utility/secrets/providers.py +94 -0
  114. dlt_saga/utility/secrets/resolver.py +254 -0
  115. dlt_saga/utility/secrets/secret_str.py +68 -0
  116. dlt_saga/validate.py +191 -0
  117. dlt_saga-0.2.2.dist-info/METADATA +230 -0
  118. dlt_saga-0.2.2.dist-info/RECORD +122 -0
  119. dlt_saga-0.2.2.dist-info/WHEEL +5 -0
  120. dlt_saga-0.2.2.dist-info/entry_points.txt +2 -0
  121. dlt_saga-0.2.2.dist-info/licenses/LICENSE +201 -0
  122. dlt_saga-0.2.2.dist-info/top_level.txt +1 -0
dlt_saga/__init__.py ADDED
@@ -0,0 +1,46 @@
1
+ """dlt-saga: a config-driven data ingestion framework built on dlt."""
2
+
3
# Stable plugin API version. Increment when the contract for
# BasePipeline, Destination, or AccessManager changes in a way
# that existing plugins must adapt to.
PLUGIN_API_VERSION = 1

# Public names of the package. Only PLUGIN_API_VERSION is bound at import
# time; every other name listed here is imported from its submodule on first
# attribute access by the module-level __getattr__ defined below.
__all__ = [
    "PLUGIN_API_VERSION",
    "AuthenticationError",
    "Session",
    "SessionResult",
    "PipelineResult",
    "HookContext",
    "get_hook_registry",
]
17
+
18
+
19
def __getattr__(name: str):
    """Resolve a public API symbol lazily, the first time it is requested.

    Nothing heavy (DestinationFactory, BigQuery SDK, etc.) is imported at
    ``import dlt_saga`` time. This matters because dlt's normalize step forks
    worker processes that inherit the parent's memory, so eager imports would
    bloat every worker.

    Each resolved symbol, together with the siblings imported alongside it,
    is stored in the module globals so later lookups bypass this hook.

    Args:
        name: Attribute requested on the package.

    Returns:
        The object bound to ``name`` in its defining submodule.

    Raises:
        AttributeError: If ``name`` is not one of the lazily exported symbols.
    """
    module_namespace = globals()

    if name == "AuthenticationError":
        from dlt_saga.utility.auth.providers import AuthenticationError

        module_namespace["AuthenticationError"] = AuthenticationError
    elif name in ("Session", "SessionResult", "PipelineResult"):
        from dlt_saga.session import PipelineResult, Session, SessionResult

        module_namespace["Session"] = Session
        module_namespace["SessionResult"] = SessionResult
        module_namespace["PipelineResult"] = PipelineResult
    elif name in ("HookContext", "get_hook_registry"):
        from dlt_saga.hooks.registry import HookContext, get_hook_registry

        module_namespace["HookContext"] = HookContext
        module_namespace["get_hook_registry"] = get_hook_registry
    else:
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

    return module_namespace[name]
dlt_saga/ai_setup_command.py ADDED
@@ -0,0 +1,126 @@
1
+ """saga ai-setup — generate AI context file for the current project."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import hashlib
6
+ import logging
7
+ import re
8
+ from datetime import datetime, timezone
9
+ from importlib.metadata import version as pkg_version
10
+ from importlib.resources import files as pkg_files
11
+ from pathlib import Path
12
+
13
logger = logging.getLogger(__name__)

# The generated file name in the user's project root.
AI_CONTEXT_FILENAME = "saga_ai_context.md"

# Embedded in the generated file to detect content changes across versions.
# The full marker has the form "<!-- template-hash: <hex digits> -->".
_TEMPLATE_HASH_PREFIX = "<!-- template-hash: "
# Derived from the prefix above so the pattern that reads the marker cannot
# drift from the prefix used to write it.
_TEMPLATE_HASH_PATTERN = re.compile(
    re.escape(_TEMPLATE_HASH_PREFIX) + r"([a-f0-9]+) -->"
)
21
+
22
+
23
+ def _get_saga_version() -> str:
24
+ """Return the installed dlt-saga version, or 'unknown'."""
25
+ try:
26
+ return pkg_version("dlt-saga")
27
+ except Exception:
28
+ return "unknown"
29
+
30
+
31
+ def _read_template() -> str:
32
+ """Read the AI context template shipped with the package."""
33
+ template_path = pkg_files("dlt_saga") / "templates" / "ai_context.md"
34
+ return template_path.read_text(encoding="utf-8")
35
+
36
+
37
+ def _template_hash(template: str) -> str:
38
+ """Return a short hash of the raw template (before rendering)."""
39
+ return hashlib.sha256(template.encode("utf-8")).hexdigest()[:12]
40
+
41
+
42
+ def _render_template(template: str, version: str) -> str:
43
+ """Fill in version, date, and template hash placeholders."""
44
+ now = datetime.now(timezone.utc).strftime("%Y-%m-%d")
45
+ content_hash = _template_hash(template)
46
+ rendered = template.replace("{version}", version).replace("{date}", now)
47
+ # Append hash as an HTML comment at the end of the generated file.
48
+ rendered += f"\n{_TEMPLATE_HASH_PREFIX}{content_hash} -->\n"
49
+ return rendered
50
+
51
+
52
+ def _get_generated_hash(project_dir: Path) -> str | None:
53
+ """Extract the template hash from an existing generated file, or None."""
54
+ context_file = project_dir / AI_CONTEXT_FILENAME
55
+ if not context_file.exists():
56
+ return None
57
+ try:
58
+ content = context_file.read_text(encoding="utf-8")
59
+ match = _TEMPLATE_HASH_PATTERN.search(content)
60
+ if match:
61
+ return match.group(1)
62
+ except Exception:
63
+ pass
64
+ return None
65
+
66
+
67
+ def check_staleness(project_dir: Path) -> str | None:
68
+ """Check if the AI context file is outdated.
69
+
70
+ Compares the template hash embedded in the generated file against the
71
+ current template shipped with the package. Only warns if the template
72
+ content actually changed — version-only bumps are silent.
73
+
74
+ Returns a warning message if stale, or None if up-to-date / not present.
75
+ """
76
+ generated_hash = _get_generated_hash(project_dir)
77
+ if generated_hash is None:
78
+ return None
79
+ try:
80
+ current_hash = _template_hash(_read_template())
81
+ except Exception:
82
+ return None
83
+ if generated_hash != current_hash:
84
+ current_version = _get_saga_version()
85
+ return (
86
+ f"saga_ai_context.md is outdated (template changed in v{current_version}). "
87
+ f"Run `saga ai-setup` to update."
88
+ )
89
+ return None
90
+
91
+
92
+ def run_ai_setup(project_dir: Path | None = None) -> None:
93
+ """Generate the AI context file and print setup instructions."""
94
+ import typer
95
+
96
+ if project_dir is None:
97
+ project_dir = Path.cwd()
98
+
99
+ version = _get_saga_version()
100
+ template = _read_template()
101
+ content = _render_template(template, version)
102
+
103
+ output_path = project_dir / AI_CONTEXT_FILENAME
104
+ output_path.write_text(content, encoding="utf-8")
105
+
106
+ typer.echo(f"Generated {AI_CONTEXT_FILENAME} (dlt-saga v{version})")
107
+ typer.echo("")
108
+ typer.echo("Add this to your AI assistant's context file:")
109
+ typer.echo("")
110
+ typer.echo(
111
+ " When working with dlt-saga pipelines, pipeline configs, or the saga CLI,"
112
+ )
113
+ typer.echo(" read ./saga_ai_context.md for framework patterns and guidance.")
114
+ typer.echo("")
115
+ typer.echo("Where to place it depends on your tool:")
116
+ typer.echo(
117
+ " - AGENTS.md (recommended — works with Claude Code, Copilot, Cursor, Windsurf, and 20+ others)"
118
+ )
119
+ typer.echo(" - Claude Code only: .claude/CLAUDE.md")
120
+ typer.echo(" - Cursor only: .cursorrules")
121
+ typer.echo(" - GitHub Copilot only: .github/copilot-instructions.md")
122
+ typer.echo(" - Windsurf only: .windsurfrules")
123
+ typer.echo("")
124
+ typer.echo(
125
+ "Re-run `saga ai-setup` after upgrading dlt-saga to keep the context current."
126
+ )