dbdocs 0.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
dbdocs/__init__.py ADDED
File without changes
dbdocs/__main__.py ADDED
@@ -0,0 +1,3 @@
1
# Package entry script: imports ``main`` from the ``dbdocs`` package and calls
# its ``main()`` function.
# NOTE(review): presumably a ``dbdocs/main.py`` exposing ``main()`` exists
# elsewhere in the package — it is not visible from here; confirm.
from dbdocs import main

main.main()
dbdocs/cli/__init__.py ADDED
File without changes
dbdocs/cli/main.py ADDED
@@ -0,0 +1,86 @@
1
+ import functools
2
+ import importlib.metadata
3
+ import socketserver
4
+ from http.server import SimpleHTTPRequestHandler
5
+
6
+ import click
7
+
8
+ from dbdocs.core.config import DbDocsConfig
9
+ from dbdocs.core.exceptions import DbDocsError
10
+ from dbdocs.core.log import logger
11
+ from dbdocs.site import deploy as deploy_module
12
+ from dbdocs.site.builder import ReportBuilder
13
+
14
# Resolve the installed distribution's version. When dbdocs is imported from a
# source checkout that was never installed there is no distribution metadata,
# and ``importlib.metadata.version`` raises ``PackageNotFoundError`` — fall back
# to a placeholder instead of crashing the CLI at import time.
try:
    __version__ = importlib.metadata.version("dbdocs")
except importlib.metadata.PackageNotFoundError:
    __version__ = "0.0.0+unknown"
15
+
16
+
17
# dbdocs
@click.group(
    context_settings={"help_option_names": ["-h", "--help"]},
    invoke_without_command=True,
    no_args_is_help=True,
    epilog="Specify one of these sub-commands and you can find more help from there.",
)
@click.version_option(__version__)
@click.option("-c", "--config", "config_path", default=None, help="Path to dbdocs.yml.")
@click.pass_context
def dbdocs(ctx, config_path):
    """Alternative dbt docs site: dbt docs + ERD + column-level lineage."""
    # The docstring above is the help text click prints for the group.
    logger.info("Run with dbdocs==%s", __version__)
    try:
        loaded_config = DbDocsConfig.load(config_path)
    except DbDocsError as err:
        # Surface config problems as a clean CLI error rather than a traceback.
        raise click.ClickException(str(err)) from err
    # Sub-commands receive the loaded config through ``click.pass_obj``.
    ctx.obj = loaded_config
34
+
35
+
36
@dbdocs.command(name="generate")
@click.option("-o", "--output-dir", default=None, help="Where to write the site (default: config).")
@click.option(
    "--dialect", default=None, help="SQL dialect for column lineage (default: adapter_type)."
)
@click.pass_obj
def generate(config: DbDocsConfig, output_dir, dialect):
    """Build the self-contained site from dbt artifacts."""
    # A dialect given on the command line overrides the configured one.
    if dialect is not None:
        config.dialect = dialect
    try:
        site_dir = ReportBuilder(config).generate(output_dir=output_dir)
    except DbDocsError as err:
        raise click.ClickException(str(err)) from err
    else:
        click.echo(f"Generated site into {site_dir}")
51
+
52
+
53
@dbdocs.command(name="serve")
@click.option("-p", "--port", default=8000, show_default=True, help="Port to serve on.")
@click.pass_obj
def serve(config: DbDocsConfig, port):
    """Serve the generated site locally (static http server)."""

    # Enable address reuse on a private subclass instead of assigning to
    # ``socketserver.ThreadingTCPServer`` itself, which would silently change
    # every other user of that class in the process.
    class _LocalSiteServer(socketserver.ThreadingTCPServer):
        allow_reuse_address = True

    handler = functools.partial(SimpleHTTPRequestHandler, directory=config.output_path)
    try:
        httpd = _LocalSiteServer(("127.0.0.1", port), handler)
    except OSError as exc:
        # Typically "address already in use": report it as a CLI error rather
        # than an unhandled traceback.
        raise click.ClickException(f"Could not listen on 127.0.0.1:{port}: {exc}") from exc

    with httpd:
        click.echo(f"Serving {config.output_path} at http://127.0.0.1:{port} (Ctrl-C to stop)")
        httpd.serve_forever()
63
+
64
+
65
@dbdocs.command(name="deploy")
@click.option("--version", "version", required=True, help="Version label to deploy (e.g. 1.2).")
@click.option("--alias", default=None, help="Moving alias for this version (e.g. latest).")
@click.option(
    "--title", default=None, help="Display title for this version (default: the version)."
)
@click.option(
    "--delete", "delete", is_flag=True, default=False, help="Delete this version instead."
)
@click.option("--push/--no-push", default=False, help="Publish to the gh-pages branch.")
@click.pass_obj
def deploy(config: DbDocsConfig, version, alias, title, delete, push):
    """Generate a versioned build and update the version index (or --delete one)."""
    try:
        if delete:
            # Deletion ignores --alias/--title; only the version and push flag apply.
            deploy_module.delete(config, version=version, push=push)
            message = f"Deleted version {version}"
        else:
            site_dir = deploy_module.deploy(
                config, version=version, alias=alias, push=push, title=title
            )
            message = f"Deployed version {version} into {site_dir}"
    except DbDocsError as err:
        raise click.ClickException(str(err)) from err
    click.echo(message)
File without changes
@@ -0,0 +1,82 @@
1
+ """Loading dbt artifacts (manifest/catalog) via the dbterd parser.
2
+
3
+ dbterd parses ``manifest.json`` / ``catalog.json`` into ``dbt_artifacts_parser``
4
+ Pydantic models. Two cross-cutting gotchas live here so the rest of dbdocs never
5
+ has to think about them:
6
+
7
+ * **Schema field aliasing.** ``dbt_artifacts_parser`` aliases the ``schema``
8
+ field to ``schema_`` to avoid clobbering Pydantic's ``BaseModel.schema()`` —
9
+ so ``node.schema`` is a *bound method*, not the value. Always read
10
+ ``node.schema_``; :func:`db_schema` centralizes that.
11
+ * **Schema-version relaxation.** Passing the detected schema version to
12
+ ``read_manifest``/``read_catalog`` makes dbterd apply its relaxation policies,
13
+ keeping parsing robust across dbt versions (including dbt Core 2.0).
14
+ """
15
+
16
+ import json
17
+ from pathlib import Path
18
+ from typing import Any
19
+
20
+ from dbterd.helpers import file
21
+
22
+ #: Bucket label used when a node/source has no database or schema set.
23
+ UNKNOWN = "_unknown"
24
+
25
+ #: unique_id prefixes surfaced as catalog nodes (tests/macros/etc. excluded).
26
+ NODE_PREFIXES = ("model.", "seed.", "snapshot.")
27
+
28
+
29
def artifact_version(target_path: str, artifact: str) -> "int | None":
    """Resolve a dbt artifact's schema version int from its ``dbt_schema_version``.

    Args:
        target_path: Directory holding the dbt artifacts.
        artifact: Artifact base name without extension (``<artifact>.json`` is read).

    Returns:
        The schema version as an int, or ``None`` (auto-detect, strict) if the
        version can't be determined — the file is missing, is not valid UTF-8
        JSON, or its top level / ``metadata`` entry is not a mapping.
    """
    artifact_path = Path(target_path) / f"{artifact}.json"
    try:
        document = json.loads(artifact_path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        return None
    if not isinstance(document, dict):
        return None
    metadata = document.get("metadata", {})
    if not isinstance(metadata, dict):
        # e.g. ``"metadata": null`` — there is nothing to read a version from.
        return None
    extracted = file.extract_artifact_version_from_file(
        metadata.get("dbt_schema_version") or ""
    )
    return int(extracted) if extracted else None
42
+
43
+
44
def load_artifacts(target_path: str) -> "tuple[Any, Any]":
    """Parse a dbt target dir with dbterd and return ``(manifest, catalog)``.

    Each artifact is read with the schema version detected for that artifact
    by ``artifact_version``.
    """
    manifest_version = artifact_version(target_path, "manifest")
    manifest = file.read_manifest(path=target_path, version=manifest_version)

    catalog_version = artifact_version(target_path, "catalog")
    catalog = file.read_catalog(path=target_path, version=catalog_version)

    return manifest, catalog
51
+
52
+
53
def adapter_type(target_path: str) -> "str | None":
    """The warehouse adapter (``snowflake``/``bigquery``/…) from manifest metadata.

    Read from the raw JSON rather than the parsed model so it works regardless of
    how the parser exposes ``metadata``. Used as the default sqlglot dialect for
    column-level lineage.

    Returns:
        The ``metadata.adapter_type`` string, or ``None`` if ``manifest.json`` is
        missing, is not valid UTF-8 JSON, is not shaped as a mapping with a
        mapping ``metadata`` entry, or the value is absent / not a string.
    """
    manifest_path = Path(target_path) / "manifest.json"
    try:
        document = json.loads(manifest_path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        return None
    metadata = document.get("metadata") if isinstance(document, dict) else None
    if not isinstance(metadata, dict):
        return None
    adapter = metadata.get("adapter_type")
    # Honour the annotated return type: anything that is not a string is "unknown".
    return adapter if isinstance(adapter, str) else None
66
+
67
+
68
def db_schema(entity: Any) -> "tuple[str, str]":
    """Return the ``(database, schema)`` pair for an entity, never ``None``.

    The schema is read from ``schema_`` (not ``schema``). A missing or falsy
    database/schema is replaced by :data:`UNKNOWN`, and both parts are
    returned as strings.
    """
    database, schema = (
        getattr(entity, attribute, None) or UNKNOWN for attribute in ("database", "schema_")
    )
    return str(database), str(schema)
78
+
79
+
80
def node_name(unique_id: str) -> str:
    """Return the text after the last ``.`` of a dbt ``unique_id``.

    An id without any dot is returned unchanged.
    """
    _, _, short_name = unique_id.rpartition(".")
    return short_name
dbdocs/core/config.py ADDED
@@ -0,0 +1,117 @@
1
+ from dataclasses import asdict, dataclass, field, fields
2
+ from pathlib import Path
3
+
4
+ import yaml
5
+
6
+ from dbdocs.core.exceptions import DbDocsConfigError
7
+
8
+ DEFAULT_CONFIG_FILENAME = "dbdocs.yml"
9
+
10
+
11
@dataclass
class DbDocsConfig:
    """Site configuration for a dbdocs build.

    Loaded from a ``dbdocs.yml`` in the working directory; every field has a
    default so the file is optional. ``version`` is intentionally absent — it is
    a ``deploy`` CLI argument, not site config.

    ``target_dir`` is where the dbt artifacts are read from; ``output_dir`` is
    where the generated self-contained site is written.
    """

    site_name: str = "dbt docs"
    site_url: str = "https://github.com/datnguye/dbt-docs"
    site_author: str = "Dat Nguyen"
    site_description: str = "Alternative dbt documentation site"
    repo_name: str = "datnguye/dbt-docs"
    repo_url: str = "https://github.com/datnguye/dbt-docs"
    project_name: str = "dbt docs"
    #: The footer's Buy-me-a-coffee badge shows by default; set false to hide it.
    show_buy_me_a_coffee: bool = True
    #: Project README rendered on the overview (relative to the working dir). Set
    #: empty to omit the README section. Missing file ⇒ section simply absent.
    readme: str = "README.md"
    target_dir: str = "target"
    #: Where the generated site is written. Nested under the dbt ``target/`` by
    #: default so docs sit alongside the artifacts they're built from.
    output_dir: str = "target/site"
    #: SQL dialect for column-lineage parsing; ``None`` ⇒ derive from the
    #: artifact's ``adapter_type`` (e.g. snowflake, bigquery, postgres).
    dialect: "str | None" = None
    #: Alias the SPA's version switcher treats as the default landing version.
    default_version: str = "latest"
    #: dbterd ERD options (``algo``, ``entity_name_format``, ``select``,
    #: ``resource_type``, …) passed straight to ``DbtErd``. Configured here so the
    #: ERD shape lives in ``dbdocs.yml`` rather than a separate ``.dbterd.yml``.
    dbterd: dict = field(default_factory=dict)

    @classmethod
    def load(cls, path: "str | Path | None" = None) -> "DbDocsConfig":
        """Load config from ``path`` (or ``./dbdocs.yml``); all-defaults if absent.

        Args:
            path: Config file location. ``None`` means ``dbdocs.yml`` in the
                current working directory.

        Returns:
            The parsed config. A path that is not an existing file, or a file
            that parses to nothing, yields an all-defaults instance.

        Raises:
            DbDocsConfigError: The file can't be read or decoded, is not valid
                YAML, is not a mapping, or contains keys that are not fields.
        """
        config_path = Path(path) if path is not None else Path.cwd() / DEFAULT_CONFIG_FILENAME
        if not config_path.is_file():
            return cls()

        # Read and parse separately so an I/O or decoding failure is reported as
        # a config error too, instead of escaping as a raw traceback.
        try:
            text = config_path.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError) as exc:
            raise DbDocsConfigError(f"Could not read {config_path}: {exc}") from exc
        try:
            raw = yaml.safe_load(text)
        except yaml.YAMLError as exc:
            raise DbDocsConfigError(f"Could not parse {config_path}: {exc}") from exc

        if raw is None:
            return cls()
        if not isinstance(raw, dict):
            raise DbDocsConfigError(
                f"{config_path} must contain a mapping, got {type(raw).__name__}"
            )

        known = {f.name for f in fields(cls)}
        # YAML keys need not be strings (``1: x``); stringify before sorting and
        # joining so such a key is reported instead of raising TypeError.
        unknown = sorted(str(key) for key in raw if key not in known)
        if unknown:
            raise DbDocsConfigError(
                f"Unknown keys in {config_path}: {', '.join(unknown)}. "
                f"Allowed keys: {', '.join(sorted(known))}."
            )
        return cls(**raw)

    #: Build-control fields that are not part of the site's display metadata.
    _NON_METADATA_FIELDS = (
        "target_dir",
        "output_dir",
        "dialect",
        "default_version",
        "dbterd",
        "readme",
    )

    def render_context(self) -> dict:
        """The site display metadata injected into the SPA's ``metadata`` block.

        Excludes build-control fields (where artifacts are read, where the site
        is written, the lineage dialect override) that aren't site metadata.
        """
        context = asdict(self)
        for field_name in self._NON_METADATA_FIELDS:
            context.pop(field_name, None)
        return context

    @property
    def target_path(self) -> str:
        """Absolute path to the dbt target/ dir where the artifacts live.

        A relative ``target_dir`` is resolved against the current working
        directory **at access time** — this is intentional and must stay aligned
        with dbterd's ``DbtErd``, which also reads artifacts from ``./target``
        relative to the cwd. An absolute ``target_dir`` is returned unchanged.
        """
        return str(Path.cwd() / self.target_dir)

    @property
    def output_path(self) -> str:
        """Absolute path to the dir the generated site is written into.

        Resolved against the cwd at access time, mirroring ``target_path`` — a
        relative ``output_dir`` follows the working directory, an absolute one is
        returned unchanged.
        """
        return str(Path.cwd() / self.output_dir)
@@ -0,0 +1,24 @@
1
+ """dbdocs exception types.
2
+
3
+ Multiple exception classes may share one file (per the project's Python style).
4
+ """
5
+
6
+
7
class DbDocsError(Exception):
    """Root of the dbdocs exception hierarchy; every dbdocs error derives from it."""


class DbDocsConfigError(DbDocsError):
    """Signals a dbdocs.yml that is malformed or carries invalid values."""


class LineageError(DbDocsError):
    """Signals that column-level lineage could not be parsed for a model.

    The extractor always catches this per model, so a single unparseable model
    is skipped and logged rather than failing the whole ``generate``.
    """


class DeployError(DbDocsError):
    """Signals a failed step of a versioned deploy (e.g. the git push)."""
dbdocs/core/log.py ADDED
@@ -0,0 +1,58 @@
1
+ import logging
2
+ from pathlib import Path
3
+
4
+ #: Where the DEBUG-level file log is streamed (relative to the working dir).
5
+ LOG_FILE = Path("logs") / "dbdocs.log"
6
+ #: Plain (non-ANSI) line format for the file — colour codes don't belong in a file.
7
+ FILE_FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(message)s (%(filename)s:%(lineno)d)"
8
+
9
+
10
class LogFormatter(logging.Formatter):
    """Console formatter that wraps each record in an ANSI colour chosen by level."""

    grey = "\x1b[38;20m"
    blue = "\x1b[34;20m"
    yellow = "\x1b[33;20m"
    red = "\x1b[31;20m"
    bold_red = "\x1b[31;1m"
    reset = "\x1b[0m"
    # Deliberately not named ``format``: that name belongs to the method below,
    # which would otherwise silently replace this string on the class.
    BASE_FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(message)s (%(filename)s:%(lineno)d)"

    #: Colour-wrapped format string for each standard logging level.
    FORMATS = {
        logging.DEBUG: blue + BASE_FORMAT + reset,
        logging.INFO: grey + BASE_FORMAT + reset,
        logging.WARNING: yellow + BASE_FORMAT + reset,
        logging.ERROR: red + BASE_FORMAT + reset,
        logging.CRITICAL: bold_red + BASE_FORMAT + reset,
    }

    def format(self, record):
        """Format ``record`` with the colour registered for its level.

        Levels without an entry in ``FORMATS`` (custom numeric levels) use the
        uncoloured base layout, so they still carry timestamp, logger name and
        source location.
        """
        log_fmt = self.FORMATS.get(record.levelno, self.BASE_FORMAT)
        return logging.Formatter(log_fmt).format(record)
31
+
32
+
33
# The logger is called "dbdocs" rather than "dbterd" so that its handler and
# level setup cannot collide with the dbterd library's own logger.
logger = logging.getLogger("dbdocs")
logger.setLevel(logging.DEBUG)
# Records go through our own handlers only. If they also propagated to the root
# logger — which dbterd configures via basicConfig — each message would show up
# a second time as a differently formatted "INFO:dbdocs:…" line.
logger.propagate = False

if not logger.handlers:  # pragma: no cover - import-time handler guard
    # Coloured console output.
    ch = logging.StreamHandler()
    ch.setLevel(logging.DEBUG)
    ch.setFormatter(LogFormatter())
    logger.addHandler(ch)

    # Mirror everything (DEBUG and up) into logs/dbdocs.log as well. This is
    # best-effort by design: when the logs dir cannot be created or written
    # (read-only fs) the console handler keeps working and import does not fail.
    try:
        LOG_FILE.parent.mkdir(parents=True, exist_ok=True)
        fh = logging.FileHandler(LOG_FILE, encoding="utf-8")
        fh.setLevel(logging.DEBUG)
        fh.setFormatter(logging.Formatter(FILE_FORMAT))
        logger.addHandler(fh)
    except OSError:
        pass
File without changes
@@ -0,0 +1,267 @@
1
+ """Column-level lineage engine: trace a SELECT's output column to its sources.
2
+
3
+ A self-contained lineage builder over sqlglot's optimizer, with case-insensitive
4
+ column resolution and cycle-safe recursion so it copes with dbt-compiled SQL
5
+ (uppercased warehouse identifiers, recursive CTEs) without relying on sqlglot's
6
+ internal, version-unstable lineage API.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import logging
12
+ import typing as t
13
+ from dataclasses import dataclass, field
14
+
15
+ from sqlglot import Schema, exp, maybe_parse
16
+ from sqlglot.errors import SqlglotError
17
+ from sqlglot.optimizer import (
18
+ Scope,
19
+ build_scope,
20
+ find_all_in_scope,
21
+ normalize_identifiers,
22
+ qualify,
23
+ )
24
+ from sqlglot.optimizer.scope import ScopeType
25
+
26
+ if t.TYPE_CHECKING:
27
+ from sqlglot.dialects.dialect import DialectType
28
+
29
+ logger = logging.getLogger("sqlglot")
30
+
31
+
32
@dataclass
class Node:
    """One vertex of a lineage graph; ``downstream`` holds its child vertices."""

    name: str
    expression: exp.Expression
    source: exp.Expression
    downstream: list[Node] = field(default_factory=list)
    source_name: str = ""
    reference_node_name: str = ""

    def walk(self) -> t.Iterator[Node]:
        """Yield this node, then every descendant, depth-first in pre-order."""
        pending = [self]
        while pending:
            current = pending.pop()
            yield current
            # Push children reversed so the first child is popped (visited) first.
            pending.extend(reversed(current.downstream))
45
+
46
+
47
def lineage(
    column: str | exp.Column,
    sql: str | exp.Expression,
    schema: dict | Schema | None = None,
    sources: t.Mapping[str, str | exp.Query] | None = None,
    dialect: DialectType = None,
    scope: Scope | None = None,
    trim_selects: bool = True,
    **kwargs,
) -> Node:
    """Build the lineage graph for one output column of a SQL query.

    Args:
        column: The output column to trace.
        sql: The query, as text or an already parsed expression.
        schema: Schema passed to ``qualify``.
        sources: Mapping of name to query, expanded into ``sql`` before qualifying.
        dialect: Dialect used for parsing, normalizing and qualifying.
        scope: A pre-built scope; when given, qualification is skipped.
        trim_selects: Forwarded to ``to_node``.
        **kwargs: Extra ``qualify`` options; they override the defaults set here.

    Raises:
        SqlglotError: No scope could be built, or the column is not among the
            query's selects (after a case-insensitive retry).
    """
    expression = maybe_parse(sql, dialect=dialect)
    column = normalize_identifiers.normalize_identifiers(column, dialect=dialect).name

    if sources:
        parsed_sources = {
            name: t.cast(exp.Query, maybe_parse(query, dialect=dialect))
            for name, query in sources.items()
        }
        expression = exp.expand(expression, parsed_sources, dialect=dialect)

    if not scope:
        qualify_options = {
            "validate_qualify_columns": False,
            "identify": False,
            "allow_partial_qualification": True,
            **kwargs,
        }
        expression = qualify.qualify(
            expression, dialect=dialect, schema=schema, **qualify_options
        )
        scope = build_scope(expression)

    if not scope:
        raise SqlglotError("Cannot build lineage, sql must be SELECT")

    exact_names = {select.alias_or_name for select in scope.expression.selects}
    if column not in exact_names:
        # Case-insensitive resolution: dbt/warehouse casing rarely matches exactly.
        by_lowercase = {name.lower(): name for name in exact_names}
        resolved = by_lowercase.get(column.lower())
        if resolved is None:
            raise SqlglotError(f"Cannot find column '{column}' in query.")
        column = resolved

    return to_node(column, scope, dialect, trim_selects=trim_selects)
96
+
97
+
98
def to_node(
    column: str | int,
    scope: Scope,
    dialect: DialectType,
    scope_name: str | None = None,
    upstream: Node | None = None,
    source_name: str | None = None,
    reference_node_name: str | None = None,
    trim_selects: bool = True,
    visited: set | None = None,
) -> Node | None:
    """Recursively build the lineage node for ``column`` as selected in ``scope``.

    Args:
        column: Select name, or a positional index into the scope's selects.
        scope: The scope whose expression is inspected.
        dialect: Used only to render SQL in the unknown-subquery warning.
        scope_name: When set, the node is named ``"<scope_name>.<column>"``.
        upstream: Parent node; the node built here is appended to its ``downstream``.
        source_name: Stored on the node; overridden per table for the recursion
            when a derived table carries a ``source: `` comment.
        reference_node_name: Stored on the node.
        trim_selects: When true and the scope is a ``Select``, the node's
            ``source`` is a reduced ``Select`` holding only this projection plus
            the original from/where/group args.
        visited: ``(column, id(scope))`` keys already handled; one set is shared
            by the whole recursive traversal.

    Returns:
        The node for this column, the ``upstream`` node for set operations, or
        ``None`` when this column/scope pair was already visited.

    Raises:
        ValueError: In a set operation, when ``column`` matches no select.
    """
    if visited is None:
        visited = set()

    key = (column, id(scope))
    if key in visited:
        # Already visited this column-scope: stop, or recursive CTEs loop forever.
        # The set is shared across the whole traversal (not per path), so a pair
        # reached a second time via another branch is also skipped here.
        return None
    visited.add(key)

    # The projection for ``column``: by position, else by alias/name, else a
    # Star (if the scope selects ``*``) or the scope expression itself.
    select = (
        scope.expression.selects[column]
        if isinstance(column, int)
        else next(
            (select for select in scope.expression.selects if select.alias_or_name == column),
            exp.Star() if scope.expression.is_star else scope.expression,
        )
    )

    if isinstance(scope.expression, exp.Subquery):
        # Delegate to the first subquery scope only (the loop returns immediately).
        for source in scope.subquery_scopes:
            return to_node(
                column,
                scope=source,
                dialect=dialect,
                upstream=upstream,
                source_name=source_name,
                reference_node_name=reference_node_name,
                trim_selects=trim_selects,
                visited=visited,
            )
    if isinstance(scope.expression, exp.SetOperation):
        name = type(scope.expression).__name__.upper()
        upstream = upstream or Node(name=name, source=scope.expression, expression=select)

        # Position of the column within the set operation's selects; a star
        # select matches any name.
        index = (
            column
            if isinstance(column, int)
            else next(
                (
                    i
                    for i, select in enumerate(scope.expression.selects)
                    if select.alias_or_name == column or select.is_star
                ),
                -1,
            )
        )

        if index == -1:
            raise ValueError(f"Could not find {column} in {scope.expression}")

        # Each branch of the set operation contributes the column at the same position.
        for s in scope.union_scopes:
            to_node(
                index,
                scope=s,
                dialect=dialect,
                upstream=upstream,
                source_name=source_name,
                reference_node_name=reference_node_name,
                trim_selects=trim_selects,
                visited=visited,
            )
        return upstream

    if trim_selects and isinstance(scope.expression, exp.Select):
        # NOTE(review): the "from"/"where"/"group" arg keys must match the
        # installed sqlglot's ``Select`` arg names — confirm against the pinned version.
        source = exp.Select()
        source.set("expressions", [select])
        source.set("from", scope.expression.args.get("from"))
        source.set("where", scope.expression.args.get("where"))
        source.set("group", scope.expression.args.get("group"))
    else:
        source = scope.expression

    node = Node(
        name=f"{scope_name}.{column}" if scope_name else str(column),
        source=source,
        expression=select,
        source_name=source_name or "",
        reference_node_name=reference_node_name or "",
    )

    if upstream:
        upstream.downstream.append(node)

    # Subquery scopes keyed by the identity of their expression, for the lookup below.
    subquery_scopes = {
        id(subquery_scope.expression): subquery_scope for subquery_scope in scope.subquery_scopes
    }

    for subquery in find_all_in_scope(select, exp.UNWRAPPED_QUERIES):
        subquery_scope = subquery_scopes.get(id(subquery))
        if not subquery_scope:
            logger.warning("Unknown subquery scope: %s", subquery.sql(dialect=dialect))
            continue

        for name in subquery.named_selects:
            to_node(
                name,
                scope=subquery_scope,
                dialect=dialect,
                upstream=node,
                trim_selects=trim_selects,
                visited=visited,
            )

    if select.is_star:
        # NOTE: this loop rebinds ``source``; the UDTF check below reads
        # whatever value it holds afterwards.
        for source in scope.sources.values():
            if isinstance(source, Scope):
                source = source.expression
            node.downstream.append(
                Node(name=select.sql(comments=False), source=source, expression=source)
            )

    # Columns referenced by the projection; each becomes a child of this node.
    source_columns = set(find_all_in_scope(select, exp.Column))

    if isinstance(source, exp.UDTF):
        source_columns |= set(source.find_all(exp.Column))
        derived_tables = [
            source.expression.parent
            for source in scope.sources.values()
            if isinstance(source, Scope) and source.is_derived_table
        ]
    else:
        derived_tables = scope.derived_tables

    # alias -> second whitespace-separated token of a leading "source: " comment.
    source_names = {
        dt.alias: dt.comments[0].split()[1]
        for dt in derived_tables
        if dt.comments and dt.comments[0].startswith("source: ")
    }

    for c in source_columns:
        table = c.table
        source = scope.sources.get(table)

        if isinstance(source, Scope):
            # The column comes from another scope: recurse into it.
            reference_node_name = None
            if source.scope_type == ScopeType.DERIVED_TABLE and table not in source_names:
                reference_node_name = table
            elif source.scope_type == ScopeType.CTE:
                selected_node, _ = scope.selected_sources.get(table, (None, None))
                reference_node_name = selected_node.name if selected_node else None

            to_node(
                c.name,
                scope=source,
                dialect=dialect,
                scope_name=table,
                upstream=node,
                source_name=source_names.get(table) or source_name,
                reference_node_name=reference_node_name,
                trim_selects=trim_selects,
                visited=visited,
            )
        else:
            # Not a scope: add a leaf node, with a Placeholder when the table
            # has no entry in ``scope.sources``.
            source = source or exp.Placeholder()
            node.downstream.append(
                Node(name=c.sql(comments=False), source=source, expression=source)
            )

    return node