dlin-cli 0.2.0a2__tar.gz → 0.2.0b2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/Cargo.lock +4 -4
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/Cargo.toml +4 -8
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/PKG-INFO +137 -5
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/README.md +136 -4
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin/Cargo.toml +1 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin/README.md +136 -4
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin/src/cli.rs +129 -32
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin/src/main.rs +79 -7
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin/tests/integration_test.rs +226 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/README.md +136 -4
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/graph/column_lineage/cross_model.rs +29 -29
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/graph/column_lineage/impact.rs +92 -18
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/graph/column_lineage/mod.rs +1 -1
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/graph/column_lineage/single_model.rs +10 -1
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/graph/column_lineage/tests/core.rs +93 -7
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/graph/column_lineage/tests/impact.rs +120 -1
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/graph/column_lineage/tests/mod.rs +133 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/graph/column_lineage/types.rs +4 -3
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/graph/filter.rs +160 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/parser/manifest.rs +79 -4
- dlin_cli-0.2.0b2/crates/dlin-core/src/render/column_graph.rs +1209 -0
- dlin_cli-0.2.0b2/crates/dlin-core/src/render/snapshots/dlin_core__render__column_graph__tests__dot_all_transformation_types.snap +39 -0
- dlin_cli-0.2.0b2/crates/dlin-core/src/render/snapshots/dlin_core__render__column_graph__tests__dot_escapes_special_chars.snap +24 -0
- dlin_cli-0.2.0b2/crates/dlin-core/src/render/snapshots/dlin_core__render__column_graph__tests__dot_id_collision_avoided.snap +24 -0
- dlin_cli-0.2.0b2/crates/dlin-core/src/render/snapshots/dlin_core__render__column_graph__tests__dot_impact_indirect.snap +30 -0
- dlin_cli-0.2.0b2/crates/dlin-core/src/render/snapshots/dlin_core__render__column_graph__tests__dot_impact_single.snap +24 -0
- dlin_cli-0.2.0b2/crates/dlin-core/src/render/snapshots/dlin_core__render__column_graph__tests__dot_single_model.snap +24 -0
- dlin_cli-0.2.0b2/crates/dlin-core/src/render/snapshots/dlin_core__render__column_graph__tests__dot_via_path.snap +31 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/render/snapshots/dlin_core__render__column_graph__tests__impact_mermaid_indirect_edge_label.snap +6 -2
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/render/snapshots/dlin_core__render__column_graph__tests__impact_plain_multi_hop.snap +1 -1
- dlin_cli-0.2.0b2/crates/dlin-core/src/render/snapshots/dlin_core__render__column_graph__tests__impact_plain_non_direct_intermediate.snap +6 -0
- dlin_cli-0.2.0b2/crates/dlin-core/src/render/snapshots/dlin_core__render__column_graph__tests__plain_model_path_non_direct_annotation.snap +6 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/tests/column_lineage_test.rs +4 -2
- dlin_cli-0.2.0a2/crates/dlin-core/src/render/column_graph.rs +0 -621
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/LICENSE +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/Cargo.toml +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/error.rs +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/graph/builder.rs +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/graph/column_lineage/cache.rs +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/graph/column_lineage/schema.rs +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/graph/column_lineage/tests/cache.rs +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/graph/impact.rs +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/graph/mod.rs +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/graph/snapshots/dlin_core__graph__filter__tests__collapse_snapshot.snap +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/graph/snapshots/dlin_core__graph__filter__tests__collapse_snapshot_bfs_pseudoendpoint.snap +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/graph/snapshots/dlin_core__graph__filter__tests__collapse_snapshot_endpoints_fan_out.snap +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/graph/snapshots/dlin_core__graph__filter__tests__collapse_snapshot_endpoints_leaf_model.snap +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/graph/snapshots/dlin_core__graph__filter__tests__collapse_snapshot_multiple_focus_models.snap +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/graph/snapshots/dlin_core__graph__filter__tests__collapse_snapshot_no_source_exposure.snap +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/graph/snapshots/dlin_core__graph__filter__tests__collapse_snapshot_preserve_focus.snap +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/graph/snapshots/dlin_core__graph__filter__tests__snapshot_transitive_node_type_filter.snap +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/graph/snapshots/dlin_core__graph__filter__tests__snapshot_transitive_select_filter.snap +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/graph/snapshots/dlin_core__graph__filter__tests__snapshot_transitive_select_with_node_type.snap +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/graph/types.rs +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/input.rs +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/lib.rs +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/parser/cache.rs +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/parser/columns.rs +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/parser/discovery.rs +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/parser/jinja.rs +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/parser/mod.rs +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/parser/project.rs +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/parser/sql.rs +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/parser/yaml_schema.rs +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/render/ascii.rs +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/render/dot.rs +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/render/html.rs +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/render/impact.rs +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/render/json.rs +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/render/layout.rs +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/render/list.rs +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/render/mermaid.rs +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/render/mod.rs +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/render/plain.rs +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/render/snapshots/dlin_core__render__column_graph__tests__impact_mermaid.snap +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/render/snapshots/dlin_core__render__column_graph__tests__impact_plain.snap +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/render/snapshots/dlin_core__render__column_graph__tests__mermaid_dotted_table_name.snap +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/render/snapshots/dlin_core__render__column_graph__tests__mermaid_id_collision_avoided.snap +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/render/snapshots/dlin_core__render__column_graph__tests__mermaid_label_escaping.snap +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/render/snapshots/dlin_core__render__column_graph__tests__mermaid_single_model.snap +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/render/snapshots/dlin_core__render__column_graph__tests__plain_no_sources.snap +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/render/snapshots/dlin_core__render__column_graph__tests__plain_single_model.snap +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/render/snapshots/dlin_core__render__dot__tests__group_by_node_type.snap +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/render/snapshots/dlin_core__render__dot__tests__snapshot_all_edge_types.snap +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/render/snapshots/dlin_core__render__dot__tests__snapshot_direction_tb.snap +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/render/snapshots/dlin_core__render__dot__tests__snapshot_direction_tb_grouped.snap +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/render/snapshots/dlin_core__render__dot__tests__snapshot_group_by_directory.snap +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/render/snapshots/dlin_core__render__dot__tests__snapshot_lineage.snap +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/render/snapshots/dlin_core__render__dot__tests__snapshot_transitive_edges.snap +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/render/snapshots/dlin_core__render__html__tests__snapshot_html_json.snap +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/render/snapshots/dlin_core__render__impact__tests__snapshot_impact_json.snap +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/render/snapshots/dlin_core__render__impact__tests__snapshot_impact_json_with_sql.snap +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/render/snapshots/dlin_core__render__impact__tests__snapshot_impact_text.snap +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/render/snapshots/dlin_core__render__impact__tests__snapshot_impact_text_with_sql.snap +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/render/snapshots/dlin_core__render__json__tests__snapshot_json_with_sql.snap +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/render/snapshots/dlin_core__render__json__tests__snapshot_lineage.snap +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/render/snapshots/dlin_core__render__json__tests__snapshot_node_metadata.snap +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/render/snapshots/dlin_core__render__list__tests__snapshot_list_json.snap +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/render/snapshots/dlin_core__render__list__tests__snapshot_list_plain.snap +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/render/snapshots/dlin_core__render__mermaid__tests__group_by_node_type.snap +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/render/snapshots/dlin_core__render__mermaid__tests__mixed_direct_and_transitive_edges.snap +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/render/snapshots/dlin_core__render__mermaid__tests__show_columns_escapes_quotes.snap +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/render/snapshots/dlin_core__render__mermaid__tests__show_columns_lineage.snap +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/render/snapshots/dlin_core__render__mermaid__tests__show_columns_single_model.snap +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/render/snapshots/dlin_core__render__mermaid__tests__show_columns_with_collapse.snap +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/render/snapshots/dlin_core__render__mermaid__tests__show_columns_with_grouping.snap +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/render/snapshots/dlin_core__render__mermaid__tests__snapshot_direction_tb.snap +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/render/snapshots/dlin_core__render__mermaid__tests__snapshot_direction_tb_grouped.snap +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/render/snapshots/dlin_core__render__mermaid__tests__snapshot_group_by_directory.snap +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/render/snapshots/dlin_core__render__mermaid__tests__snapshot_lineage.snap +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/render/snapshots/dlin_core__render__mermaid__tests__transitive_edge_rendering.snap +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/render/snapshots/dlin_core__render__plain__tests__snapshot_plain.snap +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/render/snapshots/dlin_core__render__summary__tests__snapshot_summary_json.snap +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/render/snapshots/dlin_core__render__summary__tests__snapshot_summary_text.snap +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/render/summary.rs +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/crates/dlin-core/src/render/svg.rs +0 -0
- {dlin_cli-0.2.0a2 → dlin_cli-0.2.0b2}/pyproject.toml +0 -0
|
@@ -231,7 +231,7 @@ checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
|
|
|
231
231
|
|
|
232
232
|
[[package]]
|
|
233
233
|
name = "dlin"
|
|
234
|
-
version = "0.2.0-alpha.2"
|
|
234
|
+
version = "0.2.0-beta.2"
|
|
235
235
|
dependencies = [
|
|
236
236
|
"anyhow",
|
|
237
237
|
"clap",
|
|
@@ -247,7 +247,7 @@ dependencies = [
|
|
|
247
247
|
|
|
248
248
|
[[package]]
|
|
249
249
|
name = "dlin-core"
|
|
250
|
-
version = "0.2.0-alpha.2"
|
|
250
|
+
version = "0.2.0-beta.2"
|
|
251
251
|
dependencies = [
|
|
252
252
|
"anyhow",
|
|
253
253
|
"clap",
|
|
@@ -617,9 +617,9 @@ checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd"
|
|
|
617
617
|
|
|
618
618
|
[[package]]
|
|
619
619
|
name = "polyglot-sql"
|
|
620
|
-
version = "0.4.
|
|
620
|
+
version = "0.4.2"
|
|
621
621
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
622
|
-
checksum = "
|
|
622
|
+
checksum = "3e264f8907f3a2050232cd06503afb637525038ddd388a2c9e03424e336cd9c8"
|
|
623
623
|
dependencies = [
|
|
624
624
|
"serde",
|
|
625
625
|
"serde_json",
|
|
@@ -3,7 +3,7 @@ members = ["crates/*"]
|
|
|
3
3
|
resolver = "3"
|
|
4
4
|
|
|
5
5
|
[workspace.package]
|
|
6
|
-
version = "0.2.0-alpha.2"
|
|
6
|
+
version = "0.2.0-beta.2"
|
|
7
7
|
edition = "2024"
|
|
8
8
|
license = "MIT"
|
|
9
9
|
repository = "https://github.com/eitsupi/dlin"
|
|
@@ -23,7 +23,7 @@ indexmap = "2"
|
|
|
23
23
|
minijinja = "2"
|
|
24
24
|
rayon = "1"
|
|
25
25
|
globset = "0.4"
|
|
26
|
-
polyglot-sql = { version = "0.4.
|
|
26
|
+
polyglot-sql = { version = "0.4.2", default-features = false, features = ["all-dialects", "semantic"] }
|
|
27
27
|
path-slash = "0.2.1"
|
|
28
28
|
clap = { version = "4", features = ["derive", "env"] }
|
|
29
29
|
libc = "0.2"
|
|
@@ -34,18 +34,14 @@ insta = "1"
|
|
|
34
34
|
serial_test = "3.4.0"
|
|
35
35
|
|
|
36
36
|
# internal
|
|
37
|
-
dlin-core = { version = "0.2.0-alpha.2", path = "crates/dlin-core" }
|
|
37
|
+
dlin-core = { version = "0.2.0-beta.2", path = "crates/dlin-core" }
|
|
38
38
|
|
|
39
39
|
[workspace.lints.rust]
|
|
40
40
|
unexpected_cfgs = { level = "warn", check-cfg = ['cfg(tarpaulin_include)'] }
|
|
41
41
|
|
|
42
42
|
[profile.dist]
|
|
43
43
|
inherits = "release"
|
|
44
|
-
|
|
45
|
-
# the dlin binary, bloating it from ~7.5 MB to ~12 MB. Use thin LTO until the
|
|
46
|
-
# upstream semantic→generate feature dependency is removed from polyglot-sql,
|
|
47
|
-
# after which generator.rs will be excluded from the build entirely.
|
|
48
|
-
lto = "thin"
|
|
44
|
+
lto = "fat"
|
|
49
45
|
codegen-units = 1
|
|
50
46
|
strip = true
|
|
51
47
|
opt-level = "s"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: dlin-cli
|
|
3
|
-
Version: 0.2.0a2
|
|
3
|
+
Version: 0.2.0b2
|
|
4
4
|
Classifier: Development Status :: 4 - Beta
|
|
5
5
|
Classifier: Environment :: Console
|
|
6
6
|
Classifier: Intended Audience :: Developers
|
|
@@ -26,10 +26,12 @@ Project-URL: Repository, https://github.com/eitsupi/dlin
|
|
|
26
26
|
[PyPI](https://pypi.org/project/dlin-cli/)
|
|
27
27
|
[DeepWiki](https://deepwiki.com/eitsupi/dlin)
|
|
28
28
|
|
|
29
|
-
dbt lineage
|
|
29
|
+
dbt model lineage CLI that parses SQL files directly. No `dbt compile`, no Python, no `manifest.json` (for model-level lineage).
|
|
30
30
|
|
|
31
31
|
Builds a dependency graph from `ref()` and `source()` calls in SQL. Designed for AI agents and CI pipelines.
|
|
32
32
|
|
|
33
|
+
Experimental column-level lineage (`dlin column upstream` / `dlin column downstream`) is also available. It requires `dbt compile` and `manifest.json`.
|
|
34
|
+
|
|
33
35
|
## Motivation
|
|
34
36
|
|
|
35
37
|
When I edited dbt models in VS Code, [dbt Power User](https://marketplace.visualstudio.com/items?itemName=innoverio.vscode-dbt-power-user) was my go-to companion for navigating lineage. AI agents have no such companion. I watched them `grep` through dbt projects to find model dependencies. It works, but they end up calling `grep` repeatedly and relying on fragile string matching to piece together `ref()` and `source()` relationships.
|
|
@@ -38,7 +40,7 @@ dlin is designed to fill that gap: a CLI tool that lets AI agents understand a d
|
|
|
38
40
|
|
|
39
41
|
To replace `grep`, speed and size matter. dlin is a small, self-contained binary with no runtime dependencies. It parses SQL directly, evaluates common Jinja patterns without Python, parallelizes file I/O, and caches aggressively.
|
|
40
42
|
|
|
41
|
-
The key idea behind dlin is that finding the right models fast is what matters most.
|
|
43
|
+
The key idea behind dlin is that finding the right models fast is what matters most. The hard part for agents is knowing which models to look at in the first place. dlin focuses on making model-level lineage as fast as possible, and also offers experimental column-level lineage for deeper analysis.
|
|
42
44
|
|
|
43
45
|
## Install
|
|
44
46
|
|
|
@@ -114,11 +116,12 @@ The key line is **"Do NOT grep/cat/find through SQL files"** — without it, age
|
|
|
114
116
|
|
|
115
117
|
## Features
|
|
116
118
|
|
|
117
|
-
- **No dependencies**: single binary, no Python, no `manifest.json`
|
|
119
|
+
- **No dependencies for model lineage**: single binary, no Python, no `manifest.json`
|
|
118
120
|
- **Recursive upstream / downstream**: `-u N` / `-d N` to control traversal depth
|
|
119
121
|
- **Impact analysis with severity**: `dlin impact` scores downstream nodes and flags exposure reachability
|
|
120
122
|
- **Composable**: stdin accepts model names or file paths; pipe with `jq`, `dlin list`, `git diff`, etc.
|
|
121
123
|
- **Agent-friendly**: `--error-format json` emits structured `{"level","what","why","hint"}` on stderr; `--help` is designed for tool discovery
|
|
124
|
+
- **Column-level lineage** (experimental): traces columns across models with transformation classification; requires `dbt compile` and `manifest.json`
|
|
122
125
|
|
|
123
126
|
## Mermaid diagrams
|
|
124
127
|
|
|
@@ -252,6 +255,135 @@ dlin graph -o dot | dot -Tsvg > out.svg # Graphviz rendering
|
|
|
252
255
|
|
|
253
256
|
Output formats: ASCII (default), JSON, Mermaid, Graphviz DOT, Plain, SVG, HTML.
|
|
254
257
|
|
|
258
|
+
## Column-level lineage (Experimental)
|
|
259
|
+
|
|
260
|
+
> [!WARNING]
|
|
261
|
+
> Column-level lineage depends on [polyglot-sql](https://github.com/tobilg/polyglot) for SQL parsing. Coverage varies by SQL complexity and dialect. Patterns such as `SELECT *` chains, STRUCT expansion, and some database-specific syntax may not resolve correctly.
|
|
262
|
+
|
|
263
|
+
`dlin column upstream` and `dlin column downstream` trace columns across models. Unlike model-level commands, they always require a compiled `manifest.json`. Run `dbt compile` first.
|
|
264
|
+
|
|
265
|
+
```sh
|
|
266
|
+
# Where does each output column of orders come from?
|
|
267
|
+
dlin column upstream orders
|
|
268
|
+
|
|
269
|
+
# What downstream columns are affected if stg_orders.order_id changes?
|
|
270
|
+
dlin column downstream stg_orders --column order_id
|
|
271
|
+
|
|
272
|
+
# Mermaid flowchart
|
|
273
|
+
dlin column upstream customers -o mermaid
|
|
274
|
+
dlin column downstream stg_orders --column order_id -o mermaid
|
|
275
|
+
|
|
276
|
+
# Specific columns only
|
|
277
|
+
dlin column upstream orders --column order_id --column status
|
|
278
|
+
|
|
279
|
+
# Verify manifest freshness before querying
|
|
280
|
+
dlin check-manifest && dlin column upstream orders
|
|
281
|
+
```
|
|
282
|
+
|
|
283
|
+
### Column upstream
|
|
284
|
+
|
|
285
|
+
Traces each output column of a model back to its raw source columns, following references across intermediate models.
|
|
286
|
+
|
|
287
|
+
```sh
|
|
288
|
+
dlin column upstream customers -o mermaid
|
|
289
|
+
```
|
|
290
|
+
|
|
291
|
+
```mermaid
|
|
292
|
+
flowchart LR
|
|
293
|
+
subgraph sg0["customers"]
|
|
294
|
+
n0_0["customer_id"]
|
|
295
|
+
n0_1["email"]
|
|
296
|
+
n0_2["first_name"]
|
|
297
|
+
n0_3["last_name"]
|
|
298
|
+
n0_4["lifetime_value"]
|
|
299
|
+
n0_5["order_count"]
|
|
300
|
+
end
|
|
301
|
+
subgraph sg1["orders"]
|
|
302
|
+
n1_0["order_id"]
|
|
303
|
+
n1_1["total_amount"]
|
|
304
|
+
end
|
|
305
|
+
subgraph sg2["raw.customers"]
|
|
306
|
+
n2_0["email"]
|
|
307
|
+
n2_1["first_name"]
|
|
308
|
+
n2_2["id"]
|
|
309
|
+
n2_3["last_name"]
|
|
310
|
+
end
|
|
311
|
+
subgraph sg3["raw.orders"]
|
|
312
|
+
n3_0["id"]
|
|
313
|
+
end
|
|
314
|
+
subgraph sg4["raw.payments"]
|
|
315
|
+
n4_0["amount"]
|
|
316
|
+
end
|
|
317
|
+
subgraph sg5["stg_customers"]
|
|
318
|
+
n5_0["customer_id"]
|
|
319
|
+
n5_1["email"]
|
|
320
|
+
n5_2["first_name"]
|
|
321
|
+
n5_3["last_name"]
|
|
322
|
+
end
|
|
323
|
+
subgraph sg6["stg_orders"]
|
|
324
|
+
n6_0["order_id"]
|
|
325
|
+
end
|
|
326
|
+
subgraph sg7["stg_payments"]
|
|
327
|
+
n7_0["amount"]
|
|
328
|
+
end
|
|
329
|
+
|
|
330
|
+
n2_2 -->|"direct"|n5_0
|
|
331
|
+
n5_0 -->|"direct"|n0_0
|
|
332
|
+
n2_0 -->|"direct"|n5_1
|
|
333
|
+
n5_1 -->|"direct"|n0_1
|
|
334
|
+
n2_1 -->|"direct"|n5_2
|
|
335
|
+
n5_2 -->|"direct"|n0_2
|
|
336
|
+
n2_3 -->|"direct"|n5_3
|
|
337
|
+
n5_3 -->|"direct"|n0_3
|
|
338
|
+
n4_0 -->|"direct"|n7_0
|
|
339
|
+
n7_0 -->|"direct"|n1_1
|
|
340
|
+
n1_1 -->|"aggregation"|n0_4
|
|
341
|
+
n3_0 -->|"direct"|n6_0
|
|
342
|
+
n6_0 -->|"direct"|n1_0
|
|
343
|
+
n1_0 -->|"aggregation"|n0_5
|
|
344
|
+
```
|
|
345
|
+
|
|
346
|
+
`customer_id`, `email`, etc. pass through `stg_customers` unchanged from `raw.customers` (all `direct`). `lifetime_value` and `order_count` are aggregated at the `customers` model — the final edge to `customers` is labeled `aggregation`, while all upstream hops carry their actual transformation type (here `direct`, since staging and mart models pass columns through unchanged).
|
|
347
|
+
|
|
348
|
+
Transformation types shown on edges: `direct`, `aggregation`, `expression`, `cast`, `conditional`, `unknown`.
|
|
349
|
+
|
|
350
|
+
### Column downstream
|
|
351
|
+
|
|
352
|
+
Traces a column forward to all downstream models and columns that depend on it.
|
|
353
|
+
|
|
354
|
+
```sh
|
|
355
|
+
dlin column downstream stg_orders --column order_id -o mermaid
|
|
356
|
+
```
|
|
357
|
+
|
|
358
|
+
```mermaid
|
|
359
|
+
flowchart LR
|
|
360
|
+
subgraph sg0["customers"]
|
|
361
|
+
n0_0["order_count"]
|
|
362
|
+
end
|
|
363
|
+
subgraph sg1["order_enriched"]
|
|
364
|
+
n1_0["order_id"]
|
|
365
|
+
end
|
|
366
|
+
subgraph sg2["orders"]
|
|
367
|
+
n2_0["order_id"]
|
|
368
|
+
end
|
|
369
|
+
subgraph sg3["stg_orders"]
|
|
370
|
+
n3_0["order_id"]
|
|
371
|
+
end
|
|
372
|
+
|
|
373
|
+
n2_0 -->|"aggregation"|n0_0
|
|
374
|
+
n3_0 -->|"direct"|n1_0
|
|
375
|
+
n3_0 -->|"direct"|n2_0
|
|
376
|
+
```
|
|
377
|
+
|
|
378
|
+
`stg_orders.order_id` flows directly into `orders.order_id` and `order_enriched.order_id`. `orders.order_id` is then aggregated into `customers.order_count`. Each edge shows its per-hop transformation type.
|
|
379
|
+
|
|
380
|
+
### Known limitations
|
|
381
|
+
|
|
382
|
+
- **Requires `dbt compile`**: no SQL parse mode fallback; manifest with compiled SQL is always needed
|
|
383
|
+
- **SELECT \* chains**: resolution depends on YAML column definitions in upstream models; unresolved columns are reported in `errors[]`
|
|
384
|
+
- **Dialect-specific syntax**: pass `--dialect bigquery` (or other dialect) for better coverage
|
|
385
|
+
- **Performance**: first run parses all upstream models; results are cached in `.dlin_cache/` for subsequent queries
|
|
386
|
+
|
|
255
387
|
## Key subcommands
|
|
256
388
|
|
|
257
389
|
### `list`
|
|
@@ -290,7 +422,7 @@ dlin graph --node-type model,source # filter by node type
|
|
|
290
422
|
|
|
291
423
|
## Data sources
|
|
292
424
|
|
|
293
|
-
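dlin aims to work without `dbt compile`. By default it parses SQL files directly, but it can also leverage a pre-compiled `manifest.json` for additional accuracy when one is available.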
|
|
425
|
+
dlin aims to work without `dbt compile` (except for column-level lineage, which always requires `manifest.json`). By default it parses SQL files directly, but it can also leverage a pre-compiled `manifest.json` for additional accuracy when one is available.
|
|
294
426
|
|
|
295
427
|
**SQL parsing (default)**: extracts `ref()` and `source()` from SQL via regex + Jinja template evaluation. No Python or dbt needed. Generic tests (`not_null`, `unique`, `relationships`, etc.) are inferred from YAML schema declarations.
|
|
296
428
|
|
|
@@ -4,10 +4,12 @@
|
|
|
4
4
|
[PyPI](https://pypi.org/project/dlin-cli/)
|
|
5
5
|
[DeepWiki](https://deepwiki.com/eitsupi/dlin)
|
|
6
6
|
|
|
7
|
-
dbt lineage
|
|
7
|
+
dbt model lineage CLI that parses SQL files directly. No `dbt compile`, no Python, no `manifest.json` (for model-level lineage).
|
|
8
8
|
|
|
9
9
|
Builds a dependency graph from `ref()` and `source()` calls in SQL. Designed for AI agents and CI pipelines.
|
|
10
10
|
|
|
11
|
+
Experimental column-level lineage (`dlin column upstream` / `dlin column downstream`) is also available. It requires `dbt compile` and `manifest.json`.
|
|
12
|
+
|
|
11
13
|
## Motivation
|
|
12
14
|
|
|
13
15
|
When I edited dbt models in VS Code, [dbt Power User](https://marketplace.visualstudio.com/items?itemName=innoverio.vscode-dbt-power-user) was my go-to companion for navigating lineage. AI agents have no such companion. I watched them `grep` through dbt projects to find model dependencies. It works, but they end up calling `grep` repeatedly and relying on fragile string matching to piece together `ref()` and `source()` relationships.
|
|
@@ -16,7 +18,7 @@ dlin is designed to fill that gap: a CLI tool that lets AI agents understand a d
|
|
|
16
18
|
|
|
17
19
|
To replace `grep`, speed and size matter. dlin is a small, self-contained binary with no runtime dependencies. It parses SQL directly, evaluates common Jinja patterns without Python, parallelizes file I/O, and caches aggressively.
|
|
18
20
|
|
|
19
|
-
The key idea behind dlin is that finding the right models fast is what matters most.
|
|
21
|
+
The key idea behind dlin is that finding the right models fast is what matters most. The hard part for agents is knowing which models to look at in the first place. dlin focuses on making model-level lineage as fast as possible, and also offers experimental column-level lineage for deeper analysis.
|
|
20
22
|
|
|
21
23
|
## Install
|
|
22
24
|
|
|
@@ -92,11 +94,12 @@ The key line is **"Do NOT grep/cat/find through SQL files"** — without it, age
|
|
|
92
94
|
|
|
93
95
|
## Features
|
|
94
96
|
|
|
95
|
-
- **No dependencies**: single binary, no Python, no `manifest.json`
|
|
97
|
+
- **No dependencies for model lineage**: single binary, no Python, no `manifest.json`
|
|
96
98
|
- **Recursive upstream / downstream**: `-u N` / `-d N` to control traversal depth
|
|
97
99
|
- **Impact analysis with severity**: `dlin impact` scores downstream nodes and flags exposure reachability
|
|
98
100
|
- **Composable**: stdin accepts model names or file paths; pipe with `jq`, `dlin list`, `git diff`, etc.
|
|
99
101
|
- **Agent-friendly**: `--error-format json` emits structured `{"level","what","why","hint"}` on stderr; `--help` is designed for tool discovery
|
|
102
|
+
- **Column-level lineage** (experimental): traces columns across models with transformation classification; requires `dbt compile` and `manifest.json`
|
|
100
103
|
|
|
101
104
|
## Mermaid diagrams
|
|
102
105
|
|
|
@@ -230,6 +233,135 @@ dlin graph -o dot | dot -Tsvg > out.svg # Graphviz rendering
|
|
|
230
233
|
|
|
231
234
|
Output formats: ASCII (default), JSON, Mermaid, Graphviz DOT, Plain, SVG, HTML.
|
|
232
235
|
|
|
236
|
+
## Column-level lineage (Experimental)
|
|
237
|
+
|
|
238
|
+
> [!WARNING]
|
|
239
|
+
> Column-level lineage depends on [polyglot-sql](https://github.com/tobilg/polyglot) for SQL parsing. Coverage varies by SQL complexity and dialect. Patterns such as `SELECT *` chains, STRUCT expansion, and some database-specific syntax may not resolve correctly.
|
|
240
|
+
|
|
241
|
+
`dlin column upstream` and `dlin column downstream` trace columns across models. Unlike model-level commands, they always require a compiled `manifest.json`. Run `dbt compile` first.
|
|
242
|
+
|
|
243
|
+
```sh
|
|
244
|
+
# Where does each output column of orders come from?
|
|
245
|
+
dlin column upstream orders
|
|
246
|
+
|
|
247
|
+
# What downstream columns are affected if stg_orders.order_id changes?
|
|
248
|
+
dlin column downstream stg_orders --column order_id
|
|
249
|
+
|
|
250
|
+
# Mermaid flowchart
|
|
251
|
+
dlin column upstream customers -o mermaid
|
|
252
|
+
dlin column downstream stg_orders --column order_id -o mermaid
|
|
253
|
+
|
|
254
|
+
# Specific columns only
|
|
255
|
+
dlin column upstream orders --column order_id --column status
|
|
256
|
+
|
|
257
|
+
# Verify manifest freshness before querying
|
|
258
|
+
dlin check-manifest && dlin column upstream orders
|
|
259
|
+
```
|
|
260
|
+
|
|
261
|
+
### Column upstream
|
|
262
|
+
|
|
263
|
+
Traces each output column of a model back to its raw source columns, following references across intermediate models.
|
|
264
|
+
|
|
265
|
+
```sh
|
|
266
|
+
dlin column upstream customers -o mermaid
|
|
267
|
+
```
|
|
268
|
+
|
|
269
|
+
```mermaid
|
|
270
|
+
flowchart LR
|
|
271
|
+
subgraph sg0["customers"]
|
|
272
|
+
n0_0["customer_id"]
|
|
273
|
+
n0_1["email"]
|
|
274
|
+
n0_2["first_name"]
|
|
275
|
+
n0_3["last_name"]
|
|
276
|
+
n0_4["lifetime_value"]
|
|
277
|
+
n0_5["order_count"]
|
|
278
|
+
end
|
|
279
|
+
subgraph sg1["orders"]
|
|
280
|
+
n1_0["order_id"]
|
|
281
|
+
n1_1["total_amount"]
|
|
282
|
+
end
|
|
283
|
+
subgraph sg2["raw.customers"]
|
|
284
|
+
n2_0["email"]
|
|
285
|
+
n2_1["first_name"]
|
|
286
|
+
n2_2["id"]
|
|
287
|
+
n2_3["last_name"]
|
|
288
|
+
end
|
|
289
|
+
subgraph sg3["raw.orders"]
|
|
290
|
+
n3_0["id"]
|
|
291
|
+
end
|
|
292
|
+
subgraph sg4["raw.payments"]
|
|
293
|
+
n4_0["amount"]
|
|
294
|
+
end
|
|
295
|
+
subgraph sg5["stg_customers"]
|
|
296
|
+
n5_0["customer_id"]
|
|
297
|
+
n5_1["email"]
|
|
298
|
+
n5_2["first_name"]
|
|
299
|
+
n5_3["last_name"]
|
|
300
|
+
end
|
|
301
|
+
subgraph sg6["stg_orders"]
|
|
302
|
+
n6_0["order_id"]
|
|
303
|
+
end
|
|
304
|
+
subgraph sg7["stg_payments"]
|
|
305
|
+
n7_0["amount"]
|
|
306
|
+
end
|
|
307
|
+
|
|
308
|
+
n2_2 -->|"direct"|n5_0
|
|
309
|
+
n5_0 -->|"direct"|n0_0
|
|
310
|
+
n2_0 -->|"direct"|n5_1
|
|
311
|
+
n5_1 -->|"direct"|n0_1
|
|
312
|
+
n2_1 -->|"direct"|n5_2
|
|
313
|
+
n5_2 -->|"direct"|n0_2
|
|
314
|
+
n2_3 -->|"direct"|n5_3
|
|
315
|
+
n5_3 -->|"direct"|n0_3
|
|
316
|
+
n4_0 -->|"direct"|n7_0
|
|
317
|
+
n7_0 -->|"direct"|n1_1
|
|
318
|
+
n1_1 -->|"aggregation"|n0_4
|
|
319
|
+
n3_0 -->|"direct"|n6_0
|
|
320
|
+
n6_0 -->|"direct"|n1_0
|
|
321
|
+
n1_0 -->|"aggregation"|n0_5
|
|
322
|
+
```
|
|
323
|
+
|
|
324
|
+
`customer_id`, `email`, etc. pass through `stg_customers` unchanged from `raw.customers` (all `direct`). `lifetime_value` and `order_count` are aggregated at the `customers` model — the final edge to `customers` is labeled `aggregation`, while all upstream hops carry their actual transformation type (here `direct`, since staging and mart models pass columns through unchanged).
|
|
325
|
+
|
|
326
|
+
Transformation types shown on edges: `direct`, `aggregation`, `expression`, `cast`, `conditional`, `unknown`.
|
|
327
|
+
|
|
328
|
+
### Column downstream
|
|
329
|
+
|
|
330
|
+
Traces a column forward to all downstream models and columns that depend on it.
|
|
331
|
+
|
|
332
|
+
```sh
|
|
333
|
+
dlin column downstream stg_orders --column order_id -o mermaid
|
|
334
|
+
```
|
|
335
|
+
|
|
336
|
+
```mermaid
|
|
337
|
+
flowchart LR
|
|
338
|
+
subgraph sg0["customers"]
|
|
339
|
+
n0_0["order_count"]
|
|
340
|
+
end
|
|
341
|
+
subgraph sg1["order_enriched"]
|
|
342
|
+
n1_0["order_id"]
|
|
343
|
+
end
|
|
344
|
+
subgraph sg2["orders"]
|
|
345
|
+
n2_0["order_id"]
|
|
346
|
+
end
|
|
347
|
+
subgraph sg3["stg_orders"]
|
|
348
|
+
n3_0["order_id"]
|
|
349
|
+
end
|
|
350
|
+
|
|
351
|
+
n2_0 -->|"aggregation"|n0_0
|
|
352
|
+
n3_0 -->|"direct"|n1_0
|
|
353
|
+
n3_0 -->|"direct"|n2_0
|
|
354
|
+
```
|
|
355
|
+
|
|
356
|
+
`stg_orders.order_id` flows directly into `orders.order_id` and `order_enriched.order_id`. `orders.order_id` is then aggregated into `customers.order_count`. Each edge shows its per-hop transformation type.
|
|
357
|
+
|
|
358
|
+
### Known limitations
|
|
359
|
+
|
|
360
|
+
- **Requires `dbt compile`**: no SQL parse mode fallback; manifest with compiled SQL is always needed
|
|
361
|
+
- **SELECT \* chains**: resolution depends on YAML column definitions in upstream models; unresolved columns are reported in `errors[]`
|
|
362
|
+
- **Dialect-specific syntax**: pass `--dialect bigquery` (or other dialect) for better coverage
|
|
363
|
+
- **Performance**: first run parses all upstream models; results are cached in `.dlin_cache/` for subsequent queries
|
|
364
|
+
|
|
233
365
|
## Key subcommands
|
|
234
366
|
|
|
235
367
|
### `list`
|
|
@@ -268,7 +400,7 @@ dlin graph --node-type model,source # filter by node type
|
|
|
268
400
|
|
|
269
401
|
## Data sources
|
|
270
402
|
|
|
271
|
-
dlin aims to work without `dbt compile`…
|
|
403
|
+
dlin aims to work without `dbt compile` (except for column-level lineage, which always requires `manifest.json`). By default it parses SQL files directly, but it can also leverage a pre-compiled `manifest.json` for additional accuracy when one is available.
|
|
272
404
|
|
|
273
405
|
**SQL parsing (default)**: extracts `ref()` and `source()` from SQL via regex + Jinja template evaluation. No Python or dbt needed. Generic tests (`not_null`, `unique`, `relationships`, etc.) are inferred from YAML schema declarations.
|
|
274
406
|
|
|
@@ -18,6 +18,7 @@ path = "src/main.rs"
|
|
|
18
18
|
dlin-core = { workspace = true, features = ["clap", "column-lineage"] }
|
|
19
19
|
clap = { workspace = true }
|
|
20
20
|
anyhow = { workspace = true }
|
|
21
|
+
regex = { workspace = true }
|
|
21
22
|
serde_json = { workspace = true }
|
|
22
23
|
path-slash = { workspace = true }
|
|
23
24
|
polyglot-sql = { workspace = true }
|
|
@@ -4,10 +4,12 @@
|
|
|
4
4
|
[](https://pypi.org/project/dlin-cli/)
|
|
5
5
|
[](https://deepwiki.com/eitsupi/dlin)
|
|
6
6
|
|
|
7
|
-
dbt lineage
|
|
7
|
+
dbt model lineage CLI that parses SQL files directly. No `dbt compile`, no Python, no `manifest.json` (for model-level lineage).
|
|
8
8
|
|
|
9
9
|
Builds a dependency graph from `ref()` and `source()` calls in SQL. Designed for AI agents and CI pipelines.
|
|
10
10
|
|
|
11
|
+
Experimental column-level lineage (`dlin column upstream` / `dlin column downstream`) is also available. It requires `dbt compile` and `manifest.json`.
|
|
12
|
+
|
|
11
13
|
## Motivation
|
|
12
14
|
|
|
13
15
|
When I edited dbt models in VS Code, [dbt Power User](https://marketplace.visualstudio.com/items?itemName=innoverio.vscode-dbt-power-user) was my go-to companion for navigating lineage. AI agents have no such companion. I watched them `grep` through dbt projects to find model dependencies. It works, but they end up calling `grep` repeatedly and relying on fragile string matching to piece together `ref()` and `source()` relationships.
|
|
@@ -16,7 +18,7 @@ dlin is designed to fill that gap: a CLI tool that lets AI agents understand a d
|
|
|
16
18
|
|
|
17
19
|
To replace `grep`, speed and size matter. dlin is a small, self-contained binary with no runtime dependencies. It parses SQL directly, evaluates common Jinja patterns without Python, parallelizes file I/O, and caches aggressively.
|
|
18
20
|
|
|
19
|
-
The key idea behind dlin is that finding the right models fast is what matters most.
|
|
21
|
+
The key idea behind dlin is that finding the right models fast is what matters most. The hard part for agents is knowing which models to look at in the first place. dlin focuses on making model-level lineage as fast as possible, and also offers experimental column-level lineage for deeper analysis.
|
|
20
22
|
|
|
21
23
|
## Install
|
|
22
24
|
|
|
@@ -92,11 +94,12 @@ The key line is **"Do NOT grep/cat/find through SQL files"** — without it, age
|
|
|
92
94
|
|
|
93
95
|
## Features
|
|
94
96
|
|
|
95
|
-
- **No dependencies**: single binary, no Python, no `manifest.json`
|
|
97
|
+
- **No dependencies for model lineage**: single binary, no Python, no `manifest.json`
|
|
96
98
|
- **Recursive upstream / downstream**: `-u N` / `-d N` to control traversal depth
|
|
97
99
|
- **Impact analysis with severity**: `dlin impact` scores downstream nodes and flags exposure reachability
|
|
98
100
|
- **Composable**: stdin accepts model names or file paths; pipe with `jq`, `dlin list`, `git diff`, etc.
|
|
99
101
|
- **Agent-friendly**: `--error-format json` emits structured `{"level","what","why","hint"}` on stderr; `--help` is designed for tool discovery
|
|
102
|
+
- **Column-level lineage** (experimental): traces columns across models with transformation classification; requires `dbt compile` and `manifest.json`
|
|
100
103
|
|
|
101
104
|
## Mermaid diagrams
|
|
102
105
|
|
|
@@ -230,6 +233,135 @@ dlin graph -o dot | dot -Tsvg > out.svg # Graphviz rendering
|
|
|
230
233
|
|
|
231
234
|
Output formats: ASCII (default), JSON, Mermaid, Graphviz DOT, Plain, SVG, HTML.
|
|
232
235
|
|
|
236
|
+
## Column-level lineage (Experimental)
|
|
237
|
+
|
|
238
|
+
> [!WARNING]
|
|
239
|
+
> Column-level lineage depends on [polyglot-sql](https://github.com/tobilg/polyglot) for SQL parsing. Coverage varies by SQL complexity and dialect. Patterns such as `SELECT *` chains, STRUCT expansion, and some database-specific syntax may not resolve correctly.
|
|
240
|
+
|
|
241
|
+
`dlin column upstream` and `dlin column downstream` trace columns across models. Unlike model-level commands, they always require a compiled `manifest.json`. Run `dbt compile` first.
|
|
242
|
+
|
|
243
|
+
```sh
|
|
244
|
+
# Where does each output column of orders come from?
|
|
245
|
+
dlin column upstream orders
|
|
246
|
+
|
|
247
|
+
# What downstream columns are affected if stg_orders.order_id changes?
|
|
248
|
+
dlin column downstream stg_orders --column order_id
|
|
249
|
+
|
|
250
|
+
# Mermaid flowchart
|
|
251
|
+
dlin column upstream customers -o mermaid
|
|
252
|
+
dlin column downstream stg_orders --column order_id -o mermaid
|
|
253
|
+
|
|
254
|
+
# Specific columns only
|
|
255
|
+
dlin column upstream orders --column order_id --column status
|
|
256
|
+
|
|
257
|
+
# Verify manifest freshness before querying
|
|
258
|
+
dlin check-manifest && dlin column upstream orders
|
|
259
|
+
```
|
|
260
|
+
|
|
261
|
+
### Column upstream
|
|
262
|
+
|
|
263
|
+
Traces each output column of a model back to its raw source columns, following references across intermediate models.
|
|
264
|
+
|
|
265
|
+
```sh
|
|
266
|
+
dlin column upstream customers -o mermaid
|
|
267
|
+
```
|
|
268
|
+
|
|
269
|
+
```mermaid
|
|
270
|
+
flowchart LR
|
|
271
|
+
subgraph sg0["customers"]
|
|
272
|
+
n0_0["customer_id"]
|
|
273
|
+
n0_1["email"]
|
|
274
|
+
n0_2["first_name"]
|
|
275
|
+
n0_3["last_name"]
|
|
276
|
+
n0_4["lifetime_value"]
|
|
277
|
+
n0_5["order_count"]
|
|
278
|
+
end
|
|
279
|
+
subgraph sg1["orders"]
|
|
280
|
+
n1_0["order_id"]
|
|
281
|
+
n1_1["total_amount"]
|
|
282
|
+
end
|
|
283
|
+
subgraph sg2["raw.customers"]
|
|
284
|
+
n2_0["email"]
|
|
285
|
+
n2_1["first_name"]
|
|
286
|
+
n2_2["id"]
|
|
287
|
+
n2_3["last_name"]
|
|
288
|
+
end
|
|
289
|
+
subgraph sg3["raw.orders"]
|
|
290
|
+
n3_0["id"]
|
|
291
|
+
end
|
|
292
|
+
subgraph sg4["raw.payments"]
|
|
293
|
+
n4_0["amount"]
|
|
294
|
+
end
|
|
295
|
+
subgraph sg5["stg_customers"]
|
|
296
|
+
n5_0["customer_id"]
|
|
297
|
+
n5_1["email"]
|
|
298
|
+
n5_2["first_name"]
|
|
299
|
+
n5_3["last_name"]
|
|
300
|
+
end
|
|
301
|
+
subgraph sg6["stg_orders"]
|
|
302
|
+
n6_0["order_id"]
|
|
303
|
+
end
|
|
304
|
+
subgraph sg7["stg_payments"]
|
|
305
|
+
n7_0["amount"]
|
|
306
|
+
end
|
|
307
|
+
|
|
308
|
+
n2_2 -->|"direct"|n5_0
|
|
309
|
+
n5_0 -->|"direct"|n0_0
|
|
310
|
+
n2_0 -->|"direct"|n5_1
|
|
311
|
+
n5_1 -->|"direct"|n0_1
|
|
312
|
+
n2_1 -->|"direct"|n5_2
|
|
313
|
+
n5_2 -->|"direct"|n0_2
|
|
314
|
+
n2_3 -->|"direct"|n5_3
|
|
315
|
+
n5_3 -->|"direct"|n0_3
|
|
316
|
+
n4_0 -->|"direct"|n7_0
|
|
317
|
+
n7_0 -->|"direct"|n1_1
|
|
318
|
+
n1_1 -->|"aggregation"|n0_4
|
|
319
|
+
n3_0 -->|"direct"|n6_0
|
|
320
|
+
n6_0 -->|"direct"|n1_0
|
|
321
|
+
n1_0 -->|"aggregation"|n0_5
|
|
322
|
+
```
|
|
323
|
+
|
|
324
|
+
`customer_id`, `email`, etc. pass through `stg_customers` from `raw.customers` without transformation (all `direct`; `raw.customers.id` is only renamed to `customer_id`). `lifetime_value` and `order_count` are aggregated at the `customers` model — the final edge into `customers` is labeled `aggregation`, while each upstream hop carries its own transformation type (here `direct`, since the staging models and `orders` pass these columns through without transformation).
|
|
325
|
+
|
|
326
|
+
Transformation types shown on edges: `direct`, `aggregation`, `expression`, `cast`, `conditional`, `unknown`.
|
|
327
|
+
|
|
328
|
+
### Column downstream
|
|
329
|
+
|
|
330
|
+
Traces a column forward to all downstream models and columns that depend on it.
|
|
331
|
+
|
|
332
|
+
```sh
|
|
333
|
+
dlin column downstream stg_orders --column order_id -o mermaid
|
|
334
|
+
```
|
|
335
|
+
|
|
336
|
+
```mermaid
|
|
337
|
+
flowchart LR
|
|
338
|
+
subgraph sg0["customers"]
|
|
339
|
+
n0_0["order_count"]
|
|
340
|
+
end
|
|
341
|
+
subgraph sg1["order_enriched"]
|
|
342
|
+
n1_0["order_id"]
|
|
343
|
+
end
|
|
344
|
+
subgraph sg2["orders"]
|
|
345
|
+
n2_0["order_id"]
|
|
346
|
+
end
|
|
347
|
+
subgraph sg3["stg_orders"]
|
|
348
|
+
n3_0["order_id"]
|
|
349
|
+
end
|
|
350
|
+
|
|
351
|
+
n2_0 -->|"aggregation"|n0_0
|
|
352
|
+
n3_0 -->|"direct"|n1_0
|
|
353
|
+
n3_0 -->|"direct"|n2_0
|
|
354
|
+
```
|
|
355
|
+
|
|
356
|
+
`stg_orders.order_id` flows directly into `orders.order_id` and `order_enriched.order_id`. `orders.order_id` is then aggregated into `customers.order_count`. Each edge shows its per-hop transformation type.
|
|
357
|
+
|
|
358
|
+
### Known limitations
|
|
359
|
+
|
|
360
|
+
- **Requires `dbt compile`**: there is no SQL-parsing fallback; a manifest containing compiled SQL is always required
|
|
361
|
+
- **SELECT \* chains**: resolution depends on YAML column definitions in upstream models; unresolved columns are reported in `errors[]`
|
|
362
|
+
- **Dialect-specific syntax**: pass `--dialect bigquery` (or another dialect) for better coverage
|
|
363
|
+
- **Performance**: first run parses all upstream models; results are cached in `.dlin_cache/` for subsequent queries
|
|
364
|
+
|
|
233
365
|
## Key subcommands
|
|
234
366
|
|
|
235
367
|
### `list`
|
|
@@ -268,7 +400,7 @@ dlin graph --node-type model,source # filter by node type
|
|
|
268
400
|
|
|
269
401
|
## Data sources
|
|
270
402
|
|
|
271
|
-
dlin aims to work without `dbt compile`…
|
|
403
|
+
dlin aims to work without `dbt compile` (except for column-level lineage, which always requires `manifest.json`). By default it parses SQL files directly, but it can also leverage a pre-compiled `manifest.json` for additional accuracy when one is available.
|
|
272
404
|
|
|
273
405
|
**SQL parsing (default)**: extracts `ref()` and `source()` from SQL via regex + Jinja template evaluation. No Python or dbt needed. Generic tests (`not_null`, `unique`, `relationships`, etc.) are inferred from YAML schema declarations.
|
|
274
406
|
|