datalex-cli 0.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84)
  1. datalex_cli-0.1.1/LICENSE +21 -0
  2. datalex_cli-0.1.1/PKG-INFO +228 -0
  3. datalex_cli-0.1.1/README.md +176 -0
  4. datalex_cli-0.1.1/datalex_cli.egg-info/PKG-INFO +228 -0
  5. datalex_cli-0.1.1/datalex_cli.egg-info/SOURCES.txt +82 -0
  6. datalex_cli-0.1.1/datalex_cli.egg-info/dependency_links.txt +1 -0
  7. datalex_cli-0.1.1/datalex_cli.egg-info/entry_points.txt +2 -0
  8. datalex_cli-0.1.1/datalex_cli.egg-info/requires.txt +36 -0
  9. datalex_cli-0.1.1/datalex_cli.egg-info/top_level.txt +2 -0
  10. datalex_cli-0.1.1/packages/cli/src/datalex_cli/__init__.py +1 -0
  11. datalex_cli-0.1.1/packages/cli/src/datalex_cli/datalex_cli.py +658 -0
  12. datalex_cli-0.1.1/packages/cli/src/datalex_cli/main.py +2925 -0
  13. datalex_cli-0.1.1/packages/core_engine/src/datalex_core/__init__.py +94 -0
  14. datalex_cli-0.1.1/packages/core_engine/src/datalex_core/_schemas/datalex/common.schema.json +127 -0
  15. datalex_cli-0.1.1/packages/core_engine/src/datalex_core/_schemas/datalex/domain.schema.json +24 -0
  16. datalex_cli-0.1.1/packages/core_engine/src/datalex_core/_schemas/datalex/entity.schema.json +158 -0
  17. datalex_cli-0.1.1/packages/core_engine/src/datalex_core/_schemas/datalex/model.schema.json +141 -0
  18. datalex_cli-0.1.1/packages/core_engine/src/datalex_core/_schemas/datalex/policy.schema.json +70 -0
  19. datalex_cli-0.1.1/packages/core_engine/src/datalex_core/_schemas/datalex/project.schema.json +82 -0
  20. datalex_cli-0.1.1/packages/core_engine/src/datalex_core/_schemas/datalex/snippet.schema.json +24 -0
  21. datalex_cli-0.1.1/packages/core_engine/src/datalex_core/_schemas/datalex/source.schema.json +104 -0
  22. datalex_cli-0.1.1/packages/core_engine/src/datalex_core/_schemas/datalex/term.schema.json +30 -0
  23. datalex_cli-0.1.1/packages/core_engine/src/datalex_core/canonical.py +166 -0
  24. datalex_cli-0.1.1/packages/core_engine/src/datalex_core/completion.py +204 -0
  25. datalex_cli-0.1.1/packages/core_engine/src/datalex_core/connectors/__init__.py +39 -0
  26. datalex_cli-0.1.1/packages/core_engine/src/datalex_core/connectors/base.py +417 -0
  27. datalex_cli-0.1.1/packages/core_engine/src/datalex_core/connectors/bigquery.py +229 -0
  28. datalex_cli-0.1.1/packages/core_engine/src/datalex_core/connectors/databricks.py +262 -0
  29. datalex_cli-0.1.1/packages/core_engine/src/datalex_core/connectors/mysql.py +266 -0
  30. datalex_cli-0.1.1/packages/core_engine/src/datalex_core/connectors/postgres.py +309 -0
  31. datalex_cli-0.1.1/packages/core_engine/src/datalex_core/connectors/redshift.py +298 -0
  32. datalex_cli-0.1.1/packages/core_engine/src/datalex_core/connectors/snowflake.py +336 -0
  33. datalex_cli-0.1.1/packages/core_engine/src/datalex_core/connectors/sqlserver.py +425 -0
  34. datalex_cli-0.1.1/packages/core_engine/src/datalex_core/datalex/__init__.py +26 -0
  35. datalex_cli-0.1.1/packages/core_engine/src/datalex_core/datalex/diff.py +188 -0
  36. datalex_cli-0.1.1/packages/core_engine/src/datalex_core/datalex/errors.py +85 -0
  37. datalex_cli-0.1.1/packages/core_engine/src/datalex_core/datalex/loader.py +512 -0
  38. datalex_cli-0.1.1/packages/core_engine/src/datalex_core/datalex/migrate_layout.py +382 -0
  39. datalex_cli-0.1.1/packages/core_engine/src/datalex_core/datalex/parse_cache.py +102 -0
  40. datalex_cli-0.1.1/packages/core_engine/src/datalex_core/datalex/project.py +214 -0
  41. datalex_cli-0.1.1/packages/core_engine/src/datalex_core/datalex/types.py +224 -0
  42. datalex_cli-0.1.1/packages/core_engine/src/datalex_core/dbt/__init__.py +18 -0
  43. datalex_cli-0.1.1/packages/core_engine/src/datalex_core/dbt/emit.py +344 -0
  44. datalex_cli-0.1.1/packages/core_engine/src/datalex_core/dbt/manifest.py +329 -0
  45. datalex_cli-0.1.1/packages/core_engine/src/datalex_core/dbt/profiles.py +185 -0
  46. datalex_cli-0.1.1/packages/core_engine/src/datalex_core/dbt/sync.py +279 -0
  47. datalex_cli-0.1.1/packages/core_engine/src/datalex_core/dbt/warehouse.py +215 -0
  48. datalex_cli-0.1.1/packages/core_engine/src/datalex_core/dialects/__init__.py +15 -0
  49. datalex_cli-0.1.1/packages/core_engine/src/datalex_core/dialects/_common.py +48 -0
  50. datalex_cli-0.1.1/packages/core_engine/src/datalex_core/dialects/base.py +47 -0
  51. datalex_cli-0.1.1/packages/core_engine/src/datalex_core/dialects/postgres.py +164 -0
  52. datalex_cli-0.1.1/packages/core_engine/src/datalex_core/dialects/registry.py +36 -0
  53. datalex_cli-0.1.1/packages/core_engine/src/datalex_core/dialects/snowflake.py +129 -0
  54. datalex_cli-0.1.1/packages/core_engine/src/datalex_core/diffing.py +358 -0
  55. datalex_cli-0.1.1/packages/core_engine/src/datalex_core/docs_generator.py +797 -0
  56. datalex_cli-0.1.1/packages/core_engine/src/datalex_core/doctor.py +181 -0
  57. datalex_cli-0.1.1/packages/core_engine/src/datalex_core/generators.py +478 -0
  58. datalex_cli-0.1.1/packages/core_engine/src/datalex_core/importers.py +1176 -0
  59. datalex_cli-0.1.1/packages/core_engine/src/datalex_core/issues.py +23 -0
  60. datalex_cli-0.1.1/packages/core_engine/src/datalex_core/loader.py +21 -0
  61. datalex_cli-0.1.1/packages/core_engine/src/datalex_core/migrate.py +316 -0
  62. datalex_cli-0.1.1/packages/core_engine/src/datalex_core/modeling.py +679 -0
  63. datalex_cli-0.1.1/packages/core_engine/src/datalex_core/packages.py +430 -0
  64. datalex_cli-0.1.1/packages/core_engine/src/datalex_core/policy.py +1037 -0
  65. datalex_cli-0.1.1/packages/core_engine/src/datalex_core/resolver.py +456 -0
  66. datalex_cli-0.1.1/packages/core_engine/src/datalex_core/schema.py +54 -0
  67. datalex_cli-0.1.1/packages/core_engine/src/datalex_core/semantic.py +1561 -0
  68. datalex_cli-0.1.1/pyproject.toml +63 -0
  69. datalex_cli-0.1.1/setup.cfg +4 -0
  70. datalex_cli-0.1.1/tests/test_cli_dx.py +690 -0
  71. datalex_cli-0.1.1/tests/test_connectors.py +873 -0
  72. datalex_cli-0.1.1/tests/test_docs_generation.py +360 -0
  73. datalex_cli-0.1.1/tests/test_importers_v2.py +392 -0
  74. datalex_cli-0.1.1/tests/test_inference.py +286 -0
  75. datalex_cli-0.1.1/tests/test_integrations.py +114 -0
  76. datalex_cli-0.1.1/tests/test_modeling_v3.py +197 -0
  77. datalex_cli-0.1.1/tests/test_multi_model.py +540 -0
  78. datalex_cli-0.1.1/tests/test_mvp.py +102 -0
  79. datalex_cli-0.1.1/tests/test_performance.py +77 -0
  80. datalex_cli-0.1.1/tests/test_phase1_modeling_core.py +286 -0
  81. datalex_cli-0.1.1/tests/test_phase5_web_ui.py +729 -0
  82. datalex_cli-0.1.1/tests/test_policy_engine_v2.py +992 -0
  83. datalex_cli-0.1.1/tests/test_real_scenarios.py +81 -0
  84. datalex_cli-0.1.1/tests/test_schema_v2.py +808 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 duckcode.ai
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,228 @@
1
+ Metadata-Version: 2.4
2
+ Name: datalex-cli
3
+ Version: 0.1.1
4
+ Summary: Git-native data modeling for dbt users
5
+ Author-email: DuckCode AI Labs <hello@duckcode.ai>
6
+ License: MIT
7
+ Project-URL: Homepage, https://duckcode.ai
8
+ Project-URL: Repository, https://github.com/duckcode-ai/DataLex
9
+ Project-URL: Issues, https://github.com/duckcode-ai/DataLex/issues
10
+ Keywords: dbt,data-modeling,yaml,git-native,erd,data-warehouse
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.9
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Topic :: Database
20
+ Classifier: Topic :: Software Development :: Code Generators
21
+ Requires-Python: >=3.9
22
+ Description-Content-Type: text/markdown
23
+ License-File: LICENSE
24
+ Requires-Dist: PyYAML>=6.0
25
+ Requires-Dist: jsonschema>=4.0
26
+ Provides-Extra: duckdb
27
+ Requires-Dist: duckdb>=0.9; extra == "duckdb"
28
+ Provides-Extra: postgres
29
+ Requires-Dist: psycopg2-binary; extra == "postgres"
30
+ Provides-Extra: mysql
31
+ Requires-Dist: mysql-connector-python; extra == "mysql"
32
+ Provides-Extra: snowflake
33
+ Requires-Dist: snowflake-connector-python; extra == "snowflake"
34
+ Provides-Extra: bigquery
35
+ Requires-Dist: google-cloud-bigquery; extra == "bigquery"
36
+ Provides-Extra: databricks
37
+ Requires-Dist: databricks-sql-connector; extra == "databricks"
38
+ Provides-Extra: sqlserver
39
+ Requires-Dist: pyodbc; extra == "sqlserver"
40
+ Provides-Extra: redshift
41
+ Requires-Dist: redshift-connector; extra == "redshift"
42
+ Provides-Extra: all
43
+ Requires-Dist: duckdb>=0.9; extra == "all"
44
+ Requires-Dist: psycopg2-binary; extra == "all"
45
+ Requires-Dist: mysql-connector-python; extra == "all"
46
+ Requires-Dist: snowflake-connector-python; extra == "all"
47
+ Requires-Dist: google-cloud-bigquery; extra == "all"
48
+ Requires-Dist: databricks-sql-connector; extra == "all"
49
+ Requires-Dist: pyodbc; extra == "all"
50
+ Requires-Dist: redshift-connector; extra == "all"
51
+ Dynamic: license-file
52
+
53
+ <div align="center">
54
+ <a href="https://duckcode.ai/" target="_blank" rel="noopener noreferrer">
55
+ <img src="Assets/DataLex.png" alt="DataLex by DuckCode AI Labs" width="220" />
56
+ </a>
57
+
58
+ # DataLex
59
+
60
+ **Git-native data modeling for dbt users.**
61
+
62
+ Point us at your dbt project and warehouse — we produce versioned, reviewable YAML
63
+ with contracts, lineage, ERDs, and clean round-trip back to dbt.
64
+
65
+ <p align="center">
66
+ <a href="https://github.com/duckcode-ai/DataLex/blob/main/LICENSE">
67
+ <img src="https://img.shields.io/github/license/duckcode-ai/DataLex?style=for-the-badge&color=22c55e" alt="MIT License" />
68
+ </a>
69
+ <a href="https://discord.gg/Dnm6bUvk">
70
+ <img src="https://img.shields.io/badge/Discord-Join%20Community-5865F2?style=for-the-badge&logo=discord&logoColor=white" alt="Discord Community" />
71
+ </a>
72
+ <a href="https://github.com/duckcode-ai/DataLex/stargazers">
73
+ <img src="https://img.shields.io/github/stars/duckcode-ai/DataLex?style=for-the-badge&color=f59e0b" alt="GitHub Stars" />
74
+ </a>
75
+ </p>
76
+ </div>
77
+
78
+ <p align="center">
79
+ <img src="Assets/Overview.png" alt="DataLex Visual Studio — file tree, YAML editor, and React Flow ERD on the same entity" width="100%" />
80
+ </p>
81
+
82
+ ## 60-second demo
83
+
84
+ ```bash
85
+ git clone https://github.com/duckcode-ai/DataLex.git
86
+ cd DataLex
87
+ pip install -e '.[duckdb]'
88
+
89
+ # 1. Build a local DuckDB warehouse (no external credentials)
90
+ python examples/jaffle_shop_demo/setup.py
91
+
92
+ # 2. Sync the dbt project into DataLex YAML
93
+ ./datalex datalex dbt sync examples/jaffle_shop_demo \
94
+ --out-root examples/jaffle_shop_demo/datalex-out
95
+
96
+ # 3. Emit dbt-parseable YAML back, with contracts enforced
97
+ ./datalex datalex dbt emit examples/jaffle_shop_demo/datalex-out \
98
+ --out-dir examples/jaffle_shop_demo/dbt-out
99
+ ```
100
+
101
+ Open `examples/jaffle_shop_demo/datalex-out/sources/jaffle_shop_raw.yaml` —
102
+ every column has its warehouse type, descriptions from the manifest, and a
103
+ `meta.datalex.dbt.unique_id` stamp so re-running the sync never clobbers
104
+ anything you've hand-authored.
105
+
106
+ ## What it does
107
+
108
+ DataLex treats your data models as code. On top of a stricter YAML
109
+ substrate (the **DataLex** layout — one file per entity, `kind:`-dispatched,
110
+ streaming-safe for 10K+ entities), it gives you:
111
+
112
+ - **`datalex datalex dbt sync <project>`** — reads `target/manifest.json` + your
113
+ `profiles.yml`, introspects live column types, and merges them into
114
+ DataLex YAML. Idempotent: user-authored `description:`, `tags:`,
115
+ `sensitivity:`, and `tests:` survive re-sync.
116
+ - **`datalex datalex dbt emit`** — writes `sources.yml` and `schema.yml` with
117
+ `contract.enforced: true` and `data_type:` on every column. `dbt parse`
118
+ succeeds out of the box.
119
+ - **`datalex datalex emit ddl --dialect ...`** — Postgres, Snowflake, BigQuery,
120
+ Databricks, MySQL, SQL Server, Redshift. Same source, all dialects.
121
+ - **`datalex datalex diff`** — semantic diff with explicit rename tracking
122
+ (`previous_name:`), breaking-change gate for CI.
123
+ - **Cross-repo package imports** — pin `acme/warehouse-core@1.4.0` in
124
+ `imports:`, lockfile + content hash drift detection, Git-or-path
125
+ resolution, on-disk parse cache for large projects.
126
+ - **Visual Studio** — React Flow UI for editing entities, relationships,
127
+ and metadata; same YAML files as the CLI.
128
+
129
+ ## Supported warehouses
130
+
131
+ | Warehouse | `dbt sync` introspection | Forward DDL | Reverse engineering |
132
+ |---|:---:|:---:|:---:|
133
+ | DuckDB | ✓ | — | — |
134
+ | PostgreSQL | ✓ | ✓ | ✓ |
135
+ | Snowflake | (fallback) | ✓ | ✓ |
136
+ | BigQuery | (fallback) | ✓ | ✓ |
137
+ | Databricks | (fallback) | ✓ | ✓ |
138
+ | MySQL | (fallback) | ✓ | ✓ |
139
+ | SQL Server / Azure SQL | (fallback) | ✓ | ✓ |
140
+ | Redshift | (fallback) | ✓ | ✓ |
141
+
142
+ "Fallback" means `dbt sync` uses the existing full-schema connector (slower
143
+ than the per-table path, but it already works today; narrow per-dialect
144
+ introspection paths will ship over time).
145
+
146
+ ## Install
147
+
148
+ ```bash
149
+ git clone https://github.com/duckcode-ai/DataLex.git
150
+ cd DataLex
151
+
152
+ python3 -m venv .venv
153
+ source .venv/bin/activate
154
+ pip install -e . # puts `datalex` on PATH
155
+ pip install -e '.[duckdb]' # add warehouse drivers you need
156
+
157
+ # optional — only needed for the Visual Studio
158
+ npm --prefix packages/api-server install
159
+ npm --prefix packages/web-app install
160
+ ```
161
+
162
+ Available extras: `duckdb`, `postgres`, `mysql`, `snowflake`,
163
+ `bigquery`, `databricks`, `sqlserver`, `redshift`, or `all`.
164
+
165
+ Prereqs: Python 3.9+, Git. Node.js 18+ if you want the UI.
166
+
167
+ ## Project layout
168
+
169
+ ```text
170
+ DataLex/
171
+ packages/
172
+ core_engine/ # Python: loader, dialects, dbt integration, packages
173
+ src/datalex_core/
174
+ _schemas/datalex/ # JSON Schema per `kind:` — bundled with the package
175
+ cli/ # `datalex` entry point
176
+ api-server/ # Node.js API (UI backend)
177
+ web-app/ # React Flow studio
178
+ examples/
179
+ jaffle_shop_demo/ # zero-setup dbt-sync demo (DuckDB)
180
+ model-examples/ # sample projects and scenario walkthroughs
181
+ docs/ # architecture, specs, runbooks
182
+ tests/ # unittest suite (core engine + datalex)
183
+ ```
184
+
185
+ ## Visual Studio (optional)
186
+
187
+ If you want the UI on top of your DataLex project, run the two dev servers:
188
+
189
+ ```bash
190
+ # Terminal 1
191
+ npm --prefix packages/api-server run dev
192
+ # Terminal 2
193
+ npm --prefix packages/web-app run dev
194
+ ```
195
+
196
+ Then open `http://localhost:5173`. The UI reads and writes the same YAML
197
+ files the CLI does — no database, no hosted service.
198
+
199
+ ## CI / GitOps
200
+
201
+ DataLex is designed to live in your repo next to your dbt project.
202
+ A typical CI step:
203
+
204
+ ```bash
205
+ ./datalex datalex validate datalex/
206
+ ./datalex datalex diff datalex-main/ datalex/ --exit-on-breaking
207
+ ./datalex datalex dbt emit datalex/ --out-dir dbt/
208
+ dbt parse
209
+ ```
210
+
211
+ ## Documentation
212
+
213
+ - **[Tutorial: dbt sync in 5 minutes](docs/tutorial-dbt-sync.md)** — the
214
+ full jaffle_shop walkthrough with explanations.
215
+ - **[DataLex layout reference](docs/datalex-layout.md)** — what each
216
+ `kind:` file looks like and how the loader discovers them.
217
+ - **[CLI cheat sheet](docs/cli.md)** — every `datalex datalex …` subcommand on
218
+ one page.
219
+ - **[Architecture](docs/architecture.md)** — core engine modules and
220
+ end-to-end data flow.
221
+ - Pre-DataLex specs have moved to [docs/archive/](docs/archive/).
222
+
223
+ ## Community
224
+
225
+ - Discord: [![Join Discord](https://img.shields.io/badge/Discord-Join%20DuckCode%20AI-5865F2?logo=discord&logoColor=white)](https://discord.gg/Dnm6bUvk)
226
+ - Issues: [![GitHub Issues](https://img.shields.io/badge/Issues-Report%20or%20Request-0ea5e9)](https://github.com/duckcode-ai/DataLex/issues)
227
+ - Contributing: `CONTRIBUTING.md`
228
+ - License: [![MIT](https://img.shields.io/badge/License-MIT-22c55e?style=flat-square)](LICENSE)
@@ -0,0 +1,176 @@
1
+ <div align="center">
2
+ <a href="https://duckcode.ai/" target="_blank" rel="noopener noreferrer">
3
+ <img src="Assets/DataLex.png" alt="DataLex by DuckCode AI Labs" width="220" />
4
+ </a>
5
+
6
+ # DataLex
7
+
8
+ **Git-native data modeling for dbt users.**
9
+
10
+ Point us at your dbt project and warehouse — we produce versioned, reviewable YAML
11
+ with contracts, lineage, ERDs, and clean round-trip back to dbt.
12
+
13
+ <p align="center">
14
+ <a href="https://github.com/duckcode-ai/DataLex/blob/main/LICENSE">
15
+ <img src="https://img.shields.io/github/license/duckcode-ai/DataLex?style=for-the-badge&color=22c55e" alt="MIT License" />
16
+ </a>
17
+ <a href="https://discord.gg/Dnm6bUvk">
18
+ <img src="https://img.shields.io/badge/Discord-Join%20Community-5865F2?style=for-the-badge&logo=discord&logoColor=white" alt="Discord Community" />
19
+ </a>
20
+ <a href="https://github.com/duckcode-ai/DataLex/stargazers">
21
+ <img src="https://img.shields.io/github/stars/duckcode-ai/DataLex?style=for-the-badge&color=f59e0b" alt="GitHub Stars" />
22
+ </a>
23
+ </p>
24
+ </div>
25
+
26
+ <p align="center">
27
+ <img src="Assets/Overview.png" alt="DataLex Visual Studio — file tree, YAML editor, and React Flow ERD on the same entity" width="100%" />
28
+ </p>
29
+
30
+ ## 60-second demo
31
+
32
+ ```bash
33
+ git clone https://github.com/duckcode-ai/DataLex.git
34
+ cd DataLex
35
+ pip install -e '.[duckdb]'
36
+
37
+ # 1. Build a local DuckDB warehouse (no external credentials)
38
+ python examples/jaffle_shop_demo/setup.py
39
+
40
+ # 2. Sync the dbt project into DataLex YAML
41
+ ./datalex datalex dbt sync examples/jaffle_shop_demo \
42
+ --out-root examples/jaffle_shop_demo/datalex-out
43
+
44
+ # 3. Emit dbt-parseable YAML back, with contracts enforced
45
+ ./datalex datalex dbt emit examples/jaffle_shop_demo/datalex-out \
46
+ --out-dir examples/jaffle_shop_demo/dbt-out
47
+ ```
48
+
49
+ Open `examples/jaffle_shop_demo/datalex-out/sources/jaffle_shop_raw.yaml` —
50
+ every column has its warehouse type, descriptions from the manifest, and a
51
+ `meta.datalex.dbt.unique_id` stamp so re-running the sync never clobbers
52
+ anything you've hand-authored.
53
+
54
+ ## What it does
55
+
56
+ DataLex treats your data models as code. On top of a stricter YAML
57
+ substrate (the **DataLex** layout — one file per entity, `kind:`-dispatched,
58
+ streaming-safe for 10K+ entities), it gives you:
59
+
60
+ - **`datalex datalex dbt sync <project>`** — reads `target/manifest.json` + your
61
+ `profiles.yml`, introspects live column types, and merges them into
62
+ DataLex YAML. Idempotent: user-authored `description:`, `tags:`,
63
+ `sensitivity:`, and `tests:` survive re-sync.
64
+ - **`datalex datalex dbt emit`** — writes `sources.yml` and `schema.yml` with
65
+ `contract.enforced: true` and `data_type:` on every column. `dbt parse`
66
+ succeeds out of the box.
67
+ - **`datalex datalex emit ddl --dialect ...`** — Postgres, Snowflake, BigQuery,
68
+ Databricks, MySQL, SQL Server, Redshift. Same source, all dialects.
69
+ - **`datalex datalex diff`** — semantic diff with explicit rename tracking
70
+ (`previous_name:`), breaking-change gate for CI.
71
+ - **Cross-repo package imports** — pin `acme/warehouse-core@1.4.0` in
72
+ `imports:`, lockfile + content hash drift detection, Git-or-path
73
+ resolution, on-disk parse cache for large projects.
74
+ - **Visual Studio** — React Flow UI for editing entities, relationships,
75
+ and metadata; same YAML files as the CLI.
76
+
77
+ ## Supported warehouses
78
+
79
+ | Warehouse | `dbt sync` introspection | Forward DDL | Reverse engineering |
80
+ |---|:---:|:---:|:---:|
81
+ | DuckDB | ✓ | — | — |
82
+ | PostgreSQL | ✓ | ✓ | ✓ |
83
+ | Snowflake | (fallback) | ✓ | ✓ |
84
+ | BigQuery | (fallback) | ✓ | ✓ |
85
+ | Databricks | (fallback) | ✓ | ✓ |
86
+ | MySQL | (fallback) | ✓ | ✓ |
87
+ | SQL Server / Azure SQL | (fallback) | ✓ | ✓ |
88
+ | Redshift | (fallback) | ✓ | ✓ |
89
+
90
+ "Fallback" means `dbt sync` uses the existing full-schema connector (slower
91
+ than the per-table path, but it already works today; narrow per-dialect
92
+ introspection paths will ship over time).
93
+
94
+ ## Install
95
+
96
+ ```bash
97
+ git clone https://github.com/duckcode-ai/DataLex.git
98
+ cd DataLex
99
+
100
+ python3 -m venv .venv
101
+ source .venv/bin/activate
102
+ pip install -e . # puts `datalex` on PATH
103
+ pip install -e '.[duckdb]' # add warehouse drivers you need
104
+
105
+ # optional — only needed for the Visual Studio
106
+ npm --prefix packages/api-server install
107
+ npm --prefix packages/web-app install
108
+ ```
109
+
110
+ Available extras: `duckdb`, `postgres`, `mysql`, `snowflake`,
111
+ `bigquery`, `databricks`, `sqlserver`, `redshift`, or `all`.
112
+
113
+ Prereqs: Python 3.9+, Git. Node.js 18+ if you want the UI.
114
+
115
+ ## Project layout
116
+
117
+ ```text
118
+ DataLex/
119
+ packages/
120
+ core_engine/ # Python: loader, dialects, dbt integration, packages
121
+ src/datalex_core/
122
+ _schemas/datalex/ # JSON Schema per `kind:` — bundled with the package
123
+ cli/ # `datalex` entry point
124
+ api-server/ # Node.js API (UI backend)
125
+ web-app/ # React Flow studio
126
+ examples/
127
+ jaffle_shop_demo/ # zero-setup dbt-sync demo (DuckDB)
128
+ model-examples/ # sample projects and scenario walkthroughs
129
+ docs/ # architecture, specs, runbooks
130
+ tests/ # unittest suite (core engine + datalex)
131
+ ```
132
+
133
+ ## Visual Studio (optional)
134
+
135
+ If you want the UI on top of your DataLex project, run the two dev servers:
136
+
137
+ ```bash
138
+ # Terminal 1
139
+ npm --prefix packages/api-server run dev
140
+ # Terminal 2
141
+ npm --prefix packages/web-app run dev
142
+ ```
143
+
144
+ Then open `http://localhost:5173`. The UI reads and writes the same YAML
145
+ files the CLI does — no database, no hosted service.
146
+
147
+ ## CI / GitOps
148
+
149
+ DataLex is designed to live in your repo next to your dbt project.
150
+ A typical CI step:
151
+
152
+ ```bash
153
+ ./datalex datalex validate datalex/
154
+ ./datalex datalex diff datalex-main/ datalex/ --exit-on-breaking
155
+ ./datalex datalex dbt emit datalex/ --out-dir dbt/
156
+ dbt parse
157
+ ```
158
+
159
+ ## Documentation
160
+
161
+ - **[Tutorial: dbt sync in 5 minutes](docs/tutorial-dbt-sync.md)** — the
162
+ full jaffle_shop walkthrough with explanations.
163
+ - **[DataLex layout reference](docs/datalex-layout.md)** — what each
164
+ `kind:` file looks like and how the loader discovers them.
165
+ - **[CLI cheat sheet](docs/cli.md)** — every `datalex datalex …` subcommand on
166
+ one page.
167
+ - **[Architecture](docs/architecture.md)** — core engine modules and
168
+ end-to-end data flow.
169
+ - Pre-DataLex specs have moved to [docs/archive/](docs/archive/).
170
+
171
+ ## Community
172
+
173
+ - Discord: [![Join Discord](https://img.shields.io/badge/Discord-Join%20DuckCode%20AI-5865F2?logo=discord&logoColor=white)](https://discord.gg/Dnm6bUvk)
174
+ - Issues: [![GitHub Issues](https://img.shields.io/badge/Issues-Report%20or%20Request-0ea5e9)](https://github.com/duckcode-ai/DataLex/issues)
175
+ - Contributing: `CONTRIBUTING.md`
176
+ - License: [![MIT](https://img.shields.io/badge/License-MIT-22c55e?style=flat-square)](LICENSE)
@@ -0,0 +1,228 @@
1
+ Metadata-Version: 2.4
2
+ Name: datalex-cli
3
+ Version: 0.1.1
4
+ Summary: Git-native data modeling for dbt users
5
+ Author-email: DuckCode AI Labs <hello@duckcode.ai>
6
+ License: MIT
7
+ Project-URL: Homepage, https://duckcode.ai
8
+ Project-URL: Repository, https://github.com/duckcode-ai/DataLex
9
+ Project-URL: Issues, https://github.com/duckcode-ai/DataLex/issues
10
+ Keywords: dbt,data-modeling,yaml,git-native,erd,data-warehouse
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.9
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Topic :: Database
20
+ Classifier: Topic :: Software Development :: Code Generators
21
+ Requires-Python: >=3.9
22
+ Description-Content-Type: text/markdown
23
+ License-File: LICENSE
24
+ Requires-Dist: PyYAML>=6.0
25
+ Requires-Dist: jsonschema>=4.0
26
+ Provides-Extra: duckdb
27
+ Requires-Dist: duckdb>=0.9; extra == "duckdb"
28
+ Provides-Extra: postgres
29
+ Requires-Dist: psycopg2-binary; extra == "postgres"
30
+ Provides-Extra: mysql
31
+ Requires-Dist: mysql-connector-python; extra == "mysql"
32
+ Provides-Extra: snowflake
33
+ Requires-Dist: snowflake-connector-python; extra == "snowflake"
34
+ Provides-Extra: bigquery
35
+ Requires-Dist: google-cloud-bigquery; extra == "bigquery"
36
+ Provides-Extra: databricks
37
+ Requires-Dist: databricks-sql-connector; extra == "databricks"
38
+ Provides-Extra: sqlserver
39
+ Requires-Dist: pyodbc; extra == "sqlserver"
40
+ Provides-Extra: redshift
41
+ Requires-Dist: redshift-connector; extra == "redshift"
42
+ Provides-Extra: all
43
+ Requires-Dist: duckdb>=0.9; extra == "all"
44
+ Requires-Dist: psycopg2-binary; extra == "all"
45
+ Requires-Dist: mysql-connector-python; extra == "all"
46
+ Requires-Dist: snowflake-connector-python; extra == "all"
47
+ Requires-Dist: google-cloud-bigquery; extra == "all"
48
+ Requires-Dist: databricks-sql-connector; extra == "all"
49
+ Requires-Dist: pyodbc; extra == "all"
50
+ Requires-Dist: redshift-connector; extra == "all"
51
+ Dynamic: license-file
52
+
53
+ <div align="center">
54
+ <a href="https://duckcode.ai/" target="_blank" rel="noopener noreferrer">
55
+ <img src="Assets/DataLex.png" alt="DataLex by DuckCode AI Labs" width="220" />
56
+ </a>
57
+
58
+ # DataLex
59
+
60
+ **Git-native data modeling for dbt users.**
61
+
62
+ Point us at your dbt project and warehouse — we produce versioned, reviewable YAML
63
+ with contracts, lineage, ERDs, and clean round-trip back to dbt.
64
+
65
+ <p align="center">
66
+ <a href="https://github.com/duckcode-ai/DataLex/blob/main/LICENSE">
67
+ <img src="https://img.shields.io/github/license/duckcode-ai/DataLex?style=for-the-badge&color=22c55e" alt="MIT License" />
68
+ </a>
69
+ <a href="https://discord.gg/Dnm6bUvk">
70
+ <img src="https://img.shields.io/badge/Discord-Join%20Community-5865F2?style=for-the-badge&logo=discord&logoColor=white" alt="Discord Community" />
71
+ </a>
72
+ <a href="https://github.com/duckcode-ai/DataLex/stargazers">
73
+ <img src="https://img.shields.io/github/stars/duckcode-ai/DataLex?style=for-the-badge&color=f59e0b" alt="GitHub Stars" />
74
+ </a>
75
+ </p>
76
+ </div>
77
+
78
+ <p align="center">
79
+ <img src="Assets/Overview.png" alt="DataLex Visual Studio — file tree, YAML editor, and React Flow ERD on the same entity" width="100%" />
80
+ </p>
81
+
82
+ ## 60-second demo
83
+
84
+ ```bash
85
+ git clone https://github.com/duckcode-ai/DataLex.git
86
+ cd DataLex
87
+ pip install -e '.[duckdb]'
88
+
89
+ # 1. Build a local DuckDB warehouse (no external credentials)
90
+ python examples/jaffle_shop_demo/setup.py
91
+
92
+ # 2. Sync the dbt project into DataLex YAML
93
+ ./datalex datalex dbt sync examples/jaffle_shop_demo \
94
+ --out-root examples/jaffle_shop_demo/datalex-out
95
+
96
+ # 3. Emit dbt-parseable YAML back, with contracts enforced
97
+ ./datalex datalex dbt emit examples/jaffle_shop_demo/datalex-out \
98
+ --out-dir examples/jaffle_shop_demo/dbt-out
99
+ ```
100
+
101
+ Open `examples/jaffle_shop_demo/datalex-out/sources/jaffle_shop_raw.yaml` —
102
+ every column has its warehouse type, descriptions from the manifest, and a
103
+ `meta.datalex.dbt.unique_id` stamp so re-running the sync never clobbers
104
+ anything you've hand-authored.
105
+
106
+ ## What it does
107
+
108
+ DataLex treats your data models as code. On top of a stricter YAML
109
+ substrate (the **DataLex** layout — one file per entity, `kind:`-dispatched,
110
+ streaming-safe for 10K+ entities), it gives you:
111
+
112
+ - **`datalex datalex dbt sync <project>`** — reads `target/manifest.json` + your
113
+ `profiles.yml`, introspects live column types, and merges them into
114
+ DataLex YAML. Idempotent: user-authored `description:`, `tags:`,
115
+ `sensitivity:`, and `tests:` survive re-sync.
116
+ - **`datalex datalex dbt emit`** — writes `sources.yml` and `schema.yml` with
117
+ `contract.enforced: true` and `data_type:` on every column. `dbt parse`
118
+ succeeds out of the box.
119
+ - **`datalex datalex emit ddl --dialect ...`** — Postgres, Snowflake, BigQuery,
120
+ Databricks, MySQL, SQL Server, Redshift. Same source, all dialects.
121
+ - **`datalex datalex diff`** — semantic diff with explicit rename tracking
122
+ (`previous_name:`), breaking-change gate for CI.
123
+ - **Cross-repo package imports** — pin `acme/warehouse-core@1.4.0` in
124
+ `imports:`, lockfile + content hash drift detection, Git-or-path
125
+ resolution, on-disk parse cache for large projects.
126
+ - **Visual Studio** — React Flow UI for editing entities, relationships,
127
+ and metadata; same YAML files as the CLI.
128
+
129
+ ## Supported warehouses
130
+
131
+ | Warehouse | `dbt sync` introspection | Forward DDL | Reverse engineering |
132
+ |---|:---:|:---:|:---:|
133
+ | DuckDB | ✓ | — | — |
134
+ | PostgreSQL | ✓ | ✓ | ✓ |
135
+ | Snowflake | (fallback) | ✓ | ✓ |
136
+ | BigQuery | (fallback) | ✓ | ✓ |
137
+ | Databricks | (fallback) | ✓ | ✓ |
138
+ | MySQL | (fallback) | ✓ | ✓ |
139
+ | SQL Server / Azure SQL | (fallback) | ✓ | ✓ |
140
+ | Redshift | (fallback) | ✓ | ✓ |
141
+
142
+ "Fallback" means `dbt sync` uses the existing full-schema connector (slower
143
+ than the per-table path, but it already works today; narrow per-dialect
144
+ introspection paths will ship over time).
145
+
146
+ ## Install
147
+
148
+ ```bash
149
+ git clone https://github.com/duckcode-ai/DataLex.git
150
+ cd DataLex
151
+
152
+ python3 -m venv .venv
153
+ source .venv/bin/activate
154
+ pip install -e . # puts `datalex` on PATH
155
+ pip install -e '.[duckdb]' # add warehouse drivers you need
156
+
157
+ # optional — only needed for the Visual Studio
158
+ npm --prefix packages/api-server install
159
+ npm --prefix packages/web-app install
160
+ ```
161
+
162
+ Available extras: `duckdb`, `postgres`, `mysql`, `snowflake`,
163
+ `bigquery`, `databricks`, `sqlserver`, `redshift`, or `all`.
164
+
165
+ Prereqs: Python 3.9+, Git. Node.js 18+ if you want the UI.
166
+
167
+ ## Project layout
168
+
169
+ ```text
170
+ DataLex/
171
+ packages/
172
+ core_engine/ # Python: loader, dialects, dbt integration, packages
173
+ src/datalex_core/
174
+ _schemas/datalex/ # JSON Schema per `kind:` — bundled with the package
175
+ cli/ # `datalex` entry point
176
+ api-server/ # Node.js API (UI backend)
177
+ web-app/ # React Flow studio
178
+ examples/
179
+ jaffle_shop_demo/ # zero-setup dbt-sync demo (DuckDB)
180
+ model-examples/ # sample projects and scenario walkthroughs
181
+ docs/ # architecture, specs, runbooks
182
+ tests/ # unittest suite (core engine + datalex)
183
+ ```
184
+
185
+ ## Visual Studio (optional)
186
+
187
+ If you want the UI on top of your DataLex project, run the two dev servers:
188
+
189
+ ```bash
190
+ # Terminal 1
191
+ npm --prefix packages/api-server run dev
192
+ # Terminal 2
193
+ npm --prefix packages/web-app run dev
194
+ ```
195
+
196
+ Then open `http://localhost:5173`. The UI reads and writes the same YAML
197
+ files the CLI does — no database, no hosted service.
198
+
199
+ ## CI / GitOps
200
+
201
+ DataLex is designed to live in your repo next to your dbt project.
202
+ A typical CI step:
203
+
204
+ ```bash
205
+ ./datalex datalex validate datalex/
206
+ ./datalex datalex diff datalex-main/ datalex/ --exit-on-breaking
207
+ ./datalex datalex dbt emit datalex/ --out-dir dbt/
208
+ dbt parse
209
+ ```
210
+
211
+ ## Documentation
212
+
213
+ - **[Tutorial: dbt sync in 5 minutes](docs/tutorial-dbt-sync.md)** — the
214
+ full jaffle_shop walkthrough with explanations.
215
+ - **[DataLex layout reference](docs/datalex-layout.md)** — what each
216
+ `kind:` file looks like and how the loader discovers them.
217
+ - **[CLI cheat sheet](docs/cli.md)** — every `datalex datalex …` subcommand on
218
+ one page.
219
+ - **[Architecture](docs/architecture.md)** — core engine modules and
220
+ end-to-end data flow.
221
+ - Pre-DataLex specs have moved to [docs/archive/](docs/archive/).
222
+
223
+ ## Community
224
+
225
+ - Discord: [![Join Discord](https://img.shields.io/badge/Discord-Join%20DuckCode%20AI-5865F2?logo=discord&logoColor=white)](https://discord.gg/Dnm6bUvk)
226
+ - Issues: [![GitHub Issues](https://img.shields.io/badge/Issues-Report%20or%20Request-0ea5e9)](https://github.com/duckcode-ai/DataLex/issues)
227
+ - Contributing: `CONTRIBUTING.md`
228
+ - License: [![MIT](https://img.shields.io/badge/License-MIT-22c55e?style=flat-square)](LICENSE)