databricks-schema 0.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,29 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ jobs:
10
+ lint-and-test:
11
+ runs-on: ubuntu-latest
12
+
13
+ steps:
14
+ - uses: actions/checkout@v4
15
+
16
+ - name: Install uv
17
+ uses: astral-sh/setup-uv@v5
18
+
19
+ - name: Install dependencies
20
+ run: uv sync --all-groups
21
+
22
+ - name: Format check
23
+ run: uv run ruff format --check databricks_schema/ tests/
24
+
25
+ - name: Lint
26
+ run: uv run ruff check databricks_schema/ tests/
27
+
28
+ - name: Tests
29
+ run: uv run pytest
@@ -0,0 +1,50 @@
1
+ name: Release
2
+
3
+ on:
4
+ release:
5
+ types: [published]
6
+
7
+ jobs:
8
+ release:
9
+ runs-on: ubuntu-latest
10
+ environment: pypi
11
+ permissions:
12
+ contents: write # required to upload assets to the release
13
+ id-token: write # required for OIDC trusted publishing to PyPI
14
+
15
+ steps:
16
+ - uses: actions/checkout@v4
17
+
18
+ - name: Install uv
19
+ uses: astral-sh/setup-uv@v5
20
+
21
+ - name: Verify version matches tag
22
+ run: |
23
+ TAG="${{ github.ref_name }}"
24
+ TAG_VERSION="${TAG#v}"
25
+ TOML_VERSION=$(grep '^version = ' pyproject.toml | sed 's/version = "\(.*\)"/\1/')
26
+ echo "Tag version: $TAG_VERSION"
27
+ echo "pyproject.toml: $TOML_VERSION"
28
+ if [ "$TAG_VERSION" != "$TOML_VERSION" ]; then
29
+ echo "ERROR: version mismatch — update pyproject.toml before tagging"
30
+ exit 1
31
+ fi
32
+
33
+ - name: Install dependencies
34
+ run: uv sync --all-groups
35
+
36
+ - name: Lint and test
37
+ run: |
38
+ uv run ruff check databricks_schema/ tests/
39
+ uv run pytest
40
+
41
+ - name: Build wheel and sdist
42
+ run: uv build
43
+
44
+ - name: Upload artifacts to release
45
+ run: gh release upload "${{ github.ref_name }}" dist/* --clobber
46
+ env:
47
+ GH_TOKEN: ${{ github.token }}
48
+
49
+ - name: Publish to PyPI
50
+ run: uv publish --trusted-publishing always
@@ -0,0 +1,13 @@
1
+ # Python-generated files
2
+ __pycache__/
3
+ *.py[oc]
4
+ build/
5
+ dist/
6
+ wheels/
7
+ *.egg-info
8
+
9
+ # Virtual environments
10
+ .venv
11
+
12
+ # Tests
13
+ .test_output/
@@ -0,0 +1 @@
1
+ 3.13
@@ -0,0 +1,61 @@
1
+ # CLAUDE.md
2
+
3
+ ## Project
4
+
5
+ `databricks-schema` — CLI + Python library that extracts, diffs, and generates SQL for Databricks Unity Catalog schemas stored as YAML or JSON files (one file per schema). Pydantic v2 models serve as the intermediate representation for future bidirectional sync.
6
+
7
+ ## Key commands
8
+
9
+ ```bash
10
+ uv sync --all-groups # install deps (including dev)
11
+ uv run pytest # run tests
12
+ uv run ruff check databricks_schema/ tests/
13
+ uv run ruff format databricks_schema/ tests/
14
+ uv run databricks-schema --help
15
+ ```
16
+
17
+ ## Package layout
18
+
19
+ ```
20
+ databricks_schema/
21
+ models.py # Pydantic v2 models: Catalog, Schema, Table, Column, PrimaryKey, ForeignKey
22
+ extractor.py # CatalogExtractor — wraps databricks-sdk
23
+ yaml_io.py # schema/catalog to/from YAML and JSON; _strip_empty removes None + empty collections
24
+ diff.py # diff_schemas / diff_catalog_with_dir; FieldChange, ColumnDiff, TableDiff, SchemaDiff, CatalogDiff
25
+ sql_gen.py # schema_diff_to_sql — pure SQL generation from SchemaDiff; no SDK/IO
26
+ cli.py # argparse CLI: extract, diff, generate-sql, list-catalogs, list-schemas
27
+ __init__.py # public re-exports
28
+ tests/
29
+ test_models.py
30
+ test_extractor.py # all SDK calls mocked with MagicMock
31
+ test_yaml_io.py
32
+ test_diff.py # pure model comparison, no SDK calls
33
+ test_sql_gen.py # pure SQL generation tests, no SDK calls
34
+ ```
35
+
36
+ ## Conventions
37
+
38
+ - Package manager: `uv`; do not use `pip` directly
39
+ - Ruff: select E, W, F, I, UP; line-length 100; target py311
40
+ - Use `X | None` not `Optional[X]`; use `datetime.UTC` not `timezone.utc`
41
+ - `from __future__ import annotations` in every module
42
+ - Imports at the top of each module — never inside functions
43
+ - Use full package imports (`from databricks_schema.models import ...`), not relative imports (`from .models import ...`)
44
+ - Tags = Unity Catalog governance key/value tags — not `properties`
45
+ - FK refs store only `ref_schema` + `ref_table` (no catalog)
46
+ - Column order in YAML = SDK position (None → 9999)
47
+ - `_strip_empty`: removes `None` and empty `dict`/`list`; preserves `False`, `0`, empty strings
48
+ - `yaml_io.py` has parallel YAML and JSON functions (`schema_to_yaml`/`schema_to_json`, etc.)
49
+ - `diff_catalog_with_dir` accepts `fmt: Literal["yaml", "json"] = "yaml"` to select file format
50
+ - CLI: catalog is a required positional argument (not a flag)
51
+ - `extract` `--format`/`-f` selects output format (`yaml` default, `json` opt-in); dest is `fmt`
52
+ - `diff` auto-detects format from files present in the directory; exits 2 on mixed YAML+JSON
53
+ - `TableType` is re-exported from `databricks.sdk.service.catalog` — do not redefine it
54
+ - `--include-metadata` flag (on `extract`, `diff`, `generate-sql`) enables `owner` + `storage_location`; both are excluded by default
55
+ - `diff` command exits 0 (no changes) or 1 (differences found) — useful in CI
56
+ - Diff result types are dataclasses (not Pydantic); comparison functions are pure (no SDK calls)
57
+ - `generate-sql` auto-detects format from files; exits 2 on mixed YAML+JSON or empty directory
58
+ - `generate-sql` destructive statements commented out by default; `--allow-drop` emits real DROPs
59
+ - `sql_gen.py` is pure (no SDK, no I/O); diff direction: `FieldChange.old` = stored (target), `.new` = live
60
+ - FK refs in SQL use same catalog as the source table (`ref_schema` + `ref_table` from model)
61
+ - Unsupported field changes (`table_type`) emit `-- TODO: unsupported change: …` comments
@@ -0,0 +1,201 @@
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright [yyyy] [name of copyright owner]
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
@@ -0,0 +1,293 @@
1
+ Metadata-Version: 2.4
2
+ Name: databricks-schema
3
+ Version: 0.5.0
4
+ Summary: Databricks Unity Catalog schema extractor
5
+ License-File: LICENSE
6
+ Requires-Python: >=3.11
7
+ Requires-Dist: databricks-sdk>=0.64.0
8
+ Requires-Dist: pydantic>=2.0
9
+ Requires-Dist: pyyaml>=6.0
10
+ Description-Content-Type: text/markdown
11
+
12
+ # databricks-schema
13
+
14
+ A CLI tool and Python library that uses the Databricks SDK to extract and diff Unity Catalog schemas as YAML or JSON files. It can also generate Databricks Spark SQL to apply schema changes across catalogs.
15
+
16
+ ## Overview
17
+
18
+ Extract a catalog to YAML files, then diff those files against a catalog — the same one to detect drift or a different one to compare environments (e.g. prod vs test):
19
+
20
+ ```bash
21
+ # 1. Find the catalog you want to snapshot
22
+ databricks-schema list-catalogs
23
+
24
+ # 2. Extract its schemas to YAML files (one file per schema)
25
+ databricks-schema extract prod_catalog --output-dir ./schemas/
26
+
27
+ # 3. Diff those files against a catalog (same or different)
28
+ databricks-schema diff test_catalog ./schemas/
29
+
30
+ # 4. Generate SQL to bring that catalog in line with the YAML files
31
+ databricks-schema generate-sql test_catalog ./schemas/ --output-dir ./migrations/
32
+ ```
33
+
34
+ The YAML files act as a version-controllable snapshot of your schema. The `diff` command exits with code `1` when differences are found, making it suitable for CI pipelines.
35
+
36
+ ## Output Format
37
+
38
+ Each schema is written to `{output-dir}/{schema-name}.yaml`. Fields with no value (null comments, empty tag dicts, empty FK lists) are omitted. Use `--format json` to write `.json` files with the same structure.
39
+
40
+ ```yaml
41
+ name: main
42
+ comment: Main production schema
43
+ tags:
44
+ env: prod
45
+ tables:
46
+ - name: users
47
+ table_type: MANAGED
48
+ comment: User accounts
49
+ tags:
50
+ domain: identity
51
+ columns:
52
+ - name: id
53
+ data_type: bigint
54
+ nullable: false
55
+ comment: Primary key
56
+ - name: email
57
+ data_type: string
58
+ - name: org_id
59
+ data_type: bigint
60
+ primary_key:
61
+ name: pk_users
62
+ columns:
63
+ - id
64
+ foreign_keys:
65
+ - name: fk_org
66
+ columns:
67
+ - org_id
68
+ ref_schema: orgs
69
+ ref_table: organizations
70
+ ref_columns:
71
+ - id
72
+ ```
73
+
74
+ ## Installation
75
+
76
+ Requires Python 3.11+ and [uv](https://github.com/astral-sh/uv).
77
+
78
+ ```bash
79
+ git clone <repo>
80
+ cd databricks-schema
81
+ uv sync
82
+ ```
83
+
84
+ For development (includes pytest and ruff):
85
+
86
+ ```bash
87
+ uv sync --all-groups
88
+ ```
89
+
90
+ ## Authentication
91
+
92
+ The tool uses the [Databricks SDK](https://github.com/databricks/databricks-sdk-py) for auth. Configure it via environment variables:
93
+
94
+ ```bash
95
+ export DATABRICKS_HOST=https://<workspace>.cloud.databricks.com
96
+ export DATABRICKS_TOKEN=<your-personal-access-token>
97
+ ```
98
+
99
+ Or use a [Databricks CLI profile](https://docs.databricks.com/dev-tools/cli/profiles.html) (`~/.databrickscfg`) — the SDK will pick it up automatically.
100
+
101
+ You can also pass credentials directly as flags (see `--host` / `--token` below).
102
+
103
+ ## CLI Usage
104
+
105
+ ```
106
+ databricks-schema [OPTIONS] COMMAND [ARGS]...
107
+ ```
108
+
109
+ ### `extract`
110
+
111
+ Extract all schemas from a catalog to YAML files:
112
+
113
+ ```bash
114
+ databricks-schema extract <catalog> --output-dir ./schemas/
115
+ ```
116
+
117
+ Use `--format json` to write `.json` files instead of `.yaml`.
118
+
119
+ Extract specific schemas only:
120
+
121
+ ```bash
122
+ databricks-schema extract <catalog> --schema main --schema raw --output-dir ./schemas/
123
+ ```
124
+
125
+ Print a single schema to stdout (no `--output-dir`):
126
+
127
+ ```bash
128
+ databricks-schema extract <catalog> --schema main
129
+ ```
130
+
131
+ Skip tag lookups for faster extraction (tags will be absent from output):
132
+
133
+ ```bash
134
+ databricks-schema extract <catalog> --output-dir ./schemas/ --no-tags
135
+ ```
136
+
137
+ Include additional metadata (`owner`, `storage_location`) in the output:
138
+
139
+ ```bash
140
+ databricks-schema extract <catalog> --output-dir ./schemas/ --include-metadata
141
+ ```
142
+
143
+ Control the number of parallel workers (default: 4):
144
+
145
+ ```bash
146
+ databricks-schema extract <catalog> --output-dir ./schemas/ --workers 8
147
+ ```
148
+
149
+ ### `diff`
150
+
151
+ Compare the live catalog against previously extracted schema files (format auto-detected from the directory — YAML or JSON, not mixed):
152
+
153
+ ```bash
154
+ databricks-schema diff <catalog> ./schemas/
155
+ ```
156
+
157
+ Compare specific schemas only:
158
+
159
+ ```bash
160
+ databricks-schema diff <catalog> ./schemas/ --schema main --schema raw
161
+ ```
162
+
163
+ Skip tag lookups during comparison:
164
+
165
+ ```bash
166
+ databricks-schema diff <catalog> ./schemas/ --no-tags
167
+ ```
168
+
169
+ Include additional metadata (`owner`, `storage_location`) in the comparison:
170
+
171
+ ```bash
172
+ databricks-schema diff <catalog> ./schemas/ --include-metadata
173
+ ```
174
+
175
+ Exits with code `0` if no differences are found, `1` if there are — making it suitable for CI pipelines. Output example:
176
+
177
+ ```
178
+ ~ Schema: main [MODIFIED]
179
+ ~ Table: users [MODIFIED]
180
+ ~ Column: score [MODIFIED]
181
+ data_type: 'int' -> 'double'
182
+ + Column: phone [ADDED]
183
+ + Table: events [ADDED]
184
+ - Schema: legacy [REMOVED]
185
+ ```
186
+
187
+ Markers: `+` added in catalog, `-` removed from catalog, `~` modified.
188
+
189
+ ### `generate-sql`
190
+
191
+ Generate Databricks Spark SQL statements to bring the live catalog in line with local schema files (format auto-detected, YAML or JSON, not mixed). Statements are printed to stdout by default:
192
+
193
+ ```bash
194
+ databricks-schema generate-sql <catalog> ./schemas/
195
+ ```
196
+
197
+ Write one `.sql` file per schema to a directory instead:
198
+
199
+ ```bash
200
+ databricks-schema generate-sql <catalog> ./schemas/ --output-dir ./migrations/
201
+ ```
202
+
203
+ Destructive statements (`DROP SCHEMA`, `DROP TABLE`, `DROP COLUMN`) are emitted as SQL comments by default. Pass `--allow-drop` to emit them as executable statements:
204
+
205
+ ```bash
206
+ databricks-schema generate-sql <catalog> ./schemas/ --allow-drop
207
+ ```
208
+
209
+ Filter to specific schemas:
210
+
211
+ ```bash
212
+ databricks-schema generate-sql <catalog> ./schemas/ --schema main --schema raw
213
+ ```
214
+
215
+ Skip tag lookups for faster comparison:
216
+
217
+ ```bash
218
+ databricks-schema generate-sql <catalog> ./schemas/ --no-tags
219
+ ```
220
+
221
+ Include additional metadata (`owner`, `storage_location`) in the comparison:
222
+
223
+ ```bash
224
+ databricks-schema generate-sql <catalog> ./schemas/ --include-metadata
225
+ ```
226
+
227
+ ### `list-catalogs`
228
+
229
+ List all accessible catalogs:
230
+
231
+ ```bash
232
+ databricks-schema list-catalogs
233
+ ```
234
+
235
+ ### `list-schemas`
236
+
237
+ List schemas in a catalog:
238
+
239
+ ```bash
240
+ databricks-schema list-schemas <catalog>
241
+ ```
242
+
243
+ ## Python Library Usage
244
+
245
+ ```python
246
+ from pathlib import Path
247
+ from databricks_schema import CatalogExtractor, catalog_to_yaml, schema_from_yaml
248
+ from databricks_schema import diff_catalog_with_dir, diff_schemas, schema_diff_to_sql
249
+
250
+ # Extract using configured auth (max_workers controls parallel table extraction)
251
+ extractor = CatalogExtractor(max_workers=4)
252
+ catalog = extractor.extract_catalog("my_catalog", schema_filter=["main", "raw"])
253
+
254
+ # Skip tag lookups for faster extraction
255
+ catalog = extractor.extract_catalog("my_catalog", include_tags=False)
256
+
257
+ # Include additional metadata (owner, storage_location)
258
+ catalog = extractor.extract_catalog("my_catalog", include_metadata=True)
259
+
260
+ # Serialise to YAML
261
+ yaml_text = catalog_to_yaml(catalog)
262
+
263
+ # Deserialise from YAML
264
+ schema = schema_from_yaml(Path("schemas/main.yaml").read_text())
265
+ print(schema.tables[0].columns)
266
+
267
+ # Compare live catalog against local YAML files
268
+ result = diff_catalog_with_dir(catalog, Path("./schemas/"))
269
+ if result.has_changes:
270
+ for schema_diff in result.schemas:
271
+ print(schema_diff.name, schema_diff.status)
272
+
273
+ # Compare two Schema objects directly
274
+ stored = schema_from_yaml(Path("schemas/main.yaml").read_text())
275
+ diff = diff_schemas(live=catalog.schemas[0], stored=stored)
276
+
277
+ # Generate SQL to bring live in line with stored
278
+ sql = schema_diff_to_sql("my_catalog", diff, stored_schema=stored, allow_drop=False)
279
+ print(sql)
280
+ ```
281
+
282
+ ## Development
283
+
284
+ ```bash
285
+ # Run tests
286
+ uv run pytest
287
+
288
+ # Lint
289
+ uv run ruff check databricks_schema/ tests/
290
+
291
+ # Format
292
+ uv run ruff format databricks_schema/ tests/
293
+ ```