dbx-sync 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dbx_sync-0.1.0/.github/CODEOWNERS +1 -0
- dbx_sync-0.1.0/.github/workflows/release.yml +38 -0
- dbx_sync-0.1.0/.github/workflows/test-release.yml +39 -0
- dbx_sync-0.1.0/.gitignore +22 -0
- dbx_sync-0.1.0/.python-version +1 -0
- dbx_sync-0.1.0/AGENTS.md +42 -0
- dbx_sync-0.1.0/CONTRIBUTING.md +72 -0
- dbx_sync-0.1.0/LICENSE +21 -0
- dbx_sync-0.1.0/PKG-INFO +118 -0
- dbx_sync-0.1.0/README.md +108 -0
- dbx_sync-0.1.0/pyproject.toml +57 -0
- dbx_sync-0.1.0/src/dbx_sync/__init__.py +5 -0
- dbx_sync-0.1.0/src/dbx_sync/__main__.py +4 -0
- dbx_sync-0.1.0/src/dbx_sync/cli.py +95 -0
- dbx_sync-0.1.0/src/dbx_sync/sync.py +725 -0
- dbx_sync-0.1.0/tests/test_cli.py +98 -0
- dbx_sync-0.1.0/tests/test_sync.py +1011 -0
- dbx_sync-0.1.0/uv.lock +344 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
* @gramhagen
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
name: Release to PyPI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
tags:
|
|
6
|
+
# Publish on any tag starting with a `v`, e.g., v1.2.3
|
|
7
|
+
- v*
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
pypi:
|
|
11
|
+
name: Publish to PyPI
|
|
12
|
+
runs-on: ubuntu-latest
|
|
13
|
+
environment:
|
|
14
|
+
name: pypi
|
|
15
|
+
permissions:
|
|
16
|
+
id-token: write
|
|
17
|
+
contents: read
|
|
18
|
+
steps:
|
|
19
|
+
- name: Checkout
|
|
20
|
+
uses: actions/checkout@v5
|
|
21
|
+
|
|
22
|
+
- name: Install uv
|
|
23
|
+
uses: astral-sh/setup-uv@v6
|
|
24
|
+
|
|
25
|
+
- name: Install Python 3.12
|
|
26
|
+
run: uv python install 3.12
|
|
27
|
+
|
|
28
|
+
- name: Install project and dev dependencies
|
|
29
|
+
run: uv sync --dev
|
|
30
|
+
|
|
31
|
+
- name: Run test suite
|
|
32
|
+
run: uv run pytest
|
|
33
|
+
|
|
34
|
+
- name: Build
|
|
35
|
+
run: uv build
|
|
36
|
+
|
|
37
|
+
- name: Publish
|
|
38
|
+
run: uv publish dist/*
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
name: Release
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
tags:
|
|
6
|
+
# Publish on any tag starting with a `v`, e.g., v1.2.3
|
|
7
|
+
- v*
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
test-pypi:
|
|
11
|
+
name: Publish to Test PyPI
|
|
12
|
+
runs-on: ubuntu-latest
|
|
13
|
+
# Environment and permissions for trusted publishing.
|
|
14
|
+
environment:
|
|
15
|
+
name: test-pypi
|
|
16
|
+
permissions:
|
|
17
|
+
id-token: write
|
|
18
|
+
contents: read
|
|
19
|
+
steps:
|
|
20
|
+
- name: Checkout
|
|
21
|
+
uses: actions/checkout@v5
|
|
22
|
+
|
|
23
|
+
- name: Install uv
|
|
24
|
+
uses: astral-sh/setup-uv@v6
|
|
25
|
+
|
|
26
|
+
- name: Install Python 3.12
|
|
27
|
+
run: uv python install 3.12
|
|
28
|
+
|
|
29
|
+
- name: Install project and dev dependencies
|
|
30
|
+
run: uv sync --dev
|
|
31
|
+
|
|
32
|
+
- name: Run test suite
|
|
33
|
+
run: uv run pytest
|
|
34
|
+
|
|
35
|
+
- name: Build
|
|
36
|
+
run: uv build
|
|
37
|
+
|
|
38
|
+
- name: Publish
|
|
39
|
+
run: uv publish --index test-pypi dist/*
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# Python bytecode and caches
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
|
|
5
|
+
# Virtual environments
|
|
6
|
+
.venv/
|
|
7
|
+
|
|
8
|
+
# Tool caches
|
|
9
|
+
.pytest_cache/
|
|
10
|
+
.ruff_cache/
|
|
11
|
+
.coverage
|
|
12
|
+
coverage.xml
|
|
13
|
+
htmlcov/
|
|
14
|
+
|
|
15
|
+
# Build artifacts
|
|
16
|
+
build/
|
|
17
|
+
dist/
|
|
18
|
+
*.egg-info/
|
|
19
|
+
|
|
20
|
+
# Editor settings
|
|
21
|
+
.DS_Store
|
|
22
|
+
.vscode/
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
3.12
|
dbx_sync-0.1.0/AGENTS.md
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# Agent Workflow Notes
|
|
2
|
+
|
|
3
|
+
This repository uses a single shared instruction file for coding agents.
|
|
4
|
+
|
|
5
|
+
## Default workflow
|
|
6
|
+
|
|
7
|
+
1. Sync dependencies with `uv sync --dev`.
|
|
8
|
+
2. Implement code inside `src/dbx_sync/` using typed functions and small modules.
|
|
9
|
+
3. Add or update tests in `tests/` for each user-visible change.
|
|
10
|
+
4. Run `uv run ruff format .`, `uv run ruff check .`, `uv run ty check`, and `uv run pytest` before handing work back.
|
|
11
|
+
5. Use `uv build` and `uv publish` for packaging and release operations.
|
|
12
|
+
|
|
13
|
+
## Guardrails
|
|
14
|
+
|
|
15
|
+
- Keep configuration centralized in `pyproject.toml` where practical.
|
|
16
|
+
- Prefer extending the package API instead of adding loose scripts at repository root.
|
|
17
|
+
- Evolve behavior incrementally with tests rather than broad rewrites.
|
|
18
|
+
- Keep agent-specific workflow guidance here instead of splitting it across multiple files.
|
|
19
|
+
|
|
20
|
+
## Python Conventions
|
|
21
|
+
|
|
22
|
+
- Write readable, maintainable Python with descriptive names and explicit type hints.
|
|
23
|
+
- Break down complex logic into smaller helper functions instead of growing large multi-purpose functions.
|
|
24
|
+
- Use docstrings for non-trivial public functions and classes, following PEP 257 and concise Google-style sections when helpful.
|
|
25
|
+
- Prefer clear exception handling and cover edge cases such as missing inputs, invalid data, and empty results.
|
|
26
|
+
- Keep comments focused on intent or non-obvious design decisions; avoid narrating obvious code.
|
|
27
|
+
- Use logging module formatting for log messages instead of f-strings in logging calls.
|
|
28
|
+
|
|
29
|
+
## Python Style
|
|
30
|
+
|
|
31
|
+
- Follow PEP 8 with 4-space indentation.
|
|
32
|
+
- Keep code consistent with the repository toolchain: Ruff formatting, Ty type checking, and the line-length configured in `pyproject.toml`.
|
|
33
|
+
- Use modern built-in generics and standard typing features appropriate for Python 3.10+.
|
|
34
|
+
- Place docstrings immediately after function, class, or module declarations.
|
|
35
|
+
- Prefer straightforward, idiomatic Python over clever or overly abstract patterns.
|
|
36
|
+
|
|
37
|
+
## Testing Expectations
|
|
38
|
+
|
|
39
|
+
- Add unit tests for critical paths and behavior changes.
|
|
40
|
+
- Include edge-case coverage for empty inputs, invalid state, and error handling when those paths matter.
|
|
41
|
+
- Keep tests readable and focused on behavior rather than implementation detail.
|
|
42
|
+
- When adapting logic from another codebase, translate the relevant tests into this repo's current API instead of copying obsolete cases unchanged.
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
# Contributing
|
|
2
|
+
|
|
3
|
+
## Local Setup
|
|
4
|
+
|
|
5
|
+
Local development targets Python 3.12.
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
uv sync --dev
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
The Databricks CLI is required for running the tool against a real workspace.
|
|
12
|
+
|
|
13
|
+
- Install or update the Databricks CLI: <https://learn.microsoft.com/en-us/azure/databricks/dev-tools/cli/install>
|
|
14
|
+
- Configure a Databricks CLI profile: <https://learn.microsoft.com/en-us/azure/databricks/dev-tools/cli/reference/configure-commands#create-a-configuration-profile>
|
|
15
|
+
|
|
16
|
+
## Development Workflow
|
|
17
|
+
|
|
18
|
+
- Implement changes inside `src/dbx_sync/`.
|
|
19
|
+
- Add or update tests in `tests/` for behavior changes.
|
|
20
|
+
- Keep changes typed, readable, and small enough to review comfortably.
|
|
21
|
+
- Use `uv run` for project commands so the local environment and lockfile stay authoritative.
|
|
22
|
+
|
|
23
|
+
## Validation
|
|
24
|
+
|
|
25
|
+
Run the full local validation suite before finishing non-trivial changes.
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
uv run ruff format .
|
|
29
|
+
uv run ruff check .
|
|
30
|
+
uv run ty check
|
|
31
|
+
uv run pytest
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
Generate a line-by-line HTML coverage report when needed.
|
|
35
|
+
|
|
36
|
+
```bash
|
|
37
|
+
uv run pytest --cov=dbx_sync --cov-report term-missing --cov-report html
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
The HTML coverage report is written to `htmlcov/`.
|
|
41
|
+
|
|
42
|
+
## CLI Notes
|
|
43
|
+
|
|
44
|
+
- Required positional arguments: local directory, workspace path.
|
|
45
|
+
- Optional flags: `--profile`, `--poll-interval`, `--log-level`, `--dry-run`, `--watch`, `--force`.
|
|
46
|
+
- The current sync scope is a single folder level only; local discovery is not recursive.
|
|
47
|
+
- The current local tracking scope is Databricks notebook files with supported notebook extensions.
|
|
48
|
+
- Use `--force` to clear saved sync state and trigger a fresh comparison.
|
|
49
|
+
|
|
50
|
+
## Packaging And Release
|
|
51
|
+
|
|
52
|
+
Build and publish with uv.
|
|
53
|
+
|
|
54
|
+
```bash
|
|
55
|
+
uv build
|
|
56
|
+
uv publish
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
If you want to test the packaged CLI experience locally, install it as a uv tool:
|
|
60
|
+
|
|
61
|
+
```bash
|
|
62
|
+
uv tool install .
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
## License
|
|
66
|
+
|
|
67
|
+
This repository is released under the MIT license. See `LICENSE` for the full text.
|
|
68
|
+
|
|
69
|
+
## Agent Guidance
|
|
70
|
+
|
|
71
|
+
- Shared coding-agent instructions live in `AGENTS.md`.
|
|
72
|
+
- Keep repository-specific conventions there rather than creating overlapping instruction files.
|
dbx_sync-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 dbx-sync contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
dbx_sync-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: dbx-sync
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Synchronize Databricks workspace content with a local directory.
|
|
5
|
+
Author: gramhagen
|
|
6
|
+
License: MIT
|
|
7
|
+
License-File: LICENSE
|
|
8
|
+
Requires-Python: >=3.10
|
|
9
|
+
Description-Content-Type: text/markdown
|
|
10
|
+
|
|
11
|
+
# dbx-sync
|
|
12
|
+
|
|
13
|
+
```text
|
|
14
|
+
__ __
|
|
15
|
+
,___/ // /____ __ _____ __ ______ _____
|
|
16
|
+
/ __ // __ \\ \ / /_____/ ___// / / / __ \/ ___/
|
|
17
|
+
/ /_/ // /_/ // X \____(__ )/ /_/ / / / / /__
|
|
18
|
+
/_____//_____//__/ \__\ /____/ \__, /_/ /_/\___/
|
|
19
|
+
/____/
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
Are you tired of bouncing between the Databricks workspace UI and your local editor, copying changes by hand, and pretending that counts as a workflow? Well now there's `dbx-sync`.
|
|
23
|
+
|
|
24
|
+
`dbx-sync` keeps a single Databricks workspace folder and a single local directory in sync so you can work with your favorite tools and still stay aligned with what is running in Databricks.
|
|
25
|
+
|
|
26
|
+
Build locally, run in Databricks, tweak it there, then jump back to local coding. Skip the usual copy-paste ritual or one-way imports to weird folders.
|
|
27
|
+
|
|
28
|
+
Great for AI coding-agent workflows, including GitHub Copilot and Claude-based setups that work best against a real local folder.
|
|
29
|
+
|
|
30
|
+
Worried about losing files? `dbx-sync` does not delete files locally or remotely, but it can overwrite content if both sides changed while you were not syncing. Use version control locally and Databricks revision history remotely when you need rollback.
|
|
31
|
+
|
|
32
|
+
Current scope notes:
|
|
33
|
+
|
|
34
|
+
- Sync is limited to a single local folder and a single Databricks workspace folder.
|
|
35
|
+
- File and folder discovery is not recursive.
|
|
36
|
+
- Local tracking currently covers notebook files with Databricks notebook extensions: `.py`, `.sql`, `.scala`, `.r`, and `.ipynb`.
|
|
37
|
+
|
|
38
|
+
## Prerequisites
|
|
39
|
+
|
|
40
|
+
- Databricks CLI 0.205 or newer
|
|
41
|
+
- With a configured Databricks CLI profile
|
|
42
|
+
|
|
43
|
+
## Install
|
|
44
|
+
|
|
45
|
+
### Recommended: install as a uv tool
|
|
46
|
+
|
|
47
|
+
Install `dbx-sync` as a tool so you can run it directly from your shell:
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
uv tool install dbx-sync
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
### Alternative: install with pip
|
|
54
|
+
|
|
55
|
+
If you prefer a standard virtual environment workflow, install the package with `pip`:
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
python -m pip install dbx-sync
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
### Alternative: run from a local checkout
|
|
62
|
+
|
|
63
|
+
If you are developing on the project itself, install the local environment and run it with `uv run`:
|
|
64
|
+
|
|
65
|
+
```bash
|
|
66
|
+
uv sync --dev
|
|
67
|
+
uv run dbx-sync ./local-project /Workspace/Users/me/project
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
## Usage
|
|
71
|
+
|
|
72
|
+
Sync a single workspace folder with a single local folder (one-time):
|
|
73
|
+
|
|
74
|
+
```bash
|
|
75
|
+
dbx-sync ./local-project /Workspace/Users/me/project
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
Preview actions without applying them:
|
|
79
|
+
|
|
80
|
+
```bash
|
|
81
|
+
dbx-sync ./local-project /Workspace/Users/me/project --dry-run
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
Continuously watch and resync (default polling happens every second):
|
|
85
|
+
|
|
86
|
+
```bash
|
|
87
|
+
dbx-sync ./local-project /Workspace/Users/me/project --watch
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
Override optional settings when needed:
|
|
91
|
+
|
|
92
|
+
```bash
|
|
93
|
+
dbx-sync ./local-project /Workspace/Users/me/project \
|
|
94
|
+
--profile WORKSPACE \
|
|
95
|
+
--poll-interval 5 \
|
|
96
|
+
--log-level DEBUG \
|
|
97
|
+
--force
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
Use `--force` to clear saved sync state before a fresh pass.
|
|
101
|
+
|
|
102
|
+
The local directory may start empty or not exist yet. On a non-dry-run sync, the tool creates what it needs under that directory when files or sync state are written.
|
|
103
|
+
|
|
104
|
+
## Alternatives
|
|
105
|
+
Yes, I recognize there are a variety of official ways to do something close to this, but none of them fit my desired workflow well. So here are some references for alternatives.
|
|
106
|
+
|
|
107
|
+
- Databricks CLI workspace commands (`import`, `import-dir`, `export`, `export-dir`, `sync`, and related commands): <https://learn.microsoft.com/en-us/azure/databricks/dev-tools/cli/commands/>
|
|
108
|
+
- Databricks extension for Visual Studio Code: <https://learn.microsoft.com/en-us/azure/databricks/dev-tools/vscode-ext/>
|
|
109
|
+
- Databricks Asset Bundles documentation: <https://learn.microsoft.com/en-us/azure/databricks/dev-tools/bundles/>
|
|
110
|
+
- Databricks Git folders: <https://learn.microsoft.com/en-us/azure/databricks/repos/>
|
|
111
|
+
|
|
112
|
+
## Development
|
|
113
|
+
|
|
114
|
+
See [CONTRIBUTING.md](CONTRIBUTING.md) for local development, testing, release, and repository workflow details.
|
|
115
|
+
|
|
116
|
+
## License
|
|
117
|
+
|
|
118
|
+
MIT. See [LICENSE](LICENSE).
|
dbx_sync-0.1.0/README.md
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
# dbx-sync
|
|
2
|
+
|
|
3
|
+
```text
|
|
4
|
+
__ __
|
|
5
|
+
,___/ // /____ __ _____ __ ______ _____
|
|
6
|
+
/ __ // __ \\ \ / /_____/ ___// / / / __ \/ ___/
|
|
7
|
+
/ /_/ // /_/ // X \____(__ )/ /_/ / / / / /__
|
|
8
|
+
/_____//_____//__/ \__\ /____/ \__, /_/ /_/\___/
|
|
9
|
+
/____/
|
|
10
|
+
```
|
|
11
|
+
|
|
12
|
+
Are you tired of bouncing between the Databricks workspace UI and your local editor, copying changes by hand, and pretending that counts as a workflow? Well now there's `dbx-sync`.
|
|
13
|
+
|
|
14
|
+
`dbx-sync` keeps a single Databricks workspace folder and a single local directory in sync so you can work with your favorite tools and still stay aligned with what is running in Databricks.
|
|
15
|
+
|
|
16
|
+
Build locally, run in Databricks, tweak it there, then jump back to local coding. Skip the usual copy-paste ritual or one-way imports to weird folders.
|
|
17
|
+
|
|
18
|
+
Great for AI coding-agent workflows, including GitHub Copilot and Claude-based setups that work best against a real local folder.
|
|
19
|
+
|
|
20
|
+
Worried about losing files? `dbx-sync` does not delete files locally or remotely, but it can overwrite content if both sides changed while you were not syncing. Use version control locally and Databricks revision history remotely when you need rollback.
|
|
21
|
+
|
|
22
|
+
Current scope notes:
|
|
23
|
+
|
|
24
|
+
- Sync is limited to a single local folder and a single Databricks workspace folder.
|
|
25
|
+
- File and folder discovery is not recursive.
|
|
26
|
+
- Local tracking currently covers notebook files with Databricks notebook extensions: `.py`, `.sql`, `.scala`, `.r`, and `.ipynb`.
|
|
27
|
+
|
|
28
|
+
## Prerequisites
|
|
29
|
+
|
|
30
|
+
- Databricks CLI 0.205 or newer
|
|
31
|
+
- With a configured Databricks CLI profile
|
|
32
|
+
|
|
33
|
+
## Install
|
|
34
|
+
|
|
35
|
+
### Recommended: install as a uv tool
|
|
36
|
+
|
|
37
|
+
Install `dbx-sync` as a tool so you can run it directly from your shell:
|
|
38
|
+
|
|
39
|
+
```bash
|
|
40
|
+
uv tool install dbx-sync
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
### Alternative: install with pip
|
|
44
|
+
|
|
45
|
+
If you prefer a standard virtual environment workflow, install the package with `pip`:
|
|
46
|
+
|
|
47
|
+
```bash
|
|
48
|
+
python -m pip install dbx-sync
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
### Alternative: run from a local checkout
|
|
52
|
+
|
|
53
|
+
If you are developing on the project itself, install the local environment and run it with `uv run`:
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
uv sync --dev
|
|
57
|
+
uv run dbx-sync ./local-project /Workspace/Users/me/project
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
## Usage
|
|
61
|
+
|
|
62
|
+
Sync a single workspace folder with a single local folder (one-time):
|
|
63
|
+
|
|
64
|
+
```bash
|
|
65
|
+
dbx-sync ./local-project /Workspace/Users/me/project
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
Preview actions without applying them:
|
|
69
|
+
|
|
70
|
+
```bash
|
|
71
|
+
dbx-sync ./local-project /Workspace/Users/me/project --dry-run
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
Continuously watch and resync (default polling happens every second):
|
|
75
|
+
|
|
76
|
+
```bash
|
|
77
|
+
dbx-sync ./local-project /Workspace/Users/me/project --watch
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
Override optional settings when needed:
|
|
81
|
+
|
|
82
|
+
```bash
|
|
83
|
+
dbx-sync ./local-project /Workspace/Users/me/project \
|
|
84
|
+
--profile WORKSPACE \
|
|
85
|
+
--poll-interval 5 \
|
|
86
|
+
--log-level DEBUG \
|
|
87
|
+
--force
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
Use `--force` to clear saved sync state before a fresh pass.
|
|
91
|
+
|
|
92
|
+
The local directory may start empty or not exist yet. On a non-dry-run sync, the tool creates what it needs under that directory when files or sync state are written.
|
|
93
|
+
|
|
94
|
+
## Alternatives
|
|
95
|
+
Yes, I recognize there are a variety of official ways to do something close to this, but none of them fit my desired workflow well. So here are some references for alternatives.
|
|
96
|
+
|
|
97
|
+
- Databricks CLI workspace commands (`import`, `import-dir`, `export`, `export-dir`, `sync`, and related commands): <https://learn.microsoft.com/en-us/azure/databricks/dev-tools/cli/commands/>
|
|
98
|
+
- Databricks extension for Visual Studio Code: <https://learn.microsoft.com/en-us/azure/databricks/dev-tools/vscode-ext/>
|
|
99
|
+
- Databricks Asset Bundles documentation: <https://learn.microsoft.com/en-us/azure/databricks/dev-tools/bundles/>
|
|
100
|
+
- Databricks Git folders: <https://learn.microsoft.com/en-us/azure/databricks/repos/>
|
|
101
|
+
|
|
102
|
+
## Development
|
|
103
|
+
|
|
104
|
+
See [CONTRIBUTING.md](CONTRIBUTING.md) for local development, testing, release, and repository workflow details.
|
|
105
|
+
|
|
106
|
+
## License
|
|
107
|
+
|
|
108
|
+
MIT. See [LICENSE](LICENSE).
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling>=1.27.0"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "dbx-sync"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Synchronize Databricks workspace content with a local directory."
|
|
9
|
+
authors = [{ name = "gramhagen" }]
|
|
10
|
+
readme = "README.md"
|
|
11
|
+
license = { text = "MIT" }
|
|
12
|
+
requires-python = ">=3.10"
|
|
13
|
+
dependencies = []
|
|
14
|
+
|
|
15
|
+
[project.scripts]
|
|
16
|
+
dbx-sync = "dbx_sync.cli:main"
|
|
17
|
+
|
|
18
|
+
[dependency-groups]
|
|
19
|
+
dev = [
|
|
20
|
+
"pytest>=8.4.0",
|
|
21
|
+
"pytest-cov>=6.0.0",
|
|
22
|
+
"ruff>=0.11.0",
|
|
23
|
+
"ty>=0.0.23",
|
|
24
|
+
]
|
|
25
|
+
|
|
26
|
+
[tool.hatch.build.targets.wheel]
|
|
27
|
+
packages = ["src/dbx_sync"]
|
|
28
|
+
|
|
29
|
+
[tool.pytest.ini_options]
|
|
30
|
+
addopts = "-ra --strict-markers --strict-config"
|
|
31
|
+
testpaths = ["tests"]
|
|
32
|
+
|
|
33
|
+
[tool.coverage.run]
|
|
34
|
+
source = ["dbx_sync"]
|
|
35
|
+
branch = true
|
|
36
|
+
|
|
37
|
+
[tool.coverage.report]
|
|
38
|
+
show_missing = true
|
|
39
|
+
skip_covered = true
|
|
40
|
+
|
|
41
|
+
[tool.ruff]
|
|
42
|
+
target-version = "py310"
|
|
43
|
+
line-length = 100
|
|
44
|
+
|
|
45
|
+
[tool.ruff.lint]
|
|
46
|
+
select = ["B", "E", "F", "I", "SIM", "UP"]
|
|
47
|
+
|
|
48
|
+
[tool.ruff.format]
|
|
49
|
+
quote-style = "double"
|
|
50
|
+
|
|
51
|
+
[tool.uv]
|
|
52
|
+
package = true
|
|
53
|
+
|
|
54
|
+
[[tool.uv.index]]
|
|
55
|
+
name = "test-pypi"
|
|
56
|
+
url = "https://test.pypi.org/simple/"
|
|
57
|
+
publish-url = "https://test.pypi.org/legacy/"
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
from collections.abc import Sequence
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from dbx_sync.sync import run_sync
|
|
8
|
+
|
|
9
|
+
DEFAULT_POLL_INTERVAL_SECONDS = 1
|
|
10
|
+
LOG_LEVELS = ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def positive_int(value: str) -> int:
    """Parse a strictly positive integer argument.

    Args:
        value: Raw command-line argument text.

    Returns:
        int: Parsed positive integer value.

    Raises:
        argparse.ArgumentTypeError: If the value is not a positive integer.
    """
    # Convert explicitly so non-numeric text raises the documented
    # ArgumentTypeError (with a consistent message) instead of leaking a
    # bare ValueError out of int().
    try:
        parsed = int(value)
    except ValueError as err:
        raise argparse.ArgumentTypeError("must be a positive integer") from err
    if parsed < 1:
        raise argparse.ArgumentTypeError("must be a positive integer")
    return parsed
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def build_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for the sync tool."""
    parser = argparse.ArgumentParser(
        prog="dbx-sync",
        description="Synchronize Databricks workspace files to a local directory.",
    )

    # Required positionals: the two sync endpoints.
    parser.add_argument("local_dir", help="Local directory to sync")
    parser.add_argument("workspace", help="Databricks workspace folder to sync")

    # Options that carry a value.
    parser.add_argument("--profile", default="DEFAULT", help="Databricks CLI profile name")
    parser.add_argument(
        "-p",
        "--poll-interval",
        type=positive_int,
        default=DEFAULT_POLL_INTERVAL_SECONDS,
        help="Polling interval in seconds when running in watch mode",
    )
    parser.add_argument(
        "-l",
        "--log-level",
        choices=LOG_LEVELS,
        default="INFO",
        help="Logging level",
    )

    # Boolean switches all share the same store_true shape, so declare them
    # as data and register them in one pass.
    toggles = (
        ("-d", "--dry-run", "Plan the sync without applying changes."),
        ("-w", "--watch", "Watch for changes and sync continuously"),
        ("-f", "--force", "Force a refresh by clearing saved sync state before running"),
    )
    for short_flag, long_flag, help_text in toggles:
        parser.add_argument(short_flag, long_flag, action="store_true", help=help_text)

    return parser
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def main(argv: Sequence[str] | None = None) -> int:
    """Parse CLI arguments and run a sync operation.

    Args:
        argv: Optional argument vector used instead of sys.argv.

    Returns:
        int: Process exit code from the sync operation.
    """
    options = build_parser().parse_args(argv)
    # Normalize the local directory up front so run_sync always receives an
    # absolute, user-expanded path.
    resolved_local_dir = Path(options.local_dir).expanduser().resolve()
    return run_sync(
        local_dir=resolved_local_dir,
        remote_path=options.workspace,
        profile=options.profile,
        poll_interval_seconds=options.poll_interval,
        log_level=options.log_level,
        dry_run=options.dry_run,
        watch=options.watch,
        force=options.force,
    )
|