dvc-utils 0.2.0__tar.gz → 0.3.1__tar.gz

This diff shows the changes between two publicly released versions of a package, as those versions appear in the supported public registry they were published to. It is provided for informational purposes only.
@@ -0,0 +1,74 @@
# CI workflow: verify the README examples on every push / PR, and publish a
# release (PyPI + GitHub Release) when a tag is pushed.
name: Verify README examples, release to PyPI
on:
  push:
    branches: [ "main" ]
    tags: [ "v**" ]
  pull_request:
    branches: [ "main" ]
  workflow_dispatch:
env:
  # NOTE(review): defined workflow-wide, so every step in every job sees these
  # credentials; consider scoping them to the `dvc pull` step if nothing else
  # needs S3 access.
  AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
  AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
jobs:
  test:
    name: Verify README examples
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          # Full history and the `test/data` submodule are both fetched
          fetch-depth: 0
          submodules: true
      - uses: astral-sh/setup-uv@v5
        with:
          enable-cache: true
      - name: Set up Python 3.11
        run: uv python install 3.11
      - uses: dtolnay/rust-toolchain@stable
      - uses: Swatinem/rust-cache@v2
      - run: cargo install parquet2json
      - name: Install dependencies
        run: uv sync --extra ci --extra test
      - name: Run pytest
        run: |
          source .venv/bin/activate
          pytest
      - name: '`dvc pull` test/data'
        working-directory: test/data
        run: |
          source ../../.venv/bin/activate
          dvc pull -r s3 -R -A
      - name: Set up parquet-helpers
        uses: actions/checkout@v4
        with:
          repository: ryan-williams/parquet-helpers
          path: pqt
      - name: Verify README examples
        env:
          # Evaluate README examples from within the `test/data` submodule
          BMDF_WORKDIR: test/data
        run: |
          source .venv/bin/activate
          export PATH="$PWD/pqt:$PATH"
          . pqt/.pqt-rc
          export SHELL
          mdcmd
          # Fail if re-running the README commands changed any tracked file
          git diff --exit-code
  release:
    name: Release to PyPI
    if: startsWith(github.ref, 'refs/tags/')
    needs: test
    runs-on: ubuntu-latest
    permissions:
      # `gh release create` needs write access to repository contents; declare
      # it explicitly instead of relying on the repository's default token scope
      contents: write
    steps:
      - uses: actions/checkout@v4
      - uses: astral-sh/setup-uv@v5
      - name: Build package
        run: uv build
      - name: Publish to PyPI
        env:
          # Hand the token to `uv publish` through the environment rather than
          # interpolating the secret into the command line
          UV_PUBLISH_TOKEN: ${{ secrets.PYPI_TOKEN }}
        run: uv publish
      - name: Create GitHub Release
        env:
          GH_TOKEN: ${{ github.token }}
          # Pass the tag name as data, not as text spliced into the script
          TAG: ${{ github.ref_name }}
        run: |
          gh release create "$TAG" \
            --title "$TAG" \
            --generate-notes
@@ -0,0 +1,3 @@
# Test fixture repository, checked out at `test/data`.
# NOTE(review): the README describes this checkout as the `test` branch of
# dvc-helpers; no `branch` is pinned here — confirm that is intended.
[submodule "test/data"]
	path = test/data
	url = https://github.com/ryan-williams/dvc-helpers
@@ -1,13 +1,23 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.4
2
2
  Name: dvc-utils
3
- Version: 0.2.0
4
- Summary: CLI for diffing DVC files at two commits (or one commit vs. current worktree), optionally passing both through another command first
5
- Home-page: https://github.com/runsascoded/dvc-utils
6
- Author: Ryan Williams
7
- Author-email: ryan@runsascoded.com
3
+ Version: 0.3.1
4
+ Summary: CLI for diffing DVC-tracked files at two commits (or one commit vs. current worktree), optionally passing both through another command first
5
+ Project-URL: Homepage, https://github.com/runsascoded/dvc-utils
6
+ Project-URL: Author URL, https://github.com/ryan-williams
7
+ Author-email: Ryan Williams <ryan@runsascoded.com>
8
8
  License: MIT
9
- Description-Content-Type: text/markdown
10
9
  License-File: LICENSE
10
+ Requires-Python: >=3.9
11
+ Requires-Dist: click
12
+ Requires-Dist: dffs>=0.0.6
13
+ Requires-Dist: pyyaml
14
+ Requires-Dist: utz>=0.20.0
15
+ Provides-Extra: ci
16
+ Requires-Dist: bmdf==0.5.2; extra == 'ci'
17
+ Requires-Dist: dvc-s3; extra == 'ci'
18
+ Provides-Extra: test
19
+ Requires-Dist: pytest>=7.0.0; extra == 'test'
20
+ Description-Content-Type: text/markdown
11
21
 
12
22
  # dvc-utils
13
23
  Diff [DVC] files, optionally piping through other commands first.
@@ -32,25 +42,11 @@ pip install dvc-utils
32
42
  ```
33
43
 
34
44
  ## Usage <a id="usage"></a>
35
- <!-- `bmdf -- dvc-utils --help` -->
36
- ```bash
37
- dvc-utils --help
38
- # Usage: dvc-utils [OPTIONS] COMMAND [ARGS]...
39
- #
40
- # Options:
41
- # --help Show this message and exit.
42
- #
43
- # Commands:
44
- # diff Diff a DVC-tracked file at two commits (or one commit vs. current
45
- # worktree), optionally passing both through another command first
46
- ```
45
+ Currently one command is exposed, `dvc-diff`:
47
46
 
48
- The single subcommand, `dvc-utils diff`, is also exposed directly as `dvc-dff`:
49
-
50
- ### `dvc-diff` <a id="dvc-diff"></a>
51
- <!-- `bmdf -- dvc-diff --help` -->
47
+ <!-- `bmdf -- dvc-diff` -->
52
48
  ```bash
53
- dvc-diff --help
49
+ dvc-diff
54
50
  # Usage: dvc-diff [OPTIONS] [exec_cmd...] <path>
55
51
  #
56
52
  # Diff a file at two commits (or one commit vs. current worktree), optionally
@@ -87,9 +83,8 @@ dvc-diff --help
87
83
  ```
88
84
 
89
85
  ## Examples <a id="examples"></a>
90
- These examples are verified with [`mdcmd`] and `$BMDF_WORKDIR=test/data`
91
-
92
- ([`test/data`] is a clone of [ryan-williams/dvc-helpers@test], which contains simple DVC-tracked files used for testing [`git-diff-dvc.sh`])
86
+ - Examples below are verified with [`mdcmd`] and `$BMDF_WORKDIR=test/data` (see [.github/workflows/ci.yml](.github/workflows/ci.yml)).
87
+ - [test/data] is a clone of [ryan-williams/dvc-helpers@test], which contains simple DVC-tracked files (used in that repo for testing [`git-diff-dvc.sh`]).
93
88
 
94
89
  [`8ec2060`] added a DVC-tracked text file, `test.txt`:
95
90
 
@@ -159,9 +154,9 @@ dvc-diff -R f92c1d2 pqa test.parquet
159
154
 
160
155
  [`f29e52a`] updated `test.parquet`:
161
156
 
162
- <!-- `bmdf -- dvc-diff -R f29e52a pqa test.parquet` -->
157
+ <!-- `bmdf -E PQT_TXT_OPTS=-n2 -- dvc-diff -R f29e52a pqa test.parquet` -->
163
158
  ```bash
164
- dvc-diff -R f29e52a pqa test.parquet
159
+ PQT_TXT_OPTS=-n2 dvc-diff -R f29e52a pqa test.parquet
165
160
  # 1,3c1,3
166
161
  # < MD5: 4379600b26647a50dfcd0daa824e8219
167
162
  # < 1635 bytes
@@ -174,17 +169,16 @@ dvc-diff -R f29e52a pqa test.parquet
174
169
  # < OPTIONAL INT64 num;
175
170
  # ---
176
171
  # > OPTIONAL INT32 num;
177
- # 26a27,38
178
- # > }
179
- # > {
180
- # > "num": 666,
181
- # > "str": "fff"
182
- # > }
183
- # > {
172
+ # 19,20c19,20
173
+ # < "num": 444,
174
+ # < "str": "ddd"
175
+ # ---
184
176
  # > "num": 777,
185
177
  # > "str": "ggg"
186
- # > }
187
- # > {
178
+ # 23,24c23,24
179
+ # < "num": 555,
180
+ # < "str": "eee"
181
+ # ---
188
182
  # > "num": 888,
189
183
  # > "str": "hhh"
190
184
  ```
@@ -458,6 +452,7 @@ This helped me see that the data update in question (`c0..c1`) dropped some fiel
458
452
 
459
453
  [`mdcmd`]: https://github.com/runsascoded/bash-markdown-fence?tab=readme-ov-file#bmdf
460
454
  [`test/data`]: test/data
455
+ [test/data]: test/data
461
456
  [ryan-williams/dvc-helpers@test]: https://github.com/ryan-williams/dvc-helpers/tree/test
462
457
  [`git-diff-dvc.sh`]: https://github.com/ryan-williams/dvc-helpers/blob/main/git-diff-dvc.sh
463
458
 
@@ -21,25 +21,11 @@ pip install dvc-utils
21
21
  ```
22
22
 
23
23
  ## Usage <a id="usage"></a>
24
- <!-- `bmdf -- dvc-utils --help` -->
25
- ```bash
26
- dvc-utils --help
27
- # Usage: dvc-utils [OPTIONS] COMMAND [ARGS]...
28
- #
29
- # Options:
30
- # --help Show this message and exit.
31
- #
32
- # Commands:
33
- # diff Diff a DVC-tracked file at two commits (or one commit vs. current
34
- # worktree), optionally passing both through another command first
35
- ```
36
-
37
- The single subcommand, `dvc-utils diff`, is also exposed directly as `dvc-dff`:
24
+ Currently one command is exposed, `dvc-diff`:
38
25
 
39
- ### `dvc-diff` <a id="dvc-diff"></a>
40
- <!-- `bmdf -- dvc-diff --help` -->
26
+ <!-- `bmdf -- dvc-diff` -->
41
27
  ```bash
42
- dvc-diff --help
28
+ dvc-diff
43
29
  # Usage: dvc-diff [OPTIONS] [exec_cmd...] <path>
44
30
  #
45
31
  # Diff a file at two commits (or one commit vs. current worktree), optionally
@@ -76,9 +62,8 @@ dvc-diff --help
76
62
  ```
77
63
 
78
64
  ## Examples <a id="examples"></a>
79
- These examples are verified with [`mdcmd`] and `$BMDF_WORKDIR=test/data`
80
-
81
- ([`test/data`] is a clone of [ryan-williams/dvc-helpers@test], which contains simple DVC-tracked files used for testing [`git-diff-dvc.sh`])
65
+ - Examples below are verified with [`mdcmd`] and `$BMDF_WORKDIR=test/data` (see [.github/workflows/ci.yml](.github/workflows/ci.yml)).
66
+ - [test/data] is a clone of [ryan-williams/dvc-helpers@test], which contains simple DVC-tracked files (used in that repo for testing [`git-diff-dvc.sh`]).
82
67
 
83
68
  [`8ec2060`] added a DVC-tracked text file, `test.txt`:
84
69
 
@@ -148,9 +133,9 @@ dvc-diff -R f92c1d2 pqa test.parquet
148
133
 
149
134
  [`f29e52a`] updated `test.parquet`:
150
135
 
151
- <!-- `bmdf -- dvc-diff -R f29e52a pqa test.parquet` -->
136
+ <!-- `bmdf -E PQT_TXT_OPTS=-n2 -- dvc-diff -R f29e52a pqa test.parquet` -->
152
137
  ```bash
153
- dvc-diff -R f29e52a pqa test.parquet
138
+ PQT_TXT_OPTS=-n2 dvc-diff -R f29e52a pqa test.parquet
154
139
  # 1,3c1,3
155
140
  # < MD5: 4379600b26647a50dfcd0daa824e8219
156
141
  # < 1635 bytes
@@ -163,17 +148,16 @@ dvc-diff -R f29e52a pqa test.parquet
163
148
  # < OPTIONAL INT64 num;
164
149
  # ---
165
150
  # > OPTIONAL INT32 num;
166
- # 26a27,38
167
- # > }
168
- # > {
169
- # > "num": 666,
170
- # > "str": "fff"
171
- # > }
172
- # > {
151
+ # 19,20c19,20
152
+ # < "num": 444,
153
+ # < "str": "ddd"
154
+ # ---
173
155
  # > "num": 777,
174
156
  # > "str": "ggg"
175
- # > }
176
- # > {
157
+ # 23,24c23,24
158
+ # < "num": 555,
159
+ # < "str": "eee"
160
+ # ---
177
161
  # > "num": 888,
178
162
  # > "str": "hhh"
179
163
  ```
@@ -447,6 +431,7 @@ This helped me see that the data update in question (`c0..c1`) dropped some fiel
447
431
 
448
432
  [`mdcmd`]: https://github.com/runsascoded/bash-markdown-fence?tab=readme-ov-file#bmdf
449
433
  [`test/data`]: test/data
434
+ [test/data]: test/data
450
435
  [ryan-williams/dvc-helpers@test]: https://github.com/ryan-williams/dvc-helpers/tree/test
451
436
  [`git-diff-dvc.sh`]: https://github.com/ryan-williams/dvc-helpers/blob/main/git-diff-dvc.sh
452
437
 
@@ -0,0 +1,45 @@
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[project]
name = "dvc-utils"
version = "0.3.1"
description = "CLI for diffing DVC-tracked files at two commits (or one commit vs. current worktree), optionally passing both through another command first"
readme = "README.md"
license = {text = "MIT"}
authors = [
    {name = "Ryan Williams", email = "ryan@runsascoded.com"}
]
# `dvc_utils/diff.py` annotates parameters as `bool | None` / `str | None`
# (PEP 604). Those annotations are evaluated when the function is defined and
# raise `TypeError` on Python 3.9 unless the module opts into postponed
# evaluation, so 3.10 is the lowest version the package can be imported on.
requires-python = ">=3.10"
dependencies = [
    "click",
    "dffs>=0.0.6",
    "pyyaml",
    "utz>=0.20.0",
]

[project.optional-dependencies]
# Tooling used by the CI workflow to re-run and verify the README examples
ci = [
    "bmdf==0.5.2",
    "dvc-s3",
]
test = [
    "pytest>=7.0.0",
]

[project.urls]
Homepage = "https://github.com/runsascoded/dvc-utils"
"Author URL" = "https://github.com/ryan-williams"

[project.scripts]
dvc-utils = "dvc_utils.main:main"
dvc-diff = "dvc_utils.diff:dvc_diff"

[dependency-groups]
dev = [
    "pytest>=7.0.0",
]

[tool.hatch.build.targets.wheel]
packages = ["src/dvc_utils"]
@@ -0,0 +1,6 @@
# Pytest discovery and reporting defaults for this repository.
[pytest]
# Directory searched for tests when `pytest` is run without arguments
testpaths = tests

# Naming patterns used to collect test modules, classes and functions
python_files = test_*.py
python_classes = Test*
python_functions = test_*

# Verbose test names, short tracebacks
addopts = -v --tb=short
@@ -1,2 +1,2 @@
1
- from . import cli, path
1
+ from . import cli, diff, path
2
2
  from .path import dvc_cache_dir, dvc_md5, dvc_paths, dvc_path
@@ -0,0 +1,17 @@
import json
import shlex
from os import listdir
from os.path import isdir, join
from typing import Tuple

import click
from click import option, argument, group
from dffs import join_pipelines
from utz import process, err, hash_file

from dvc_utils.path import dvc_paths, dvc_cache_path


# Root `click` group for the package's CLI. Subcommand modules import `cli`
# from here and attach themselves with `@cli.command(...)`.
# No docstring is given: `click` would render it as the group's help text.
@group()
def cli():
    return None
@@ -6,18 +6,18 @@ from typing import Tuple
6
6
 
7
7
  import click
8
8
  from click import option, argument, group
9
- from qmdx import join_pipelines
9
+ from dffs import join_pipelines
10
10
  from utz import process, err, hash_file
11
11
 
12
+ from dvc_utils.cli import cli
12
13
  from dvc_utils.path import dvc_paths, dvc_cache_path
13
14
 
14
15
 
15
- @group()
16
- def cli():
17
- pass
18
-
19
-
20
- @cli.command('diff', short_help='Diff a DVC-tracked file at two commits (or one commit vs. current worktree), optionally passing both through another command first')
16
+ @cli.command(
17
+ 'diff',
18
+ short_help='Diff a DVC-tracked file at two commits (or one commit vs. current worktree), optionally passing both through another command first',
19
+ no_args_is_help=True,
20
+ )
21
21
  @option('-c/-C', '--color/--no-color', default=None, help='Force or prevent colorized output')
22
22
  @option('-r', '--refspec', help='<commit 1>..<commit 2> (compare two commits) or <commit> (compare <commit> to the worktree)')
23
23
  @option('-R', '--ref', help='Shorthand for `-r <ref>^..<ref>`, i.e. inspect a specific commit (vs. its parent)')
@@ -28,7 +28,7 @@ def cli():
28
28
  @option('-w', '--ignore-whitespace', is_flag=True, help="Ignore whitespace differences (pass `-w` to `diff`)")
29
29
  @option('-x', '--exec-cmd', 'exec_cmds', multiple=True, help='Command(s) to execute before diffing; alternate syntax to passing commands as positional arguments')
30
30
  @argument('args', metavar='[exec_cmd...] <path>', nargs=-1)
31
- def dvc_utils_diff(
31
+ def dvc_diff(
32
32
  color: bool | None,
33
33
  refspec: str | None,
34
34
  ref: str | None,
@@ -111,7 +111,7 @@ def dvc_utils_diff(
111
111
  cmds1 = [ shlex.split(cmd) for cmd in cmds1 ]
112
112
  cmds2 = [ shlex.split(cmd) for cmd in cmds2 ]
113
113
 
114
- join_pipelines(
114
+ returncode = join_pipelines(
115
115
  base_cmd=['diff', *diff_args],
116
116
  cmds1=cmds1,
117
117
  cmds2=cmds2,
@@ -119,10 +119,7 @@ def dvc_utils_diff(
119
119
  shell=shell,
120
120
  executable=shell_executable,
121
121
  )
122
+ exit(returncode)
122
123
  else:
123
124
  res = process.run('diff', *diff_args, path1 or '/dev/null', path2 or '/dev/null', log=log, check=False)
124
125
  exit(res.returncode)
125
-
126
-
127
- if __name__ == '__main__':
128
- cli()
@@ -0,0 +1,9 @@
from dvc_utils.cli import cli
# Imported for its side effect: loading the module runs its `@cli.command`
# decorator, which attaches the `diff` subcommand to `cli`.
from dvc_utils import diff


def main():
    """Run the `cli` command group; referenced by the `dvc-utils` console script."""
    cli()


if __name__ == '__main__':
    main()
@@ -49,8 +49,13 @@ def dvc_md5(
49
49
  dir_path = get_dir_path()
50
50
  dir_path = '' if dir_path == '.' else f'{dir_path}{sep}'
51
51
  dvc_path = f"{dir_path}{dvc_path}"
52
- dvc_spec = process.output('git', 'show', f'{git_ref}:{dvc_path}', log=err if log else None, err_ok=True, stderr=DEVNULL)
53
- if dvc_spec is None:
52
+ dvc_spec = process.output(
53
+ 'git', 'show', f'{git_ref}:{dvc_path}',
54
+ err_ok=True,
55
+ log=err if log else None,
56
+ stderr=None if log else DEVNULL,
57
+ )
58
+ if not dvc_spec:
54
59
  cur_dir = dirname(dvc_path)
55
60
  relpath = basename(dvc_path)
56
61
  if relpath.endswith(".dvc"):
@@ -0,0 +1,13 @@
from click import option

from dvc_utils.cli import cli
from git import Repo


# `pull-x` subcommand, registered on the shared `cli` group.
# NOTE(review): this looks like a stub — the body only constructs a `Repo` and
# none of the options are read yet. No docstring is added, since `click` would
# surface it as the command's help text.
@cli.command('pull-x', short_help='Sync DVC cache files from an S3 remote')
@option('-n', '--dry-run', is_flag=True, help="Print files that would be synced, don't actually perform sync")
@option('-p', '--path', 'paths', multiple=True, help='Path globs to sync')
@option('-r', '--ref', 'refs', multiple=True, help='Git refs to sync DVC files from')
def pull_x(dry_run, paths, refs):
    # `Repo()` with no arguments; presumably resolves the repository from the
    # current working directory — confirm against GitPython's defaults.
    git_repo = Repo()
File without changes
@@ -0,0 +1,83 @@
"""Tests for dvc-diff exit code handling."""
import pytest
import subprocess
from pathlib import Path
import tempfile


class TestDiffExitCodes:
    """Check that exit codes from pipeline commands reach the caller.

    Every case shells out to `diff-x` (not `dvc-diff`), which tests the same
    `join_pipelines` code.
    """

    @staticmethod
    def _make_inputs(directory, first, second):
        """Write `first` / `second` to `test1.txt` / `test2.txt` under `directory`.

        Returns the two file paths as strings, in that order.
        """
        written = []
        for filename, text in (("test1.txt", first), ("test2.txt", second)):
            target = directory / filename
            target.write_text(text)
            written.append(str(target))
        return written

    @staticmethod
    def _diff_x(*argv, **run_opts):
        """Run `diff-x` with `argv`, capturing its output; returns the `CompletedProcess`."""
        return subprocess.run(["diff-x", *argv], capture_output=True, **run_opts)

    def test_successful_pipeline_returns_zero(self, tmp_path):
        """Identical inputs through a succeeding pipeline exit with 0."""
        inputs = self._make_inputs(tmp_path, "foo\nbar\n", "foo\nbar\n")

        outcome = self._diff_x("cat", *inputs)

        assert outcome.returncode == 0

    def test_diff_found_returns_one(self, tmp_path):
        """Differing inputs exit with 1."""
        inputs = self._make_inputs(tmp_path, "foo\n", "bar\n")

        outcome = self._diff_x("cat", *inputs)

        assert outcome.returncode == 1

    def test_pipeline_error_propagates(self, tmp_path):
        """A failing pipeline command yields a non-zero exit code."""
        inputs = self._make_inputs(tmp_path, "foo\n", "bar\n")

        # The `cat` of a missing path is the command that is expected to fail
        outcome = self._diff_x(
            "cat /nonexistent/file/that/does/not/exist ||",
            *inputs,
            shell=False,
        )

        assert outcome.returncode != 0

    def test_false_command_propagates_error(self, tmp_path):
        """`false` as a pipeline stage yields a non-zero exit code."""
        inputs = self._make_inputs(tmp_path, "foo\n", "bar\n")

        # `false` always returns 1
        outcome = self._diff_x("cat", "false", *inputs)

        assert outcome.returncode != 0

    def test_multi_stage_pipeline_error(self, tmp_path):
        """A failure in a later stage of a multi-stage pipeline is detected."""
        inputs = self._make_inputs(tmp_path, "foo\nbar\n", "bar\nfoo\n")

        # Pipeline: sort (succeeds) | false (fails)
        outcome = self._diff_x("-x", "sort", "-x", "false", *inputs)

        assert outcome.returncode != 0