dvc-utils 0.2.0__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
dvc_utils/__init__.py CHANGED
@@ -1,2 +1,2 @@
1
- from . import cli, path
1
+ from . import cli, diff, path
2
2
  from .path import dvc_cache_dir, dvc_md5, dvc_paths, dvc_path
dvc_utils/cli.py CHANGED
@@ -6,7 +6,7 @@ from typing import Tuple
6
6
 
7
7
  import click
8
8
  from click import option, argument, group
9
- from qmdx import join_pipelines
9
+ from dffs import join_pipelines
10
10
  from utz import process, err, hash_file
11
11
 
12
12
  from dvc_utils.path import dvc_paths, dvc_cache_path
@@ -15,114 +15,3 @@ from dvc_utils.path import dvc_paths, dvc_cache_path
15
15
  @group()
16
16
  def cli():
17
17
  pass
18
-
19
-
20
- @cli.command('diff', short_help='Diff a DVC-tracked file at two commits (or one commit vs. current worktree), optionally passing both through another command first')
21
- @option('-c/-C', '--color/--no-color', default=None, help='Force or prevent colorized output')
22
- @option('-r', '--refspec', help='<commit 1>..<commit 2> (compare two commits) or <commit> (compare <commit> to the worktree)')
23
- @option('-R', '--ref', help='Shorthand for `-r <ref>^..<ref>`, i.e. inspect a specific commit (vs. its parent)')
24
- @option('-s', '--shell-executable', help=f'Shell to use for executing commands; defaults to $SHELL')
25
- @option('-S', '--no-shell', is_flag=True, help="Don't pass `shell=True` to Python `subprocess`es")
26
- @option('-U', '--unified', type=int, help='Number of lines of context to show (passes through to `diff`)')
27
- @option('-v', '--verbose', is_flag=True, help="Log intermediate commands to stderr")
28
- @option('-w', '--ignore-whitespace', is_flag=True, help="Ignore whitespace differences (pass `-w` to `diff`)")
29
- @option('-x', '--exec-cmd', 'exec_cmds', multiple=True, help='Command(s) to execute before diffing; alternate syntax to passing commands as positional arguments')
30
- @argument('args', metavar='[exec_cmd...] <path>', nargs=-1)
31
- def dvc_utils_diff(
32
- color: bool | None,
33
- refspec: str | None,
34
- ref: str | None,
35
- shell_executable: str | None,
36
- no_shell: bool,
37
- unified: int | None,
38
- verbose: bool,
39
- ignore_whitespace: bool,
40
- exec_cmds: Tuple[str, ...],
41
- args: Tuple[str, ...],
42
- ):
43
- """Diff a file at two commits (or one commit vs. current worktree), optionally passing both through `cmd` first
44
-
45
- Examples:
46
-
47
- dvc-utils diff -r HEAD^..HEAD wc -l foo.dvc # Compare the number of lines (`wc -l`) in `foo` (the file referenced by `foo.dvc`) at the previous vs. current commit (`HEAD^..HEAD`).
48
-
49
- dvc-utils diff md5sum foo # Diff the `md5sum` of `foo` (".dvc" extension is optional) at HEAD (last committed value) vs. the current worktree content.
50
- """
51
- if not args:
52
- raise click.UsageError('Must specify [cmd...] <path>')
53
-
54
- shell = not no_shell
55
- *cmds, path = args
56
- cmds = list(exec_cmds) + cmds
57
-
58
- path, dvc_path = dvc_paths(path)
59
-
60
- if refspec and ref:
61
- raise ValueError("Specify -r/--refspec xor -R/--ref")
62
- if ref:
63
- refspec = f'{ref}^..{ref}'
64
- elif not refspec:
65
- refspec = 'HEAD'
66
-
67
- pcs = refspec.split('..', 1)
68
- if len(pcs) == 1:
69
- before = pcs[0]
70
- after = None
71
- elif len(pcs) == 2:
72
- before, after = pcs
73
- else:
74
- raise ValueError(f"Invalid refspec: {refspec}")
75
-
76
- log = err if verbose else False
77
- path1 = dvc_cache_path(before, dvc_path, log=log)
78
- path2 = (path if after is None else dvc_cache_path(after, dvc_path, log=log))
79
-
80
- if isdir(path):
81
- dir_json1 = dir_json2 = {}
82
- if path1:
83
- with open(path1, 'r') as f:
84
- obj = json.load(f)
85
- dir_json1 = { e["relpath"]: e["md5"] for e in obj }
86
- if path2:
87
- if path2 == path and after is None:
88
- dir_json2 = {}
89
- for file in listdir(path2):
90
- md5 = hash_file(join(path2, file), hash_name='md5')
91
- dir_json2[file] = md5
92
- else:
93
- with open(path2, 'r') as f:
94
- dir_json2 = { obj["relpath"]: obj["md5"] for obj in json.load(f) }
95
- for relpath in sorted(set(dir_json1) | set(dir_json2)):
96
- md5_1 = dir_json1.get(relpath)
97
- md5_2 = dir_json2.get(relpath)
98
- if md5_1 != md5_2:
99
- print(f'{relpath}: {md5_1} -> {md5_2}')
100
- else:
101
- diff_args = [
102
- *(['-w'] if ignore_whitespace else []),
103
- *(['-U', str(unified)] if unified is not None else []),
104
- *(['--color=always'] if color is True else ['--color=never'] if color is False else []),
105
- ]
106
- if cmds:
107
- cmd, *sub_cmds = cmds
108
- cmds1 = [ 'cat /dev/null' ] if path1 is None else [ f'{cmd} {path1 or "/dev/null"}', *sub_cmds ]
109
- cmds2 = [ 'cat /dev/null' ] if path2 is None else [ f'{cmd} {path2 or "/dev/null"}', *sub_cmds ]
110
- if not shell:
111
- cmds1 = [ shlex.split(cmd) for cmd in cmds1 ]
112
- cmds2 = [ shlex.split(cmd) for cmd in cmds2 ]
113
-
114
- join_pipelines(
115
- base_cmd=['diff', *diff_args],
116
- cmds1=cmds1,
117
- cmds2=cmds2,
118
- verbose=verbose,
119
- shell=shell,
120
- executable=shell_executable,
121
- )
122
- else:
123
- res = process.run('diff', *diff_args, path1 or '/dev/null', path2 or '/dev/null', log=log, check=False)
124
- exit(res.returncode)
125
-
126
-
127
- if __name__ == '__main__':
128
- cli()
dvc_utils/diff.py ADDED
@@ -0,0 +1,125 @@
1
+ import json
2
+ import shlex
3
+ from os import listdir
4
+ from os.path import isdir, join
5
+ from typing import Tuple
6
+
7
+ import click
8
+ from click import option, argument, group
9
+ from dffs import join_pipelines
10
+ from utz import process, err, hash_file
11
+
12
+ from dvc_utils.cli import cli
13
+ from dvc_utils.path import dvc_paths, dvc_cache_path
14
+
15
+
16
+ @cli.command(
17
+ 'diff',
18
+ short_help='Diff a DVC-tracked file at two commits (or one commit vs. current worktree), optionally passing both through another command first',
19
+ no_args_is_help=True,
20
+ )
21
+ @option('-c/-C', '--color/--no-color', default=None, help='Force or prevent colorized output')
22
+ @option('-r', '--refspec', help='<commit 1>..<commit 2> (compare two commits) or <commit> (compare <commit> to the worktree)')
23
+ @option('-R', '--ref', help='Shorthand for `-r <ref>^..<ref>`, i.e. inspect a specific commit (vs. its parent)')
24
+ @option('-s', '--shell-executable', help=f'Shell to use for executing commands; defaults to $SHELL')
25
+ @option('-S', '--no-shell', is_flag=True, help="Don't pass `shell=True` to Python `subprocess`es")
26
+ @option('-U', '--unified', type=int, help='Number of lines of context to show (passes through to `diff`)')
27
+ @option('-v', '--verbose', is_flag=True, help="Log intermediate commands to stderr")
28
+ @option('-w', '--ignore-whitespace', is_flag=True, help="Ignore whitespace differences (pass `-w` to `diff`)")
29
+ @option('-x', '--exec-cmd', 'exec_cmds', multiple=True, help='Command(s) to execute before diffing; alternate syntax to passing commands as positional arguments')
30
+ @argument('args', metavar='[exec_cmd...] <path>', nargs=-1)
31
+ def dvc_diff(
32
+ color: bool | None,
33
+ refspec: str | None,
34
+ ref: str | None,
35
+ shell_executable: str | None,
36
+ no_shell: bool,
37
+ unified: int | None,
38
+ verbose: bool,
39
+ ignore_whitespace: bool,
40
+ exec_cmds: Tuple[str, ...],
41
+ args: Tuple[str, ...],
42
+ ):
43
+ """Diff a file at two commits (or one commit vs. current worktree), optionally passing both through `cmd` first
44
+
45
+ Examples:
46
+
47
+ dvc-utils diff -r HEAD^..HEAD wc -l foo.dvc # Compare the number of lines (`wc -l`) in `foo` (the file referenced by `foo.dvc`) at the previous vs. current commit (`HEAD^..HEAD`).
48
+
49
+ dvc-utils diff md5sum foo # Diff the `md5sum` of `foo` (".dvc" extension is optional) at HEAD (last committed value) vs. the current worktree content.
50
+ """
51
+ if not args:
52
+ raise click.UsageError('Must specify [cmd...] <path>')
53
+
54
+ shell = not no_shell
55
+ *cmds, path = args
56
+ cmds = list(exec_cmds) + cmds
57
+
58
+ path, dvc_path = dvc_paths(path)
59
+
60
+ if refspec and ref:
61
+ raise ValueError("Specify -r/--refspec xor -R/--ref")
62
+ if ref:
63
+ refspec = f'{ref}^..{ref}'
64
+ elif not refspec:
65
+ refspec = 'HEAD'
66
+
67
+ pcs = refspec.split('..', 1)
68
+ if len(pcs) == 1:
69
+ before = pcs[0]
70
+ after = None
71
+ elif len(pcs) == 2:
72
+ before, after = pcs
73
+ else:
74
+ raise ValueError(f"Invalid refspec: {refspec}")
75
+
76
+ log = err if verbose else False
77
+ path1 = dvc_cache_path(before, dvc_path, log=log)
78
+ path2 = (path if after is None else dvc_cache_path(after, dvc_path, log=log))
79
+
80
+ if isdir(path):
81
+ dir_json1 = dir_json2 = {}
82
+ if path1:
83
+ with open(path1, 'r') as f:
84
+ obj = json.load(f)
85
+ dir_json1 = { e["relpath"]: e["md5"] for e in obj }
86
+ if path2:
87
+ if path2 == path and after is None:
88
+ dir_json2 = {}
89
+ for file in listdir(path2):
90
+ md5 = hash_file(join(path2, file), hash_name='md5')
91
+ dir_json2[file] = md5
92
+ else:
93
+ with open(path2, 'r') as f:
94
+ dir_json2 = { obj["relpath"]: obj["md5"] for obj in json.load(f) }
95
+ for relpath in sorted(set(dir_json1) | set(dir_json2)):
96
+ md5_1 = dir_json1.get(relpath)
97
+ md5_2 = dir_json2.get(relpath)
98
+ if md5_1 != md5_2:
99
+ print(f'{relpath}: {md5_1} -> {md5_2}')
100
+ else:
101
+ diff_args = [
102
+ *(['-w'] if ignore_whitespace else []),
103
+ *(['-U', str(unified)] if unified is not None else []),
104
+ *(['--color=always'] if color is True else ['--color=never'] if color is False else []),
105
+ ]
106
+ if cmds:
107
+ cmd, *sub_cmds = cmds
108
+ cmds1 = [ 'cat /dev/null' ] if path1 is None else [ f'{cmd} {path1 or "/dev/null"}', *sub_cmds ]
109
+ cmds2 = [ 'cat /dev/null' ] if path2 is None else [ f'{cmd} {path2 or "/dev/null"}', *sub_cmds ]
110
+ if not shell:
111
+ cmds1 = [ shlex.split(cmd) for cmd in cmds1 ]
112
+ cmds2 = [ shlex.split(cmd) for cmd in cmds2 ]
113
+
114
+ returncode = join_pipelines(
115
+ base_cmd=['diff', *diff_args],
116
+ cmds1=cmds1,
117
+ cmds2=cmds2,
118
+ verbose=verbose,
119
+ shell=shell,
120
+ executable=shell_executable,
121
+ )
122
+ exit(returncode)
123
+ else:
124
+ res = process.run('diff', *diff_args, path1 or '/dev/null', path2 or '/dev/null', log=log, check=False)
125
+ exit(res.returncode)
dvc_utils/main.py ADDED
@@ -0,0 +1,9 @@
1
+ from dvc_utils.cli import cli
2
+ from dvc_utils import diff
3
+
4
+ def main():
5
+ cli()
6
+
7
+
8
+ if __name__ == '__main__':
9
+ cli()
dvc_utils/path.py CHANGED
@@ -49,8 +49,13 @@ def dvc_md5(
49
49
  dir_path = get_dir_path()
50
50
  dir_path = '' if dir_path == '.' else f'{dir_path}{sep}'
51
51
  dvc_path = f"{dir_path}{dvc_path}"
52
- dvc_spec = process.output('git', 'show', f'{git_ref}:{dvc_path}', log=err if log else None, err_ok=True, stderr=DEVNULL)
53
- if dvc_spec is None:
52
+ dvc_spec = process.output(
53
+ 'git', 'show', f'{git_ref}:{dvc_path}',
54
+ err_ok=True,
55
+ log=err if log else None,
56
+ stderr=None if log else DEVNULL,
57
+ )
58
+ if not dvc_spec:
54
59
  cur_dir = dirname(dvc_path)
55
60
  relpath = basename(dvc_path)
56
61
  if relpath.endswith(".dvc"):
dvc_utils/sync.py ADDED
@@ -0,0 +1,13 @@
1
+ from click import option
2
+
3
+ from dvc_utils.cli import cli
4
+ from git import Repo
5
+
6
+
7
+ @cli.command('pull-x', short_help='Sync DVC cache files from an S3 remote')
8
+ @option('-n', '--dry-run', is_flag=True, help='Print files that would be synced, don\'t actually perform sync')
9
+ @option('-p', '--path', 'paths', multiple=True, help='Path globs to sync')
10
+ @option('-r', '--ref', 'refs', multiple=True, help='Git refs to sync DVC files from')
11
+ def pull_x(dry_run, paths, refs):
12
+ repo = Repo()
13
+
@@ -1,17 +1,23 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.4
2
2
  Name: dvc-utils
3
- Version: 0.2.0
4
- Summary: CLI for diffing DVC files at two commits (or one commit vs. current worktree), optionally passing both through another command first
5
- Home-page: https://github.com/runsascoded/dvc-utils
6
- Author: Ryan Williams
7
- Author-email: ryan@runsascoded.com
3
+ Version: 0.3.1
4
+ Summary: CLI for diffing DVC-tracked files at two commits (or one commit vs. current worktree), optionally passing both through another command first
5
+ Project-URL: Homepage, https://github.com/runsascoded/dvc-utils
6
+ Project-URL: Author URL, https://github.com/ryan-williams
7
+ Author-email: Ryan Williams <ryan@runsascoded.com>
8
8
  License: MIT
9
- Description-Content-Type: text/markdown
10
9
  License-File: LICENSE
10
+ Requires-Python: >=3.9
11
11
  Requires-Dist: click
12
+ Requires-Dist: dffs>=0.0.6
12
13
  Requires-Dist: pyyaml
13
- Requires-Dist: qmdx>=0.0.5
14
- Requires-Dist: utz>=0.13.0
14
+ Requires-Dist: utz>=0.20.0
15
+ Provides-Extra: ci
16
+ Requires-Dist: bmdf==0.5.2; extra == 'ci'
17
+ Requires-Dist: dvc-s3; extra == 'ci'
18
+ Provides-Extra: test
19
+ Requires-Dist: pytest>=7.0.0; extra == 'test'
20
+ Description-Content-Type: text/markdown
15
21
 
16
22
  # dvc-utils
17
23
  Diff [DVC] files, optionally piping through other commands first.
@@ -36,25 +42,11 @@ pip install dvc-utils
36
42
  ```
37
43
 
38
44
  ## Usage <a id="usage"></a>
39
- <!-- `bmdf -- dvc-utils --help` -->
40
- ```bash
41
- dvc-utils --help
42
- # Usage: dvc-utils [OPTIONS] COMMAND [ARGS]...
43
- #
44
- # Options:
45
- # --help Show this message and exit.
46
- #
47
- # Commands:
48
- # diff Diff a DVC-tracked file at two commits (or one commit vs. current
49
- # worktree), optionally passing both through another command first
50
- ```
45
+ Currently one command is exposed, `dvc-diff`:
51
46
 
52
- The single subcommand, `dvc-utils diff`, is also exposed directly as `dvc-diff`:
53
-
54
- ### `dvc-diff` <a id="dvc-diff"></a>
55
- <!-- `bmdf -- dvc-diff --help` -->
47
+ <!-- `bmdf -- dvc-diff` -->
56
48
  ```bash
57
- dvc-diff --help
49
+ dvc-diff
58
50
  # Usage: dvc-diff [OPTIONS] [exec_cmd...] <path>
59
51
  #
60
52
  # Diff a file at two commits (or one commit vs. current worktree), optionally
@@ -91,9 +83,8 @@ dvc-diff --help
91
83
  ```
92
84
 
93
85
  ## Examples <a id="examples"></a>
94
- These examples are verified with [`mdcmd`] and `$BMDF_WORKDIR=test/data`
95
-
96
- ([`test/data`] is a clone of [ryan-williams/dvc-helpers@test], which contains simple DVC-tracked files used for testing [`git-diff-dvc.sh`])
86
+ - Examples below are verified with [`mdcmd`] and `$BMDF_WORKDIR=test/data` (see [.github/workflows/ci.yml](.github/workflows/ci.yml)).
87
+ - [test/data] is a clone of [ryan-williams/dvc-helpers@test], which contains simple DVC-tracked files (used in that repo for testing [`git-diff-dvc.sh`]).
97
88
 
98
89
  [`8ec2060`] added a DVC-tracked text file, `test.txt`:
99
90
 
@@ -163,9 +154,9 @@ dvc-diff -R f92c1d2 pqa test.parquet
163
154
 
164
155
  [`f29e52a`] updated `test.parquet`:
165
156
 
166
- <!-- `bmdf -- dvc-diff -R f29e52a pqa test.parquet` -->
157
+ <!-- `bmdf -E PQT_TXT_OPTS=-n2 -- dvc-diff -R f29e52a pqa test.parquet` -->
167
158
  ```bash
168
- dvc-diff -R f29e52a pqa test.parquet
159
+ PQT_TXT_OPTS=-n2 dvc-diff -R f29e52a pqa test.parquet
169
160
  # 1,3c1,3
170
161
  # < MD5: 4379600b26647a50dfcd0daa824e8219
171
162
  # < 1635 bytes
@@ -178,17 +169,16 @@ dvc-diff -R f29e52a pqa test.parquet
178
169
  # < OPTIONAL INT64 num;
179
170
  # ---
180
171
  # > OPTIONAL INT32 num;
181
- # 26a27,38
182
- # > }
183
- # > {
184
- # > "num": 666,
185
- # > "str": "fff"
186
- # > }
187
- # > {
172
+ # 19,20c19,20
173
+ # < "num": 444,
174
+ # < "str": "ddd"
175
+ # ---
188
176
  # > "num": 777,
189
177
  # > "str": "ggg"
190
- # > }
191
- # > {
178
+ # 23,24c23,24
179
+ # < "num": 555,
180
+ # < "str": "eee"
181
+ # ---
192
182
  # > "num": 888,
193
183
  # > "str": "hhh"
194
184
  ```
@@ -462,6 +452,7 @@ This helped me see that the data update in question (`c0..c1`) dropped some fiel
462
452
 
463
453
  [`mdcmd`]: https://github.com/runsascoded/bash-markdown-fence?tab=readme-ov-file#bmdf
464
454
  [`test/data`]: test/data
455
+ [test/data]: test/data
465
456
  [ryan-williams/dvc-helpers@test]: https://github.com/ryan-williams/dvc-helpers/tree/test
466
457
  [`git-diff-dvc.sh`]: https://github.com/ryan-williams/dvc-helpers/blob/main/git-diff-dvc.sh
467
458
 
@@ -0,0 +1,11 @@
1
+ dvc_utils/__init__.py,sha256=Q9_9py_bah16WN6RIryNMelBvG9IJW1B1Sm2PEU87Yk,92
2
+ dvc_utils/cli.py,sha256=et2OeEWJeY0XUuStMVd8qmjtOgDbXl9esn3KYZCQFSI,318
3
+ dvc_utils/diff.py,sha256=k4_1rjCfvtQkTwelPkrp2m6lrh8lU4IUg7aaKw3YhU8,5112
4
+ dvc_utils/main.py,sha256=cfbwebEc9Auh9DXyah31kvd9Br_uCsUxzOXkucfiSnM,119
5
+ dvc_utils/path.py,sha256=tdigsKTCZPhCIb9wRX4e_tj1lUxa2gm6lXERlfb8qY4,2955
6
+ dvc_utils/sync.py,sha256=gDwo0Z4lMK--xRwsACIpJRWS5rF6g7Wou0LJRxNd6b4,481
7
+ dvc_utils-0.3.1.dist-info/METADATA,sha256=aWI-NtzxzF66gfP7jH3oVf7n6HDtAzvuJg8biTFk7Cw,14986
8
+ dvc_utils-0.3.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
9
+ dvc_utils-0.3.1.dist-info/entry_points.txt,sha256=VHnkBNH2bPbP1wX8-rKK_T13f3e9tnmAGBcrP0hjTr0,85
10
+ dvc_utils-0.3.1.dist-info/licenses/LICENSE,sha256=ZS8AReay7xmQzBAHwxIuTouGXz3SKgUa2_Sz8Ip0EzQ,1070
11
+ dvc_utils-0.3.1.dist-info/RECORD,,
@@ -1,5 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.45.1)
2
+ Generator: hatchling 1.27.0
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
-
@@ -0,0 +1,3 @@
1
+ [console_scripts]
2
+ dvc-diff = dvc_utils.diff:dvc_diff
3
+ dvc-utils = dvc_utils.main:main
@@ -1,9 +0,0 @@
1
- dvc_utils/__init__.py,sha256=mP-p1Sl2JMMShM_hRhu86pFNfIq_8E_feh1CN47LWcs,86
2
- dvc_utils/cli.py,sha256=d27Q8K77ZxwWRtHJqn_70MhfOYF8ybsCEamG7wrAyjU,5079
3
- dvc_utils/path.py,sha256=eEP-r6o33BZsEz6JNllzEbU8ficW9slAFMJ2l02IRrY,2903
4
- dvc_utils-0.2.0.dist-info/LICENSE,sha256=ZS8AReay7xmQzBAHwxIuTouGXz3SKgUa2_Sz8Ip0EzQ,1070
5
- dvc_utils-0.2.0.dist-info/METADATA,sha256=ppHbZlkyxL015AhfAWcx9H6UGWLNt2078FjdMIQgCok,14977
6
- dvc_utils-0.2.0.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
7
- dvc_utils-0.2.0.dist-info/entry_points.txt,sha256=luxCQr8OS-jMSyyDhB9KDQhUbP8UH6UMcy-vkfXX7Gg,88
8
- dvc_utils-0.2.0.dist-info/top_level.txt,sha256=jT0-PJa2t_eFRE9rn-52AjdnZ8nQeEHllf2kJmaGh80,10
9
- dvc_utils-0.2.0.dist-info/RECORD,,
@@ -1,3 +0,0 @@
1
- [console_scripts]
2
- dvc-diff = dvc_utils.cli:dvc_utils_diff
3
- dvc-utils = dvc_utils.cli:cli
@@ -1 +0,0 @@
1
- dvc_utils