dvc-utils 0.0.3__tar.gz → 0.0.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dvc-utils
3
- Version: 0.0.3
3
+ Version: 0.0.5
4
4
  Summary: CLI for diffing DVC files at two commits (or one commit vs. current worktree), optionally passing both through another command first
5
5
  Home-page: https://github.com/runsascoded/dvc-utils
6
6
  Author: Ryan Williams
@@ -29,6 +29,7 @@ pip install dvc-utils
29
29
  ```
30
30
 
31
31
  ## Usage <a id="usage"></a>
32
+ <!-- `bmdf -- dvc-utils --help` -->
32
33
  ```bash
33
34
  dvc-utils --help
34
35
  # Usage: dvc-utils [OPTIONS] COMMAND [ARGS]...
@@ -42,6 +43,7 @@ dvc-utils --help
42
43
  ```
43
44
 
44
45
  ### `dvc-utils diff` <a id="dvc-utils-diff"></a>
46
+ <!-- `bmdf -- dvc-utils diff --help` -->
45
47
  ```bash
46
48
  dvc-utils diff --help
47
49
  # Usage: dvc-utils diff [OPTIONS] [cmd...] <path>
@@ -59,11 +61,15 @@ dvc-utils diff --help
59
61
  # optional) at HEAD (last committed value) vs. the current worktree content.
60
62
  #
61
63
  # Options:
62
- # -r, --refspec TEXT <commit 1>..<commit 2> (compare two commits) or <commit>
63
- # (compare <commit> to the worktree)
64
- # -S, --no-shell Don't pass `shell=True` to Python `subprocess`es
65
- # -v, --verbose Log intermediate commands to stderr
66
- # --help Show this message and exit.
64
+ # -c, --color Colorize the output
65
+ # -r, --refspec TEXT <commit 1>..<commit 2> (compare two commits) or
66
+ # <commit> (compare <commit> to the worktree)
67
+ # -S, --no-shell Don't pass `shell=True` to Python `subprocess`es
68
+ # -U, --unified INTEGER Number of lines of context to show (passes through
69
+ # to `diff`)
70
+ # -v, --verbose Log intermediate commands to stderr
71
+ # -w, --ignore-whitespace Ignore whitespace differences (pass `-w` to `diff`)
72
+ # --help Show this message and exit.
67
73
  ```
68
74
 
69
75
  ## Examples <a id="examples"></a>
@@ -18,6 +18,7 @@ pip install dvc-utils
18
18
  ```
19
19
 
20
20
  ## Usage <a id="usage"></a>
21
+ <!-- `bmdf -- dvc-utils --help` -->
21
22
  ```bash
22
23
  dvc-utils --help
23
24
  # Usage: dvc-utils [OPTIONS] COMMAND [ARGS]...
@@ -31,6 +32,7 @@ dvc-utils --help
31
32
  ```
32
33
 
33
34
  ### `dvc-utils diff` <a id="dvc-utils-diff"></a>
35
+ <!-- `bmdf -- dvc-utils diff --help` -->
34
36
  ```bash
35
37
  dvc-utils diff --help
36
38
  # Usage: dvc-utils diff [OPTIONS] [cmd...] <path>
@@ -48,11 +50,15 @@ dvc-utils diff --help
48
50
  # optional) at HEAD (last committed value) vs. the current worktree content.
49
51
  #
50
52
  # Options:
51
- # -r, --refspec TEXT <commit 1>..<commit 2> (compare two commits) or <commit>
52
- # (compare <commit> to the worktree)
53
- # -S, --no-shell Don't pass `shell=True` to Python `subprocess`es
54
- # -v, --verbose Log intermediate commands to stderr
55
- # --help Show this message and exit.
53
+ # -c, --color Colorize the output
54
+ # -r, --refspec TEXT <commit 1>..<commit 2> (compare two commits) or
55
+ # <commit> (compare <commit> to the worktree)
56
+ # -S, --no-shell Don't pass `shell=True` to Python `subprocess`es
57
+ # -U, --unified INTEGER Number of lines of context to show (passes through
58
+ # to `diff`)
59
+ # -v, --verbose Log intermediate commands to stderr
60
+ # -w, --ignore-whitespace Ignore whitespace differences (pass `-w` to `diff`)
61
+ # --help Show this message and exit.
56
62
  ```
57
63
 
58
64
  ## Examples <a id="examples"></a>
@@ -0,0 +1,233 @@
1
+ from functools import cache
2
+ from os import environ as env, getcwd
3
+ from os.path import join, relpath
4
+ import shlex
5
+ from subprocess import Popen, PIPE
6
+ from typing import Optional, Tuple
7
+
8
+ from click import option, argument, group
9
+ import click
10
+ import yaml
11
+ from utz import process, singleton, err
12
+
13
+ from dvc_utils.named_pipes import named_pipes
14
+
15
+
16
# Root click command group; subcommands attach themselves via `@cli.command(...)`.
# Deliberately has no docstring: click would render one as `--help` text.
@group()
def cli():
    pass
19
+
20
+
21
def dvc_paths(path: str) -> Tuple[str, str]:
    """Return ``(data_path, dvc_file_path)`` for ``path``.

    ``path`` may be given with or without a trailing ``.dvc``; the first element of the
    result never carries that suffix and the second always does.
    """
    suffix = '.dvc'
    if not path.endswith(suffix):
        return path, f'{path}{suffix}'
    return path[:-len(suffix)], path
28
+
29
+
30
@cache
def get_git_root() -> str:
    """Return the output line of ``git rev-parse --show-toplevel`` (memoized).

    The command itself is not logged (``log=False``).
    """
    toplevel_cmd = ('git', 'rev-parse', '--show-toplevel')
    return process.line(*toplevel_cmd, log=False)
33
+
34
+
35
@cache
def get_dir_path() -> str:
    """Return the current working directory relative to the Git root (memoized).

    The result is ``'.'`` when the working directory is the Git root itself.
    """
    cwd = getcwd()
    repo_root = get_git_root()
    return relpath(cwd, repo_root)
38
+
39
+
40
@cache
def dvc_cache_dir(log: bool = False) -> str:
    """Return the DVC cache directory (memoized per ``log`` value).

    When the ``DVC_UTILS_CACHE_DIR`` environment variable is set and non-empty, it is
    joined onto the Git root. Otherwise the directory is read from ``dvc cache dir``,
    with ``log`` forwarded to ``process.line``.
    """
    override = env.get('DVC_UTILS_CACHE_DIR')
    if not override:
        return process.line('dvc', 'cache', 'dir', log=log)
    return join(get_git_root(), override)
47
+
48
+
49
def dvc_md5(git_ref: str, dvc_path: str, log: bool = False) -> str:
    """Return the ``md5`` recorded in a ``.dvc`` file as of ``git_ref``.

    ``dvc_path`` is interpreted relative to the current working directory; it is prefixed
    with the working directory's path inside the repo before being handed to ``git show``.
    The file is parsed as YAML and the ``md5`` of its ``outs`` entry is returned
    (``singleton`` presumably requires exactly one entry -- confirm against ``utz``).

    A truthy ``log`` makes the ``git show`` call log via ``err``; otherwise ``None`` is passed.
    """
    cwd_in_repo = get_dir_path()
    prefix = f'{cwd_in_repo}/' if cwd_in_repo != '.' else ''
    blob_spec = f'{git_ref}:{prefix}{dvc_path}'
    spec_text = process.output('git', 'show', blob_spec, log=err if log else None)
    outs = yaml.safe_load(spec_text)['outs']
    return singleton(outs, dedupe=False)['md5']
57
+
58
+
59
def dvc_cache_path(ref: str, dvc_path: Optional[str] = None, log: bool = False) -> str:
    """Return the path inside the DVC cache for the object identified by ``ref``.

    ``ref`` is interpreted as:

    - a Git ref, when ``dvc_path`` is given;
    - ``<git ref>:<dvc path>``, when ``dvc_path`` is omitted and ``ref`` contains ``:``;
    - a literal MD5, otherwise.

    The result is ``<cache dir>/files/md5/<first 2 chars of md5>/<remaining chars>``.
    """
    if not dvc_path and ':' not in ref:
        # Bare MD5: nothing to look up.
        md5 = ref
    else:
        if not dvc_path:
            ref, dvc_path = ref.split(':', 1)
        md5 = dvc_md5(ref, dvc_path, log=log)
    shard, remainder = md5[:2], md5[2:]
    return join(dvc_cache_dir(log=log), 'files', 'md5', shard, remainder)
70
+
71
+
72
def diff_cmds(
    cmds1: list[str | list[str]],
    cmds2: list[str | list[str]],
    verbose: bool = False,
    color: bool = False,
    unified: int | None = None,
    ignore_whitespace: bool = False,
    **kwargs,
):
    """Run two sequences of piped commands and diff their output.

    Args:
        cmds1: First sequence of commands to pipe together. Each command is either a
            string (for use with ``shell=True``) or an argv list.
        cmds2: Second sequence of commands to pipe together (same forms as ``cmds1``)
        verbose: Whether to print commands being executed
        color: Whether to show colored diff output
        unified: Number of unified context lines, or None
        ignore_whitespace: Whether to ignore whitespace changes
        **kwargs: Additional arguments passed to subprocess.Popen

    Returns:
        The exit status of the ``diff`` process.

    Raises:
        ValueError: If either command sequence is empty (nothing would ever write to the
            corresponding named pipe, and ``diff`` would block forever).

    Each command sequence will be piped together before being compared.
    For example, if cmds1 = ['cat foo.txt', 'sort'], the function will
    execute 'cat foo.txt | sort' before comparing with cmds2's output.

    Adapted from https://stackoverflow.com/a/28840955"""
    if not cmds1 or not cmds2:
        raise ValueError("Each pipeline must contain at least one command")

    with named_pipes(n=2) as pipes:
        (pipe1, pipe2) = pipes
        diff_cmd = [
            'diff',
            *(['-w'] if ignore_whitespace else []),
            *(['-U', str(unified)] if unified is not None else []),
            *(['--color=always'] if color else []),
            pipe1,
            pipe2,
        ]
        # `diff` is started first so that each named pipe has a reader by the time a
        # writer opens it below.
        diff = Popen(diff_cmd)
        processes = []

        try:
            for pipe, cmds in ((pipe1, cmds1), (pipe2, cmds2)):
                if verbose:
                    # Commands may be argv lists (no-shell mode); `str.join` only accepts
                    # strings, so render those with `shlex.join` first.
                    rendered = [c if isinstance(c, str) else shlex.join(c) for c in cmds]
                    err(f"Running pipeline: {' | '.join(rendered)}")

                # Create the pipeline of processes
                prev_process = None
                for i, cmd in enumerate(cmds):
                    is_last = i + 1 == len(cmds)

                    # The first process inherits this process's stdin; later ones read
                    # from the previous stage's stdout.
                    stdin = None if prev_process is None else prev_process.stdout

                    if is_last:
                        # For the last process, output to the named pipe
                        with open(pipe, 'wb', 0) as pipe_fd:
                            proc = Popen(
                                cmd,
                                stdin=stdin,
                                stdout=pipe_fd,
                                close_fds=True,
                                **kwargs
                            )
                    else:
                        # For intermediate processes, output to a pipe
                        proc = Popen(
                            cmd,
                            stdin=stdin,
                            stdout=PIPE,
                            close_fds=True,
                            **kwargs
                        )

                    # The child now holds its own copy of the previous stage's stdout;
                    # drop ours so that stage sees SIGPIPE/EOF correctly.
                    if prev_process is not None:
                        prev_process.stdout.close()

                    processes.append(proc)
                    prev_process = proc
        except BaseException:
            # A stage failed to start (or we were interrupted): don't leave `diff` blocked
            # on a pipe that will never get a writer, nor orphan the stages already running.
            started = [diff] + processes
            for p in started:
                p.kill()
            for p in started:
                p.wait()
            raise

        for p in [diff] + processes:
            p.wait()

        return diff.returncode
150
+
151
+
152
@cli.command('diff', short_help='Diff a DVC-tracked file at two commits (or one commit vs. current worktree), optionally passing both through another command first')
@option('-c', '--color', is_flag=True, help='Colorize the output')
@option('-r', '--refspec', default='HEAD', help='<commit 1>..<commit 2> (compare two commits) or <commit> (compare <commit> to the worktree)')
@option('-S', '--no-shell', is_flag=True, help="Don't pass `shell=True` to Python `subprocess`es")
@option('-U', '--unified', type=int, help='Number of lines of context to show (passes through to `diff`)')
@option('-v', '--verbose', is_flag=True, help="Log intermediate commands to stderr")
@option('-w', '--ignore-whitespace', is_flag=True, help="Ignore whitespace differences (pass `-w` to `diff`)")
@option('-x', '--exec-cmd', 'exec_cmds', multiple=True, help='Command(s) to execute before diffing; alternate syntax to passing commands as positional arguments')
@argument('args', metavar='[exec_cmd...] <path>', nargs=-1)
def dvc_utils_diff(
    color: bool,
    refspec: str | None,
    no_shell: bool,
    unified: int | None,
    verbose: bool,
    ignore_whitespace: bool,
    exec_cmds: Tuple[str, ...],
    args: Tuple[str, ...],
):
    """Diff a file at two commits (or one commit vs. current worktree), optionally passing both through `cmd` first

    Examples:

    dvc-utils diff -r HEAD^..HEAD wc -l foo.dvc # Compare the number of lines (`wc -l`) in `foo` (the file referenced by `foo.dvc`) at the previous vs. current commit (`HEAD^..HEAD`).

    dvc-utils diff md5sum foo # Diff the `md5sum` of `foo` (".dvc" extension is optional) at HEAD (last committed value) vs. the current worktree content.
    """
    # NOTE: the docstring above is rendered verbatim by click as `--help` text, so
    # implementation notes are kept in comments.
    if not args:
        raise click.UsageError('Must specify [cmd...] <path>')

    shell = not no_shell
    # The last positional is the path; everything before it is a pipeline command.
    # Commands given via `-x` run before positional ones.
    *cmds, path = args
    cmds = list(exec_cmds) + cmds

    path, dvc_path = dvc_paths(path)

    # `<before>..<after>` compares two commits; a bare `<before>` compares against the
    # worktree file (signalled by `after = None`).
    before, sep, after = refspec.partition('..')
    if not sep:
        after = None

    log = err if verbose else False
    before_path = dvc_cache_path(before, dvc_path, log=log)
    after_path = path if after is None else dvc_cache_path(after, dvc_path, log=log)

    if cmds:
        # The file path is appended to the first command only; later commands read the
        # previous command's stdout.
        cmd, *sub_cmds = cmds
        if not shell:
            sub_cmds = [shlex.split(c) for c in sub_cmds]
            # Append the path as its own argv element rather than re-splitting a combined
            # string, so paths containing whitespace or quotes stay intact.
            first_argv = shlex.split(cmd)
            before_cmds = [[*first_argv, before_path], *sub_cmds]
            after_cmds = [[*first_argv, after_path], *sub_cmds]
            shell_kwargs = {}
        else:
            # Quote the path so the shell treats it as a single literal word.
            before_cmds = [f'{cmd} {shlex.quote(before_path)}', *sub_cmds]
            after_cmds = [f'{cmd} {shlex.quote(after_path)}', *sub_cmds]
            shell_kwargs = dict(shell=shell)

        diff_cmds(
            before_cmds,
            after_cmds,
            verbose=verbose,
            color=color,
            unified=unified,
            ignore_whitespace=ignore_whitespace,
            **shell_kwargs,
        )
    else:
        # No pipeline: diff the two files directly, honouring the same `diff` flags as
        # the pipeline path.
        diff_flags = [
            *(['-w'] if ignore_whitespace else []),
            *(['-U', str(unified)] if unified is not None else []),
            *(['--color=always'] if color else []),
        ]
        process.run('diff', *diff_flags, before_path, after_path, log=log)
230
+
231
+
232
# Allow running this module directly as a script: dispatch to the click group.
if __name__ == '__main__':
    cli()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dvc-utils
3
- Version: 0.0.3
3
+ Version: 0.0.5
4
4
  Summary: CLI for diffing DVC files at two commits (or one commit vs. current worktree), optionally passing both through another command first
5
5
  Home-page: https://github.com/runsascoded/dvc-utils
6
6
  Author: Ryan Williams
@@ -29,6 +29,7 @@ pip install dvc-utils
29
29
  ```
30
30
 
31
31
  ## Usage <a id="usage"></a>
32
+ <!-- `bmdf -- dvc-utils --help` -->
32
33
  ```bash
33
34
  dvc-utils --help
34
35
  # Usage: dvc-utils [OPTIONS] COMMAND [ARGS]...
@@ -42,6 +43,7 @@ dvc-utils --help
42
43
  ```
43
44
 
44
45
  ### `dvc-utils diff` <a id="dvc-utils-diff"></a>
46
+ <!-- `bmdf -- dvc-utils diff --help` -->
45
47
  ```bash
46
48
  dvc-utils diff --help
47
49
  # Usage: dvc-utils diff [OPTIONS] [cmd...] <path>
@@ -59,11 +61,15 @@ dvc-utils diff --help
59
61
  # optional) at HEAD (last committed value) vs. the current worktree content.
60
62
  #
61
63
  # Options:
62
- # -r, --refspec TEXT <commit 1>..<commit 2> (compare two commits) or <commit>
63
- # (compare <commit> to the worktree)
64
- # -S, --no-shell Don't pass `shell=True` to Python `subprocess`es
65
- # -v, --verbose Log intermediate commands to stderr
66
- # --help Show this message and exit.
64
+ # -c, --color Colorize the output
65
+ # -r, --refspec TEXT <commit 1>..<commit 2> (compare two commits) or
66
+ # <commit> (compare <commit> to the worktree)
67
+ # -S, --no-shell Don't pass `shell=True` to Python `subprocess`es
68
+ # -U, --unified INTEGER Number of lines of context to show (passes through
69
+ # to `diff`)
70
+ # -v, --verbose Log intermediate commands to stderr
71
+ # -w, --ignore-whitespace Ignore whitespace differences (pass `-w` to `diff`)
72
+ # --help Show this message and exit.
67
73
  ```
68
74
 
69
75
  ## Examples <a id="examples"></a>
@@ -2,7 +2,7 @@ from setuptools import setup
2
2
 
3
3
  setup(
4
4
  name='dvc-utils',
5
- version="0.0.3",
5
+ version="0.0.5",
6
6
  description="CLI for diffing DVC files at two commits (or one commit vs. current worktree), optionally passing both through another command first",
7
7
  long_description=open("README.md").read(),
8
8
  long_description_content_type="text/markdown",
@@ -1,148 +0,0 @@
1
- from functools import cache
2
- from os import environ as env, getcwd
3
-
4
- from typing import Optional, Tuple
5
-
6
- import shlex
7
- from os.path import join, relpath
8
-
9
- from click import option, argument, group
10
- from subprocess import Popen
11
-
12
- import click
13
- import yaml
14
- from utz import process, singleton, err
15
-
16
- from dvc_utils.named_pipes import named_pipes
17
-
18
-
19
- @group()
20
- def cli():
21
- pass
22
-
23
-
24
- def dvc_paths(path: str) -> Tuple[str, str]:
25
- if path.endswith('.dvc'):
26
- dvc_path = path
27
- path = dvc_path[:-len('.dvc')]
28
- else:
29
- dvc_path = f'{path}.dvc'
30
- return path, dvc_path
31
-
32
-
33
- @cache
34
- def get_git_root() -> str:
35
- return process.line('git', 'rev-parse', '--show-toplevel', log=False)
36
-
37
-
38
- @cache
39
- def get_dir_path() -> str:
40
- return relpath(getcwd(), get_git_root())
41
-
42
-
43
- @cache
44
- def dvc_cache_dir(log: bool = False) -> str:
45
- dvc_cache_relpath = env.get('DVC_UTILS_CACHE_DIR')
46
- if dvc_cache_relpath:
47
- return join(get_git_root(), dvc_cache_relpath)
48
- else:
49
- return process.line('dvc', 'cache', 'dir', log=log)
50
-
51
-
52
- def dvc_md5(git_ref: str, dvc_path: str, log: bool = False) -> str:
53
- dir_path = get_dir_path()
54
- dir_path = '' if dir_path == '.' else f'{dir_path}/'
55
- dvc_spec = process.output('git', 'show', f'{git_ref}:{dir_path}{dvc_path}', log=log)
56
- dvc_obj = yaml.safe_load(dvc_spec)
57
- out = singleton(dvc_obj['outs'], dedupe=False)
58
- md5 = out['md5']
59
- return md5
60
-
61
-
62
- def dvc_cache_path(ref: str, dvc_path: Optional[str] = None, log: bool = False) -> str:
63
- if dvc_path:
64
- md5 = dvc_md5(ref, dvc_path, log=log)
65
- elif ':' in ref:
66
- git_ref, dvc_path = ref.split(':', 1)
67
- md5 = dvc_md5(git_ref, dvc_path, log=log)
68
- else:
69
- md5 = ref
70
- dirname = md5[:2]
71
- basename = md5[2:]
72
- return join(dvc_cache_dir(log=log), 'files', 'md5', dirname, basename)
73
-
74
-
75
- def diff_cmds(cmd1: str, cmd2: str, verbose: bool = False, **kwargs):
76
- """Run two commands and diff their output.
77
-
78
- Adapted from https://stackoverflow.com/a/28840955"""
79
- with named_pipes(n=2) as pipes:
80
- (pipe1, pipe2) = pipes
81
- diff = Popen(['diff'] + pipes)
82
- processes = []
83
- for path, cmd in ((pipe1, cmd1), (pipe2, cmd2)):
84
- with open(path, 'wb', 0) as pipe:
85
- if verbose:
86
- err(f"Running: {cmd}")
87
- processes.append(Popen(cmd, stdout=pipe, close_fds=True, **kwargs))
88
- for p in [diff] + processes:
89
- p.wait()
90
-
91
-
92
- @cli.command('diff', short_help='Diff a DVC-tracked file at two commits (or one commit vs. current worktree), optionally passing both through another command first')
93
- @option('-r', '--refspec', default='HEAD', help='<commit 1>..<commit 2> (compare two commits) or <commit> (compare <commit> to the worktree)')
94
- @option('-S', '--no-shell', is_flag=True, help="Don't pass `shell=True` to Python `subprocess`es")
95
- @option('-v', '--verbose', is_flag=True, help="Log intermediate commands to stderr")
96
- @argument('args', metavar='[cmd...] <path>', nargs=-1)
97
- def dvc_utils_diff(refspec, no_shell, verbose, args):
98
- """Diff a file at two commits (or one commit vs. current worktree), optionally passing both through `cmd` first
99
-
100
- Examples:
101
-
102
- dvc-utils diff -r HEAD^..HEAD wc -l foo.dvc # Compare the number of lines (`wc -l`) in `foo` (the file referenced by `foo.dvc`) at the previous vs. current commit (`HEAD^..HEAD`).
103
-
104
- dvc-utils diff md5sum foo # Diff the `md5sum` of `foo` (".dvc" extension is optional) at HEAD (last committed value) vs. the current worktree content.
105
- """
106
- if not args:
107
- raise click.UsageError('Must specify [cmd...] <path>')
108
-
109
- shell = not no_shell
110
- if len(args) == 2:
111
- cmd, path = args
112
- cmd = shlex.split(cmd)
113
- elif len(args) == 1:
114
- cmd = None
115
- path, = args
116
- else:
117
- raise click.UsageError('Maximum 2 positional args: [cmd] <path>')
118
-
119
- path, dvc_path = dvc_paths(path)
120
-
121
- pcs = refspec.split('..', 1)
122
- if len(pcs) == 1:
123
- before = pcs[0]
124
- after = None
125
- elif len(pcs) == 2:
126
- before, after = pcs
127
- else:
128
- raise ValueError(f"Invalid refspec: {refspec}")
129
-
130
- log = err if verbose else False
131
- before_path = dvc_cache_path(before, dvc_path, log=log)
132
- after_path = path if after is None else dvc_cache_path(after, dvc_path, log=log)
133
-
134
- if cmd:
135
- def args(path: str):
136
- arr = cmd + [path]
137
- return shlex.join(arr) if shell else arr
138
-
139
- shell_kwargs = dict(shell=shell) if shell else {}
140
- before_cmd = args(before_path)
141
- after_cmd = args(after_path)
142
- diff_cmds(before_cmd, after_cmd, verbose=verbose, **shell_kwargs)
143
- else:
144
- process.run('diff', before_path, after_path, log=log)
145
-
146
-
147
- if __name__ == '__main__':
148
- cli()
File without changes
File without changes