dvc-utils 0.0.2__py3-none-any.whl → 0.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
dvc_utils/main.py CHANGED
@@ -1,5 +1,12 @@
1
+ from functools import cache
2
+ from os import environ as env, getcwd
3
+
4
+ from typing import Optional, Tuple
5
+
1
6
  import shlex
2
- from os.path import join
7
+ from os.path import join, relpath
8
+
9
+ from click import option, argument, group
3
10
  from subprocess import Popen
4
11
 
5
12
  import click
@@ -9,12 +16,12 @@ from utz import process, singleton, err
9
16
  from dvc_utils.named_pipes import named_pipes
10
17
 
11
18
 
12
- @click.group()
19
+ @group()
13
20
  def cli():
14
21
  pass
15
22
 
16
23
 
17
- def dvc_paths(path):
24
+ def dvc_paths(path: str) -> Tuple[str, str]:
18
25
  if path.endswith('.dvc'):
19
26
  dvc_path = path
20
27
  path = dvc_path[:-len('.dvc')]
@@ -23,55 +30,98 @@ def dvc_paths(path):
23
30
  return path, dvc_path
24
31
 
25
32
 
26
- def dvc_md5(git_ref, dvc_path, log=False):
27
- dvc_spec = process.output('git', 'show', f'{git_ref}:{dvc_path}', log=log)
33
+ @cache
34
+ def get_git_root() -> str:
35
+ return process.line('git', 'rev-parse', '--show-toplevel', log=False)
36
+
37
+
38
+ @cache
39
+ def get_dir_path() -> str:
40
+ return relpath(getcwd(), get_git_root())
41
+
42
+
43
+ @cache
44
+ def dvc_cache_dir(log: bool = False) -> str:
45
+ dvc_cache_relpath = env.get('DVC_UTILS_CACHE_DIR')
46
+ if dvc_cache_relpath:
47
+ return join(get_git_root(), dvc_cache_relpath)
48
+ else:
49
+ return process.line('dvc', 'cache', 'dir', log=log)
50
+
51
+
52
+ def dvc_md5(git_ref: str, dvc_path: str, log: bool = False) -> str:
53
+ dir_path = get_dir_path()
54
+ dir_path = '' if dir_path == '.' else f'{dir_path}/'
55
+ dvc_spec = process.output('git', 'show', f'{git_ref}:{dir_path}{dvc_path}', log=log)
28
56
  dvc_obj = yaml.safe_load(dvc_spec)
29
57
  out = singleton(dvc_obj['outs'], dedupe=False)
30
58
  md5 = out['md5']
31
59
  return md5
32
60
 
33
61
 
34
- _dvc_cache_dir = None
35
- def dvc_cache_dir(log=False):
36
- global _dvc_cache_dir
37
- if _dvc_cache_dir is None:
38
- _dvc_cache_dir = process.line('dvc', 'cache', 'dir', log=log)
39
- return _dvc_cache_dir
40
-
41
-
42
- def dvc_cache_path(spec, dvc_path=None, log=False):
62
+ def dvc_cache_path(ref: str, dvc_path: Optional[str] = None, log: bool = False) -> str:
43
63
  if dvc_path:
44
- md5 = dvc_md5(spec, dvc_path, log=log)
45
- elif ':' in spec:
46
- git_ref, dvc_path = spec.split(':', 1)
64
+ md5 = dvc_md5(ref, dvc_path, log=log)
65
+ elif ':' in ref:
66
+ git_ref, dvc_path = ref.split(':', 1)
47
67
  md5 = dvc_md5(git_ref, dvc_path, log=log)
48
68
  else:
49
- md5 = spec
69
+ md5 = ref
50
70
  dirname = md5[:2]
51
71
  basename = md5[2:]
52
72
  return join(dvc_cache_dir(log=log), 'files', 'md5', dirname, basename)
53
73
 
54
74
 
55
- def diff_cmds(cmd1, cmd2, **kwargs):
75
+ def diff_cmds(
76
+ cmd1: str,
77
+ cmd2: str,
78
+ verbose: bool = False,
79
+ color: bool = False,
80
+ unified: int | None = None,
81
+ ignore_whitespace: bool = False,
82
+ **kwargs,
83
+ ):
56
84
  """Run two commands and diff their output.
57
85
 
58
86
  Adapted from https://stackoverflow.com/a/28840955"""
59
- with named_pipes(n=2) as paths:
60
- someprogram = Popen(['diff'] + paths)
87
+ with named_pipes(n=2) as pipes:
88
+ (pipe1, pipe2) = pipes
89
+ diff_cmd = [
90
+ 'diff',
91
+ *(['-w'] if ignore_whitespace else []),
92
+ *(['-U', str(unified)] if unified is not None else []),
93
+ *(['--color=always'] if color else []),
94
+ pipe1,
95
+ pipe2,
96
+ ]
97
+ diff = Popen(diff_cmd)
61
98
  processes = []
62
- for path, cmd in zip(paths, [ cmd1, cmd2 ]):
99
+ for path, cmd in ((pipe1, cmd1), (pipe2, cmd2)):
63
100
  with open(path, 'wb', 0) as pipe:
101
+ if verbose:
102
+ err(f"Running: {cmd}")
64
103
  processes.append(Popen(cmd, stdout=pipe, close_fds=True, **kwargs))
65
- for p in [someprogram] + processes:
104
+ for p in [diff] + processes:
66
105
  p.wait()
67
106
 
68
107
 
69
108
  @cli.command('diff', short_help='Diff a DVC-tracked file at two commits (or one commit vs. current worktree), optionally passing both through another command first')
70
- @click.option('-r', '--refspec', default='HEAD', help='<commit 1>..<commit 2> (compare two commits) or <commit> (compare <commit> to the worktree)')
71
- @click.option('-S', '--no-shell', is_flag=True, help="Don't pass `shell=True` to Python `subprocess`es")
72
- @click.option('-v', '--verbose', is_flag=True, help="Log intermediate commands to stderr")
73
- @click.argument('args', metavar='[cmd...] <path>', nargs=-1)
74
- def dvc_utils_diff(refspec, no_shell, verbose, args):
109
+ @option('-c', '--color', is_flag=True, help='Colorize the output')
110
+ @option('-r', '--refspec', default='HEAD', help='<commit 1>..<commit 2> (compare two commits) or <commit> (compare <commit> to the worktree)')
111
+ @option('-S', '--no-shell', is_flag=True, help="Don't pass `shell=True` to Python `subprocess`es")
112
+ @option('-U', '--unified', type=int, help='Number of lines of context to show (passes through to `diff`)')
113
+ @option('-v', '--verbose', is_flag=True, help="Log intermediate commands to stderr")
114
+ @option('-w', '--ignore-whitespace', is_flag=True, help="Ignore whitespace differences (pass `-w` to `diff`)")
115
+ @argument('args', metavar='[cmd...] <path>', nargs=-1)
116
+ def dvc_utils_diff(
117
+ color: bool,
118
+ refspec: str | None,
119
+ no_shell: bool,
120
+ unified: int | None,
121
+ verbose: bool,
122
+ ignore_whitespace: bool,
123
+ args: Tuple[str, ...],
124
+ ):
75
125
  """Diff a file at two commits (or one commit vs. current worktree), optionally passing both through `cmd` first
76
126
 
77
127
  Examples:
@@ -84,33 +134,47 @@ def dvc_utils_diff(refspec, no_shell, verbose, args):
84
134
  raise click.UsageError('Must specify [cmd...] <path>')
85
135
 
86
136
  shell = not no_shell
87
- (*cmd, path) = args
88
- if path.endswith('.dvc'):
89
- dvc_path = path
90
- path = dvc_path[:-len('.dvc')]
137
+ if len(args) == 2:
138
+ cmd, path = args
139
+ cmd = shlex.split(cmd)
140
+ elif len(args) == 1:
141
+ cmd = None
142
+ path, = args
91
143
  else:
92
- dvc_path = f'{path}.dvc'
144
+ raise click.UsageError('Maximum 2 positional args: [cmd] <path>')
145
+
146
+ path, dvc_path = dvc_paths(path)
93
147
 
94
148
  pcs = refspec.split('..', 1)
95
149
  if len(pcs) == 1:
96
150
  before = pcs[0]
97
151
  after = None
98
- else:
152
+ elif len(pcs) == 2:
99
153
  before, after = pcs
154
+ else:
155
+ raise ValueError(f"Invalid refspec: {refspec}")
100
156
 
101
157
  log = err if verbose else False
102
158
  before_path = dvc_cache_path(before, dvc_path, log=log)
103
159
  after_path = path if after is None else dvc_cache_path(after, dvc_path, log=log)
104
160
 
105
161
  if cmd:
106
- def args(path):
162
+ def args(path: str):
107
163
  arr = cmd + [path]
108
164
  return shlex.join(arr) if shell else arr
109
165
 
110
166
  shell_kwargs = dict(shell=shell) if shell else {}
111
167
  before_cmd = args(before_path)
112
168
  after_cmd = args(after_path)
113
- diff_cmds(before_cmd, after_cmd, **shell_kwargs)
169
+ diff_cmds(
170
+ before_cmd,
171
+ after_cmd,
172
+ verbose=verbose,
173
+ color=color,
174
+ unified=unified,
175
+ ignore_whitespace=ignore_whitespace,
176
+ **shell_kwargs,
177
+ )
114
178
  else:
115
179
  process.run('diff', before_path, after_path, log=log)
116
180
 
dvc_utils/named_pipes.py CHANGED
@@ -5,7 +5,7 @@ from contextlib import contextmanager
5
5
 
6
6
 
7
7
  @contextmanager
8
- def named_pipes(n=1):
8
+ def named_pipes(n: int = 1):
9
9
  """Yield a list of paths to named pipes that are created and destroyed
10
10
 
11
11
  From https://stackoverflow.com/a/28840955"""
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dvc-utils
3
- Version: 0.0.2
3
+ Version: 0.0.4
4
4
  Summary: CLI for diffing DVC files at two commits (or one commit vs. current worktree), optionally passing both through another command first
5
5
  Home-page: https://github.com/runsascoded/dvc-utils
6
6
  Author: Ryan Williams
@@ -10,14 +10,26 @@ Description-Content-Type: text/markdown
10
10
  License-File: LICENSE
11
11
 
12
12
  # dvc-utils
13
- CLI for diffing [DVC] files at two commits (or one commit vs. current worktree), optionally passing both through another command first
14
-
15
- ## Installation
13
+ CLI for diffing [DVC] files, optionally passing both through another command first
14
+
15
+ <!-- toc -->
16
+ - [Installation](#installation)
17
+ - [Usage](#usage)
18
+ - [`dvc-utils diff`](#dvc-utils-diff)
19
+ - [Examples](#examples)
20
+ - [Parquet file](#parquet-diff)
21
+ - [Schema diff](#parquet-schema-diff)
22
+ - [Row diff](#parquet-row-diff)
23
+ - [Row count diff](#parquet-row-count-diff)
24
+ <!-- /toc -->
25
+
26
+ ## Installation <a id="installation"></a>
16
27
  ```bash
17
28
  pip install dvc-utils
18
29
  ```
19
30
 
20
- ## Usage
31
+ ## Usage <a id="usage"></a>
32
+ <!-- `bmdf -- dvc-utils --help` -->
21
33
  ```bash
22
34
  dvc-utils --help
23
35
  # Usage: dvc-utils [OPTIONS] COMMAND [ARGS]...
@@ -30,7 +42,8 @@ dvc-utils --help
30
42
  # worktree), optionally passing both through another command first
31
43
  ```
32
44
 
33
- ### `dvc-utils diff`
45
+ ### `dvc-utils diff` <a id="dvc-utils-diff"></a>
46
+ <!-- `bmdf -- dvc-utils diff --help` -->
34
47
  ```bash
35
48
  dvc-utils diff --help
36
49
  # Usage: dvc-utils diff [OPTIONS] [cmd...] <path>
@@ -48,24 +61,31 @@ dvc-utils diff --help
48
61
  # optional) at HEAD (last committed value) vs. the current worktree content.
49
62
  #
50
63
  # Options:
51
- # -r, --refspec TEXT <commit 1>..<commit 2> (compare two commits) or <commit>
52
- # (compare <commit> to the worktree)
53
- # -S, --no-shell Don't pass `shell=True` to Python `subprocess`es
54
- # -v, --verbose Log intermediate commands to stderr
55
- # --help Show this message and exit.
64
+ # -c, --color Colorize the output
65
+ # -r, --refspec TEXT <commit 1>..<commit 2> (compare two commits) or
66
+ # <commit> (compare <commit> to the worktree)
67
+ # -S, --no-shell Don't pass `shell=True` to Python `subprocess`es
68
+ # -U, --unified INTEGER Number of lines of context to show (passes through
69
+ # to `diff`)
70
+ # -v, --verbose Log intermediate commands to stderr
71
+ # -w, --ignore-whitespace Ignore whitespace differences (pass `-w` to `diff`)
72
+ # --help Show this message and exit.
56
73
  ```
57
74
 
58
- ## Examples
75
+ ## Examples <a id="examples"></a>
76
+
77
+ ### Parquet file <a id="parquet-diff"></a>
59
78
  See sample commands and output below for inspecting changes to [a DVC-tracked Parquet file][commit path] in [a given commit][commit].
60
79
 
80
+ Setup:
61
81
  ```bash
62
- git clone https://github.com/neighbor-ryan/nj-crashes
63
- commit=c8ae28e
64
- path=njdot/data/2001/NewJersey2001Accidents.pqt.dvc
82
+ git clone https://github.com/hudcostreets/nj-crashes && cd nj-crashes # Clone + enter example repo
83
+ commit=c8ae28e # Example commit that changed some DVC-tracked Parquet files
84
+ path=njdot/data/2001/NewJersey2001Accidents.pqt.dvc # One of the changed files
65
85
  ```
66
86
 
67
- ### Parquet schema diff
68
- Use [`parquet2json`] to observe schema changes to a Parquet file, in [a given commit][commit] from [neighbor-ryan/nj-crashes]:
87
+ #### Schema diff <a id="parquet-schema-diff"></a>
88
+ Use [`parquet2json`] to observe schema changes to a Parquet file:
69
89
  ```bash
70
90
  parquet_schema() {
71
91
  parquet2json "$1" schema
@@ -120,11 +140,12 @@ Here we can see that various date/time columns were consolidated, and several st
120
140
 
121
141
  </details>
122
142
 
123
- ### Parquet row diff
124
- Diff the first row of the Parquet file above (pretty-printed as JSON), before and after the given commit:
143
+ #### Row diff <a id="parquet-row-diff"></a>
144
+ Diff the first row of the Parquet file above (pretty-printed as JSON using [`jq`]), before and after the given commit:
125
145
 
126
146
  ```bash
127
147
  pretty_print_first_row() {
148
+ # Print first row of Parquet file as JSON, pretty-print with jq
128
149
  parquet2json "$1" cat -l 1 | jq .
129
150
  }
130
151
  export -f pretty_print_first_row
@@ -181,7 +202,7 @@ This reflects the schema changes above.
181
202
 
182
203
  </details>
183
204
 
184
- ### Parquet row count diff
205
+ #### Row count diff <a id="parquet-row-count-diff"></a>
185
206
  ```bash
186
207
  parquet_row_count() {
187
208
  parquet2json "$1" rowcount
@@ -194,8 +215,9 @@ This time we get no output; [the given `$commit`][commit] didn't change the row
194
215
 
195
216
  [DVC]: https://dvc.org/
196
217
  [`parquet2json`]: https://github.com/jupiter/parquet2json
197
- [neighbor-ryan/nj-crashes]: https://github.com/neighbor-ryan/nj-crashes
218
+ [hudcostreets/nj-crashes]: https://github.com/hudcostreets/nj-crashes
198
219
  [Parquet]: https://parquet.apache.org/
199
- [commit]: https://github.com/neighbor-ryan/nj-crashes/commit/c8ae28e64f4917895d84074913f48e0a7afbc3d7
200
- [commit path]: https://github.com/neighbor-ryan/nj-crashes/commit/c8ae28e64f4917895d84074913f48e0a7afbc3d7#diff-7f812dce61e0996354f4af414203e0933ccdfe9613cb406c40c1c41a14b9769c
201
- [neighbor-ryan/nj-crashes]: https://github.com/neighbor-ryan/nj-crashes
220
+ [commit]: https://github.com/hudcostreets/nj-crashes/commit/c8ae28e64f4917895d84074913f48e0a7afbc3d7
221
+ [commit path]: https://github.com/hudcostreets/nj-crashes/commit/c8ae28e64f4917895d84074913f48e0a7afbc3d7#diff-7f812dce61e0996354f4af414203e0933ccdfe9613cb406c40c1c41a14b9769c
222
+ [hudcostreets/nj-crashes]: https://github.com/hudcostreets/nj-crashes
223
+ [`jq`]: https://jqlang.github.io/jq/
@@ -0,0 +1,9 @@
1
+ dvc_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ dvc_utils/main.py,sha256=oszbPch2tASbhKQunE9DiiOZxUHkfd_s2iHWqDM5vZg,5687
3
+ dvc_utils/named_pipes.py,sha256=VQ2t9BYCazFq_-MABj4t2HS7GHDvSqXXx8fOLz5DsTc,492
4
+ dvc_utils-0.0.4.dist-info/LICENSE,sha256=ZS8AReay7xmQzBAHwxIuTouGXz3SKgUa2_Sz8Ip0EzQ,1070
5
+ dvc_utils-0.0.4.dist-info/METADATA,sha256=Pr8ov2afc0wlJkzWWqTNoey-LvrKRSIrG8JM_x4q03A,6924
6
+ dvc_utils-0.0.4.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
7
+ dvc_utils-0.0.4.dist-info/entry_points.txt,sha256=W9OuZ6CX8QF9ojbqLtfXFo8Q2hnJ-zlcGY4_7nO8paM,49
8
+ dvc_utils-0.0.4.dist-info/top_level.txt,sha256=jT0-PJa2t_eFRE9rn-52AjdnZ8nQeEHllf2kJmaGh80,10
9
+ dvc_utils-0.0.4.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.41.3)
2
+ Generator: bdist_wheel (0.44.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,9 +0,0 @@
1
- dvc_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- dvc_utils/main.py,sha256=_D_FVRVnBGRHb6XIeTLOOFKc0v5J9D8_O6thG8_lSmU,3863
3
- dvc_utils/named_pipes.py,sha256=GqWvsvTMmnkjk0gPM1aXBIW5dUsSkW-eblerHJ18B68,485
4
- dvc_utils-0.0.2.dist-info/LICENSE,sha256=ZS8AReay7xmQzBAHwxIuTouGXz3SKgUa2_Sz8Ip0EzQ,1070
5
- dvc_utils-0.0.2.dist-info/METADATA,sha256=92Uu2g1qLI9qpHoKvvndj5xapXnPcQN-sxhxK2PTvow,5896
6
- dvc_utils-0.0.2.dist-info/WHEEL,sha256=Xo9-1PvkuimrydujYJAjF7pCkriuXBpUPEjma1nZyJ0,92
7
- dvc_utils-0.0.2.dist-info/entry_points.txt,sha256=W9OuZ6CX8QF9ojbqLtfXFo8Q2hnJ-zlcGY4_7nO8paM,49
8
- dvc_utils-0.0.2.dist-info/top_level.txt,sha256=jT0-PJa2t_eFRE9rn-52AjdnZ8nQeEHllf2kJmaGh80,10
9
- dvc_utils-0.0.2.dist-info/RECORD,,