dvc-utils 0.0.5__tar.gz → 0.0.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dvc-utils-0.0.5 → dvc-utils-0.0.7}/PKG-INFO +13 -9
- {dvc-utils-0.0.5 → dvc-utils-0.0.7}/README.md +12 -8
- {dvc-utils-0.0.5 → dvc-utils-0.0.7}/dvc_utils/main.py +1 -84
- {dvc-utils-0.0.5 → dvc-utils-0.0.7}/dvc_utils.egg-info/PKG-INFO +13 -9
- {dvc-utils-0.0.5 → dvc-utils-0.0.7}/dvc_utils.egg-info/SOURCES.txt +0 -1
- {dvc-utils-0.0.5 → dvc-utils-0.0.7}/dvc_utils.egg-info/entry_points.txt +1 -0
- {dvc-utils-0.0.5 → dvc-utils-0.0.7}/setup.py +2 -1
- dvc-utils-0.0.5/dvc_utils/named_pipes.py +0 -19
- {dvc-utils-0.0.5 → dvc-utils-0.0.7}/LICENSE +0 -0
- {dvc-utils-0.0.5 → dvc-utils-0.0.7}/dvc_utils/__init__.py +0 -0
- {dvc-utils-0.0.5 → dvc-utils-0.0.7}/dvc_utils.egg-info/dependency_links.txt +0 -0
- {dvc-utils-0.0.5 → dvc-utils-0.0.7}/dvc_utils.egg-info/top_level.txt +0 -0
- {dvc-utils-0.0.5 → dvc-utils-0.0.7}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: dvc-utils
|
3
|
-
Version: 0.0.5
|
3
|
+
Version: 0.0.7
|
4
4
|
Summary: CLI for diffing DVC files at two commits (or one commit vs. current worktree), optionally passing both through another command first
|
5
5
|
Home-page: https://github.com/runsascoded/dvc-utils
|
6
6
|
Author: Ryan Williams
|
@@ -15,7 +15,7 @@ CLI for diffing [DVC] files, optionally passing both through another command fir
|
|
15
15
|
<!-- toc -->
|
16
16
|
- [Installation](#installation)
|
17
17
|
- [Usage](#usage)
|
18
|
-
- [`dvc-
|
18
|
+
- [`dvc-diff`](#dvc-diff)
|
19
19
|
- [Examples](#examples)
|
20
20
|
- [Parquet file](#parquet-diff)
|
21
21
|
- [Schema diff](#parquet-schema-diff)
|
@@ -42,11 +42,13 @@ dvc-utils --help
|
|
42
42
|
# worktree), optionally passing both through another command first
|
43
43
|
```
|
44
44
|
|
45
|
-
|
46
|
-
|
45
|
+
The single subcommand, `dvc-utils diff`, is also exposed directly as `dvc-diff`:
|
46
|
+
|
47
|
+
### `dvc-diff` <a id="dvc-diff"></a>
|
48
|
+
<!-- `bmdf -- dvc-diff --help` -->
|
47
49
|
```bash
|
48
|
-
dvc-utils diff --help
|
49
|
-
# Usage: dvc-
|
50
|
+
dvc-diff --help
|
51
|
+
# Usage: dvc-diff [OPTIONS] [exec_cmd...] <path>
|
50
52
|
#
|
51
53
|
# Diff a file at two commits (or one commit vs. current worktree), optionally
|
52
54
|
# passing both through `cmd` first
|
@@ -69,6 +71,8 @@ dvc-utils diff --help
|
|
69
71
|
# to `diff`)
|
70
72
|
# -v, --verbose Log intermediate commands to stderr
|
71
73
|
# -w, --ignore-whitespace Ignore whitespace differences (pass `-w` to `diff`)
|
74
|
+
# -x, --exec-cmd TEXT Command(s) to execute before diffing; alternate
|
75
|
+
# syntax to passing commands as positional arguments
|
72
76
|
# --help Show this message and exit.
|
73
77
|
```
|
74
78
|
|
@@ -91,7 +95,7 @@ parquet_schema() {
|
|
91
95
|
parquet2json "$1" schema
|
92
96
|
}
|
93
97
|
export -f parquet_schema
|
94
|
-
dvc-
|
98
|
+
dvc-diff -r $commit^..$commit parquet_schema $path
|
95
99
|
```
|
96
100
|
<details><summary>Output</summary>
|
97
101
|
|
@@ -149,7 +153,7 @@ pretty_print_first_row() {
|
|
149
153
|
parquet2json "$1" cat -l 1 | jq .
|
150
154
|
}
|
151
155
|
export -f pretty_print_first_row
|
152
|
-
dvc-
|
156
|
+
dvc-diff -r $commit^..$commit pretty_print_first_row $path
|
153
157
|
```
|
154
158
|
|
155
159
|
<details><summary>Output</summary>
|
@@ -208,7 +212,7 @@ parquet_row_count() {
|
|
208
212
|
parquet2json "$1" rowcount
|
209
213
|
}
|
210
214
|
export -f parquet_row_count
|
211
|
-
dvc-
|
215
|
+
dvc-diff -r $commit^..$commit parquet_row_count $path
|
212
216
|
```
|
213
217
|
|
214
218
|
This time we get no output; [the given `$commit`][commit] didn't change the row count in the DVC-tracked Parquet file [`$path`][commit path].
|
@@ -4,7 +4,7 @@ CLI for diffing [DVC] files, optionally passing both through another command fir
|
|
4
4
|
<!-- toc -->
|
5
5
|
- [Installation](#installation)
|
6
6
|
- [Usage](#usage)
|
7
|
-
- [`dvc-
|
7
|
+
- [`dvc-diff`](#dvc-diff)
|
8
8
|
- [Examples](#examples)
|
9
9
|
- [Parquet file](#parquet-diff)
|
10
10
|
- [Schema diff](#parquet-schema-diff)
|
@@ -31,11 +31,13 @@ dvc-utils --help
|
|
31
31
|
# worktree), optionally passing both through another command first
|
32
32
|
```
|
33
33
|
|
34
|
-
|
35
|
-
|
34
|
+
The single subcommand, `dvc-utils diff`, is also exposed directly as `dvc-diff`:
|
35
|
+
|
36
|
+
### `dvc-diff` <a id="dvc-diff"></a>
|
37
|
+
<!-- `bmdf -- dvc-diff --help` -->
|
36
38
|
```bash
|
37
|
-
dvc-utils diff --help
|
38
|
-
# Usage: dvc-
|
39
|
+
dvc-diff --help
|
40
|
+
# Usage: dvc-diff [OPTIONS] [exec_cmd...] <path>
|
39
41
|
#
|
40
42
|
# Diff a file at two commits (or one commit vs. current worktree), optionally
|
41
43
|
# passing both through `cmd` first
|
@@ -58,6 +60,8 @@ dvc-utils diff --help
|
|
58
60
|
# to `diff`)
|
59
61
|
# -v, --verbose Log intermediate commands to stderr
|
60
62
|
# -w, --ignore-whitespace Ignore whitespace differences (pass `-w` to `diff`)
|
63
|
+
# -x, --exec-cmd TEXT Command(s) to execute before diffing; alternate
|
64
|
+
# syntax to passing commands as positional arguments
|
61
65
|
# --help Show this message and exit.
|
62
66
|
```
|
63
67
|
|
@@ -80,7 +84,7 @@ parquet_schema() {
|
|
80
84
|
parquet2json "$1" schema
|
81
85
|
}
|
82
86
|
export -f parquet_schema
|
83
|
-
dvc-
|
87
|
+
dvc-diff -r $commit^..$commit parquet_schema $path
|
84
88
|
```
|
85
89
|
<details><summary>Output</summary>
|
86
90
|
|
@@ -138,7 +142,7 @@ pretty_print_first_row() {
|
|
138
142
|
parquet2json "$1" cat -l 1 | jq .
|
139
143
|
}
|
140
144
|
export -f pretty_print_first_row
|
141
|
-
dvc-
|
145
|
+
dvc-diff -r $commit^..$commit pretty_print_first_row $path
|
142
146
|
```
|
143
147
|
|
144
148
|
<details><summary>Output</summary>
|
@@ -197,7 +201,7 @@ parquet_row_count() {
|
|
197
201
|
parquet2json "$1" rowcount
|
198
202
|
}
|
199
203
|
export -f parquet_row_count
|
200
|
-
dvc-
|
204
|
+
dvc-diff -r $commit^..$commit parquet_row_count $path
|
201
205
|
```
|
202
206
|
|
203
207
|
This time we get no output; [the given `$commit`][commit] didn't change the row count in the DVC-tracked Parquet file [`$path`][commit path].
|
@@ -2,15 +2,12 @@ from functools import cache
|
|
2
2
|
from os import environ as env, getcwd
|
3
3
|
from os.path import join, relpath
|
4
4
|
import shlex
|
5
|
-
from subprocess import Popen, PIPE
|
6
5
|
from typing import Optional, Tuple
|
7
6
|
|
8
7
|
from click import option, argument, group
|
9
8
|
import click
|
10
9
|
import yaml
|
11
|
-
from utz import process,
|
12
|
-
|
13
|
-
from dvc_utils.named_pipes import named_pipes
|
10
|
+
from utz import diff_cmds, process, err, singleton
|
14
11
|
|
15
12
|
|
16
13
|
@group()
|
@@ -69,86 +66,6 @@ def dvc_cache_path(ref: str, dvc_path: Optional[str] = None, log: bool = False)
|
|
69
66
|
return join(dvc_cache_dir(log=log), 'files', 'md5', dirname, basename)
|
70
67
|
|
71
68
|
|
72
|
-
def diff_cmds(
|
73
|
-
cmds1: list[str],
|
74
|
-
cmds2: list[str],
|
75
|
-
verbose: bool = False,
|
76
|
-
color: bool = False,
|
77
|
-
unified: int | None = None,
|
78
|
-
ignore_whitespace: bool = False,
|
79
|
-
**kwargs,
|
80
|
-
):
|
81
|
-
"""Run two sequences of piped commands and diff their output.
|
82
|
-
|
83
|
-
Args:
|
84
|
-
cmds1: First sequence of commands to pipe together
|
85
|
-
cmds2: Second sequence of commands to pipe together
|
86
|
-
verbose: Whether to print commands being executed
|
87
|
-
color: Whether to show colored diff output
|
88
|
-
unified: Number of unified context lines, or None
|
89
|
-
ignore_whitespace: Whether to ignore whitespace changes
|
90
|
-
**kwargs: Additional arguments passed to subprocess.Popen
|
91
|
-
|
92
|
-
Each command sequence will be piped together before being compared.
|
93
|
-
For example, if cmds1 = ['cat foo.txt', 'sort'], the function will
|
94
|
-
execute 'cat foo.txt | sort' before comparing with cmds2's output.
|
95
|
-
|
96
|
-
Adapted from https://stackoverflow.com/a/28840955"""
|
97
|
-
with named_pipes(n=2) as pipes:
|
98
|
-
(pipe1, pipe2) = pipes
|
99
|
-
diff_cmd = [
|
100
|
-
'diff',
|
101
|
-
*(['-w'] if ignore_whitespace else []),
|
102
|
-
*(['-U', str(unified)] if unified is not None else []),
|
103
|
-
*(['--color=always'] if color else []),
|
104
|
-
pipe1,
|
105
|
-
pipe2,
|
106
|
-
]
|
107
|
-
diff = Popen(diff_cmd)
|
108
|
-
processes = []
|
109
|
-
|
110
|
-
for pipe, cmds in ((pipe1, cmds1), (pipe2, cmds2)):
|
111
|
-
if verbose:
|
112
|
-
err(f"Running pipeline: {' | '.join(cmds)}")
|
113
|
-
|
114
|
-
# Create the pipeline of processes
|
115
|
-
prev_process = None
|
116
|
-
for i, cmd in enumerate(cmds):
|
117
|
-
is_last = i + 1 == len(cmds)
|
118
|
-
|
119
|
-
# For the first process, take input from the original source
|
120
|
-
stdin = None if prev_process is None else prev_process.stdout
|
121
|
-
|
122
|
-
# For the last process, output to the named pipe
|
123
|
-
if is_last:
|
124
|
-
with open(pipe, 'wb', 0) as pipe_fd:
|
125
|
-
proc = Popen(
|
126
|
-
cmd,
|
127
|
-
stdin=stdin,
|
128
|
-
stdout=pipe_fd,
|
129
|
-
close_fds=True,
|
130
|
-
**kwargs
|
131
|
-
)
|
132
|
-
# For intermediate processes, output to a pipe
|
133
|
-
else:
|
134
|
-
proc = Popen(
|
135
|
-
cmd,
|
136
|
-
stdin=stdin,
|
137
|
-
stdout=PIPE,
|
138
|
-
close_fds=True,
|
139
|
-
**kwargs
|
140
|
-
)
|
141
|
-
|
142
|
-
if prev_process is not None:
|
143
|
-
prev_process.stdout.close()
|
144
|
-
|
145
|
-
processes.append(proc)
|
146
|
-
prev_process = proc
|
147
|
-
|
148
|
-
for p in [diff] + processes:
|
149
|
-
p.wait()
|
150
|
-
|
151
|
-
|
152
69
|
@cli.command('diff', short_help='Diff a DVC-tracked file at two commits (or one commit vs. current worktree), optionally passing both through another command first')
|
153
70
|
@option('-c', '--color', is_flag=True, help='Colorize the output')
|
154
71
|
@option('-r', '--refspec', default='HEAD', help='<commit 1>..<commit 2> (compare two commits) or <commit> (compare <commit> to the worktree)')
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: dvc-utils
|
3
|
-
Version: 0.0.5
|
3
|
+
Version: 0.0.7
|
4
4
|
Summary: CLI for diffing DVC files at two commits (or one commit vs. current worktree), optionally passing both through another command first
|
5
5
|
Home-page: https://github.com/runsascoded/dvc-utils
|
6
6
|
Author: Ryan Williams
|
@@ -15,7 +15,7 @@ CLI for diffing [DVC] files, optionally passing both through another command fir
|
|
15
15
|
<!-- toc -->
|
16
16
|
- [Installation](#installation)
|
17
17
|
- [Usage](#usage)
|
18
|
-
- [`dvc-
|
18
|
+
- [`dvc-diff`](#dvc-diff)
|
19
19
|
- [Examples](#examples)
|
20
20
|
- [Parquet file](#parquet-diff)
|
21
21
|
- [Schema diff](#parquet-schema-diff)
|
@@ -42,11 +42,13 @@ dvc-utils --help
|
|
42
42
|
# worktree), optionally passing both through another command first
|
43
43
|
```
|
44
44
|
|
45
|
-
|
46
|
-
|
45
|
+
The single subcommand, `dvc-utils diff`, is also exposed directly as `dvc-diff`:
|
46
|
+
|
47
|
+
### `dvc-diff` <a id="dvc-diff"></a>
|
48
|
+
<!-- `bmdf -- dvc-diff --help` -->
|
47
49
|
```bash
|
48
|
-
dvc-utils diff --help
|
49
|
-
# Usage: dvc-
|
50
|
+
dvc-diff --help
|
51
|
+
# Usage: dvc-diff [OPTIONS] [exec_cmd...] <path>
|
50
52
|
#
|
51
53
|
# Diff a file at two commits (or one commit vs. current worktree), optionally
|
52
54
|
# passing both through `cmd` first
|
@@ -69,6 +71,8 @@ dvc-utils diff --help
|
|
69
71
|
# to `diff`)
|
70
72
|
# -v, --verbose Log intermediate commands to stderr
|
71
73
|
# -w, --ignore-whitespace Ignore whitespace differences (pass `-w` to `diff`)
|
74
|
+
# -x, --exec-cmd TEXT Command(s) to execute before diffing; alternate
|
75
|
+
# syntax to passing commands as positional arguments
|
72
76
|
# --help Show this message and exit.
|
73
77
|
```
|
74
78
|
|
@@ -91,7 +95,7 @@ parquet_schema() {
|
|
91
95
|
parquet2json "$1" schema
|
92
96
|
}
|
93
97
|
export -f parquet_schema
|
94
|
-
dvc-
|
98
|
+
dvc-diff -r $commit^..$commit parquet_schema $path
|
95
99
|
```
|
96
100
|
<details><summary>Output</summary>
|
97
101
|
|
@@ -149,7 +153,7 @@ pretty_print_first_row() {
|
|
149
153
|
parquet2json "$1" cat -l 1 | jq .
|
150
154
|
}
|
151
155
|
export -f pretty_print_first_row
|
152
|
-
dvc-
|
156
|
+
dvc-diff -r $commit^..$commit pretty_print_first_row $path
|
153
157
|
```
|
154
158
|
|
155
159
|
<details><summary>Output</summary>
|
@@ -208,7 +212,7 @@ parquet_row_count() {
|
|
208
212
|
parquet2json "$1" rowcount
|
209
213
|
}
|
210
214
|
export -f parquet_row_count
|
211
|
-
dvc-
|
215
|
+
dvc-diff -r $commit^..$commit parquet_row_count $path
|
212
216
|
```
|
213
217
|
|
214
218
|
This time we get no output; [the given `$commit`][commit] didn't change the row count in the DVC-tracked Parquet file [`$path`][commit path].
|
@@ -2,7 +2,7 @@ from setuptools import setup
|
|
2
2
|
|
3
3
|
setup(
|
4
4
|
name='dvc-utils',
|
5
|
-
version="0.0.5",
|
5
|
+
version="0.0.7",
|
6
6
|
description="CLI for diffing DVC files at two commits (or one commit vs. current worktree), optionally passing both through another command first",
|
7
7
|
long_description=open("README.md").read(),
|
8
8
|
long_description_content_type="text/markdown",
|
@@ -10,6 +10,7 @@ setup(
|
|
10
10
|
entry_points={
|
11
11
|
'console_scripts': [
|
12
12
|
'dvc-utils = dvc_utils.main:cli',
|
13
|
+
'dvc-diff = dvc_utils.main:dvc_utils_diff',
|
13
14
|
],
|
14
15
|
},
|
15
16
|
license="MIT",
|
@@ -1,19 +0,0 @@
|
|
1
|
-
import os
|
2
|
-
import shutil
|
3
|
-
import tempfile
|
4
|
-
from contextlib import contextmanager
|
5
|
-
|
6
|
-
|
7
|
-
@contextmanager
|
8
|
-
def named_pipes(n: int = 1):
|
9
|
-
"""Yield a list of paths to named pipes that are created and destroyed
|
10
|
-
|
11
|
-
From https://stackoverflow.com/a/28840955"""
|
12
|
-
dirname = tempfile.mkdtemp()
|
13
|
-
try:
|
14
|
-
paths = [os.path.join(dirname, 'named_pipe' + str(i)) for i in range(n)]
|
15
|
-
for path in paths:
|
16
|
-
os.mkfifo(path)
|
17
|
-
yield paths
|
18
|
-
finally:
|
19
|
-
shutil.rmtree(dirname)
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|