dvcgen 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dvcgen/__init__.py +21 -0
- dvcgen/cli.py +119 -0
- dvcgen/generate.py +145 -0
- dvcgen/inspect.py +160 -0
- dvcgen-0.2.0.dist-info/METADATA +175 -0
- dvcgen-0.2.0.dist-info/RECORD +8 -0
- dvcgen-0.2.0.dist-info/WHEEL +4 -0
- dvcgen-0.2.0.dist-info/entry_points.txt +2 -0
dvcgen/__init__.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"""Generate DVC pipeline files from Python declarations."""
|
|
2
|
+
|
|
3
|
+
# Keep in sync with the distribution version: this value is what
# ``dvcgen --version`` prints, and the wheel is published as 0.2.0.
__version__ = "0.2.0"
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def dep(path):
    """Mark *path* as a pipeline dependency.

    At runtime this is an identity function: *path* is handed back
    unchanged so the script can keep using the value.
    """
    declared_dependency = path
    return declared_dependency
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def out(path):
    """Mark *path* as a pipeline output.

    At runtime this is an identity function: *path* is handed back
    unchanged so the script can keep using the value.
    """
    declared_output = path
    return declared_output
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def param(name, default):
    """Mark *name* as a pipeline parameter whose default is *default*.

    At runtime *name* is ignored and *default* is handed back unchanged.
    """
    runtime_value = default
    return runtime_value
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
# Public names exported by ``from dvcgen import *``.
__all__ = ["__version__", "dep", "out", "param"]
|
dvcgen/cli.py
ADDED
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
"""Command-line interface for dvcgen."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import argparse
|
|
6
|
+
from collections.abc import Sequence
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
import sys
|
|
9
|
+
from typing import Optional, TextIO
|
|
10
|
+
|
|
11
|
+
from dvcgen import __version__
|
|
12
|
+
from dvcgen.generate import write_files
|
|
13
|
+
from dvcgen.inspect import inspect_files
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def build_parser() -> argparse.ArgumentParser:
    """Create the argument parser for the ``dvcgen`` command.

    ``scripts`` uses ``nargs="*"`` so that argparse accepts an empty list;
    ``main`` checks for that case and prints its own error message.
    """
    cli = argparse.ArgumentParser(
        prog="dvcgen",
        description="Generate dvc.yaml and params.yaml from Python pipeline scripts.",
    )

    # Each entry is (flags, keyword options) for one ``add_argument`` call;
    # registration order determines the order shown in ``--help``.
    argument_table = (
        (
            ("scripts",),
            {"nargs": "*", "help": "Python pipeline scripts to inspect."},
        ),
        (
            ("-o", "--output-dir"),
            {
                "default": ".",
                "help": "Directory where dvc.yaml and params.yaml are written. Defaults to the current directory.",
            },
        ),
        (
            ("-f", "--force"),
            {
                "action": "store_true",
                "help": "Overwrite existing dvc.yaml and params.yaml files.",
            },
        ),
        (
            ("--version",),
            {"action": "version", "version": f"%(prog)s {__version__}"},
        ),
    )
    for flags, options in argument_table:
        cli.add_argument(*flags, **options)

    return cli
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def main(
    argv: Optional[Sequence[str]] = None,
    stdout: Optional[TextIO] = None,
    stderr: Optional[TextIO] = None,
) -> int:
    """Run the dvcgen command line and return a process exit code.

    Args:
        argv: Arguments to parse; ``None`` lets argparse read ``sys.argv``.
        stdout: Stream for the success message; defaults to ``sys.stdout``.
        stderr: Stream for error messages; defaults to ``sys.stderr``.

    Returns:
        ``0`` when both files were written, ``2`` for every reported error.

    Note:
        argparse itself still exits through ``SystemExit`` for ``--help``,
        ``--version`` and malformed options.
    """
    stdout = sys.stdout if stdout is None else stdout
    stderr = sys.stderr if stderr is None else stderr
    parser = build_parser()
    args = parser.parse_args(argv)
    if not args.scripts:
        print("dvcgen: error: provide at least one Python pipeline script", file=stderr)
        print("Try 'dvcgen --help' for usage.", file=stderr)
        return 2

    script_paths = tuple(Path(script) for script in args.scripts)
    output_dir = Path(args.output_dir)
    dvc_path = output_dir / "dvc.yaml"
    params_path = output_dir / "params.yaml"

    try:
        # Validation touches the filesystem (stat, mkdir), so it lives inside
        # the same error boundary as inspection and writing.
        validation_message = _validation_error(
            script_paths,
            output_dir,
            (dvc_path, params_path),
            args.force,
        )
        if validation_message is not None:
            print(f"dvcgen: error: {validation_message}", file=stderr)
            return 2

        declarations = inspect_files(script_paths)
        write_files(declarations, dvc_path=dvc_path, params_path=params_path)
    except SyntaxError as syntax_error:
        print(
            f"dvcgen: error: failed to parse {syntax_error.filename}: {syntax_error.msg}",
            file=stderr,
        )
        return 2
    except (OSError, ValueError, TypeError) as error:
        # TypeError is raised by the YAML serializer for literal values it
        # cannot represent; report it like any other bad input instead of
        # letting a traceback escape.
        print(f"dvcgen: error: {error}", file=stderr)
        return 2

    print(f"Wrote {dvc_path} and {params_path}", file=stdout)
    return 0
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def _validation_error(
    script_paths: Sequence[Path],
    output_dir: Path,
    output_paths: Sequence[Path],
    force: bool,
) -> Optional[str]:
    """Validate CLI inputs and return an error message, or ``None`` if valid.

    Side effect: creates *output_dir* (and missing parents) once the input
    scripts have passed validation.

    Args:
        script_paths: Input scripts; each must exist, be a file and end in
            ``.py``.
        output_dir: Directory that will receive the generated files.
        output_paths: Files that are about to be written.
        force: When true, existing files in *output_paths* are accepted.

    Returns:
        A human-readable message for the first problem found, else ``None``.
    """
    for script_path in script_paths:
        if not script_path.exists():
            return f"input script not found: {script_path}"
        if not script_path.is_file():
            return f"input script is not a file: {script_path}"
        if script_path.suffix != ".py":
            return f"input script must be a .py file: {script_path}"

    if output_dir.exists() and not output_dir.is_dir():
        return f"output directory is not a directory: {output_dir}"
    try:
        output_dir.mkdir(parents=True, exist_ok=True)
    except OSError as os_error:
        # For example a parent component is a regular file, or permission is
        # denied; report it as a validation failure rather than a traceback.
        return f"cannot create output directory {output_dir}: {os_error}"

    if not force:
        existing_paths = [path for path in output_paths if path.exists()]
        if existing_paths:
            joined_paths = ", ".join(str(path) for path in existing_paths)
            return f"refusing to overwrite existing file(s): {joined_paths}; use --force to replace them"

    return None
|
dvcgen/generate.py
ADDED
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
"""Generate DVC configuration files from extracted declarations."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from collections.abc import Iterable, Mapping, Sequence
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
from dvcgen.inspect import SourceDeclarations
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def dvc_document(declarations: Iterable[SourceDeclarations]) -> dict[str, Any]:
    """Assemble the ``dvc.yaml`` document for the given declarations.

    One stage is produced per source file, visited in stage-name order. The
    script itself is always the first dependency of its stage; ``outs`` and
    ``params`` are only present when the script declares any.

    Raises:
        ValueError: If two sources map to the same stage name.
    """
    document: dict[str, dict[str, Any]] = {}

    for declared in sorted(declarations, key=_stage_name):
        name = _stage_name(declared)
        if name in document:
            raise ValueError(f"duplicate stage name: {name}")

        dependency_paths = [declared.source]
        dependency_paths.extend(entry.path for entry in declared.deps)
        entry_for_stage: dict[str, Any] = {
            "cmd": f"python {declared.source}",
            "deps": dependency_paths,
        }

        output_paths = [entry.path for entry in declared.outs]
        if output_paths:
            entry_for_stage["outs"] = output_paths

        parameter_names = sorted(entry.name for entry in declared.params)
        if parameter_names:
            entry_for_stage["params"] = parameter_names

        document[name] = entry_for_stage

    return {"stages": document}
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def params_document(declarations: Iterable[SourceDeclarations]) -> dict[str, Any]:
    """Assemble the ``params.yaml`` document for the given declarations.

    Sources are visited in stage-name order and, within a source, parameters
    in name order. Dotted names become nested mappings.
    """
    ordered_params = (
        declared_param
        for declared in sorted(declarations, key=_stage_name)
        for declared_param in sorted(declared.params, key=lambda item: item.name)
    )

    document: dict[str, Any] = {}
    for declared_param in ordered_params:
        _assign_dotted(document, declared_param.name, declared_param.default)
    return document
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def write_files(
    declarations: Sequence[SourceDeclarations],
    dvc_path: str | Path = "dvc.yaml",
    params_path: str | Path = "params.yaml",
) -> None:
    """Write dvc.yaml and params.yaml for the supplied declarations.

    Both documents are rendered before either file is touched, so a failure
    while building or serializing one of them (duplicate stage, conflicting
    parameter name, unsupported value) cannot leave a half-written pair.

    Args:
        declarations: Declarations for every pipeline script.
        dvc_path: Destination of the stage file.
        params_path: Destination of the parameter file.
    """
    # Materialize once so that a one-shot iterable can feed both documents.
    declarations = tuple(declarations)

    dvc_text = dump_yaml(dvc_document(declarations))
    params_text = dump_yaml(params_document(declarations))

    Path(dvc_path).write_text(dvc_text, encoding="utf-8")
    Path(params_path).write_text(params_text, encoding="utf-8")
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def dump_yaml(value: Any) -> str:
    """Render *value* as YAML text using the module's small serializer.

    The rendered lines are joined with newlines and a final newline is
    always appended.
    """
    body = "\n".join(_yaml_lines(value, indent=0))
    return f"{body}\n"
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def _stage_name(declarations: SourceDeclarations) -> str:
    """Return the stage name: the source filename without its suffix."""
    source_path = Path(declarations.source)
    return source_path.stem
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def _assign_dotted(document: dict[str, Any], name: str, value: Any) -> None:
    """Store *value* in *document* under the dotted key *name*.

    Every component before the last names a nested mapping, created on
    demand; the last component receives *value*.

    Raises:
        ValueError: If *name* has an empty component, if an intermediate
            component already holds a non-mapping, or if the final component
            already holds a mapping.
    """
    *parents, leaf = name.split(".")
    if "" in parents or not leaf:
        raise ValueError(f"invalid parameter name: {name}")

    node = document
    for key in parents:
        child = node.setdefault(key, {})
        if not isinstance(child, dict):
            raise ValueError(f"conflicting parameter name: {name}")
        node = child

    # A mapping already stored at the leaf means the same key was used as a
    # group by another parameter.
    if isinstance(node.get(leaf), dict):
        raise ValueError(f"conflicting parameter name: {name}")
    node[leaf] = value
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def _yaml_lines(value: Any, indent: int) -> list[str]:
    """Render *value* as YAML lines indented by *indent* spaces.

    Mappings and sequences (lists and tuples) are rendered in block style;
    any other value becomes a single scalar line.

    Raises:
        TypeError: If a scalar of an unsupported type is encountered.
    """
    if isinstance(value, Mapping):
        return _mapping_lines(value, indent)
    if isinstance(value, (list, tuple)):
        return _list_lines(value, indent)
    return [" " * indent + _scalar(value)]


def _is_container(value: Any) -> bool:
    """Return whether *value* is rendered as a nested block."""
    return isinstance(value, (Mapping, list, tuple))


def _empty_container(value: Any) -> str:
    """Return the flow-style spelling of an empty mapping or sequence."""
    return "{}" if isinstance(value, Mapping) else "[]"


def _mapping_lines(value: Mapping[str, Any], indent: int) -> list[str]:
    """Render a mapping in block style with its keys in sorted order."""
    lines: list[str] = []
    prefix = " " * indent

    for key in sorted(value):
        item = value[key]
        yaml_key = _string(key)
        if not _is_container(item):
            lines.append(f"{prefix}{yaml_key}: {_scalar(item)}")
        elif not item:
            # A bare "key:" would load as null, so spell the empty container
            # out explicitly.
            lines.append(f"{prefix}{yaml_key}: {_empty_container(item)}")
        else:
            lines.append(f"{prefix}{yaml_key}:")
            lines.extend(_yaml_lines(item, indent + 2))

    return lines


def _list_lines(value: list[Any], indent: int) -> list[str]:
    """Render a list or tuple in block style, one ``-`` entry per item."""
    lines: list[str] = []
    prefix = " " * indent

    for item in value:
        if not _is_container(item):
            lines.append(f"{prefix}- {_scalar(item)}")
        elif not item:
            # Same reasoning as for mappings: a bare "-" would load as null.
            lines.append(f"{prefix}- {_empty_container(item)}")
        else:
            lines.append(f"{prefix}-")
            lines.extend(_yaml_lines(item, indent + 2))

    return lines


def _scalar(value: Any) -> str:
    """Render a bool, ``None``, number or string as a YAML scalar.

    Raises:
        TypeError: If *value* is of any other type.
    """
    # bool is tested first because it is a subclass of int.
    if isinstance(value, bool):
        return "true" if value else "false"
    if value is None:
        return "null"
    if isinstance(value, float):
        return _float(value)
    if isinstance(value, int):
        return str(value)
    if isinstance(value, str):
        return _string(value)
    raise TypeError(f"unsupported YAML value: {value!r}")


def _float(value: float) -> str:
    """Render a float so that YAML 1.1 and 1.2 loaders both read a float."""
    if value != value:  # NaN is the only value unequal to itself.
        return ".nan"
    if value == float("inf"):
        return ".inf"
    if value == float("-inf"):
        return "-.inf"

    text = str(value)
    # Python prints 1e-05 without a decimal point; YAML 1.1 loaders need the
    # point in the mantissa to recognise a float, otherwise they load a string.
    if "e" in text and "." not in text:
        text = text.replace("e", ".0e", 1)
    return text


def _string(value: str) -> str:
    """Render *value* as a double-quoted YAML string.

    JSON string syntax is used for the quoting. Non-ASCII characters are
    kept verbatim, because JSON's surrogate-pair escapes for characters
    outside the Basic Multilingual Plane are not valid YAML escapes.
    """
    return json.dumps(value, ensure_ascii=False)
|
dvcgen/inspect.py
ADDED
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
"""Inspect Python pipeline scripts for dvcgen declarations."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import ast
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Any, Iterable, Optional, Union
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass(frozen=True)
class PathDeclaration:
    """One ``dep(...)`` or ``out(...)`` declaration found in a script."""

    # Name of the variable the call result is assigned to.
    target: str
    # Literal path string passed to the call.
    path: str
    # Line number of the call in the inspected source.
    lineno: int
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@dataclass(frozen=True)
class ParamDeclaration:
    """One ``param(...)`` declaration found in a script."""

    # Name of the variable the call result is assigned to.
    target: str
    # Parameter name as written in the call; may be dotted.
    name: str
    # Literal default value passed as the second argument.
    default: Any
    # Line number of the call in the inspected source.
    lineno: int
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@dataclass(frozen=True)
class SourceDeclarations:
    """Everything dvcgen extracted from one Python source."""

    # Label of the inspected source; the file path when read from disk.
    source: str
    # ``dep(...)`` declarations in source order.
    deps: tuple[PathDeclaration, ...]
    # ``out(...)`` declarations in source order.
    outs: tuple[PathDeclaration, ...]
    # ``param(...)`` declarations in source order.
    params: tuple[ParamDeclaration, ...]
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
# A filesystem path given either as a plain string or as a ``pathlib.Path``.
PathLike = Union[str, Path]
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def inspect_file(path: PathLike) -> SourceDeclarations:
    """Read the script at *path* as UTF-8 and extract its declarations.

    The file is only parsed, never executed. The path, converted to a
    string, is recorded as the source of the result.
    """
    script = Path(path)
    code = script.read_text(encoding="utf-8")
    return inspect_source(code, source=str(script))
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def inspect_files(paths: Iterable[PathLike]) -> tuple[SourceDeclarations, ...]:
    """Inspect each file in *paths* and return the results in input order."""
    return tuple(map(inspect_file, paths))
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def inspect_source(source_code: str, source: str = "<string>") -> SourceDeclarations:
    """Extract top-level dvcgen declarations from Python source text.

    Only module-level assignments of the form ``NAME = dep(...)``,
    ``NAME = out(...)`` or ``NAME = param(...)`` (optionally annotated) are
    considered, and the callee must be written as a bare name. Calls whose
    arguments do not have the expected literal shape are skipped silently.

    Args:
        source_code: Python source to parse.
        source: Label stored in the result and passed to the parser as the
            filename.
    """
    module = ast.parse(source_code, filename=source)

    # Maps a recognised callee name to the function that builds its record.
    builders = {
        "dep": _path_declaration,
        "out": _path_declaration,
        "param": _param_declaration,
    }
    found: dict[str, list[Any]] = {kind: [] for kind in builders}

    for node in module.body:
        assigned_name = _assignment_target(node)
        if assigned_name is None:
            continue

        call = _assignment_value(node)
        if not isinstance(call, ast.Call):
            continue

        kind = _simple_call_name(call)
        if kind not in builders:
            continue

        declaration = builders[kind](assigned_name, call)
        if declaration is not None:
            found[kind].append(declaration)

    return SourceDeclarations(
        source=source,
        deps=tuple(found["dep"]),
        outs=tuple(found["out"]),
        params=tuple(found["param"]),
    )
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def _assignment_target(statement: ast.stmt) -> Optional[str]:
    """Return the variable name bound by a simple assignment, else ``None``.

    Accepted are plain assignments with exactly one target and annotated
    assignments flagged as simple by the parser; in both cases the target
    must be a bare name.
    """
    if isinstance(statement, ast.AnnAssign):
        if not statement.simple:
            return None
        candidate = statement.target
    elif isinstance(statement, ast.Assign):
        if len(statement.targets) != 1:
            return None
        candidate = statement.targets[0]
    else:
        return None

    return candidate.id if isinstance(candidate, ast.Name) else None
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def _assignment_value(statement: ast.stmt) -> Optional[ast.expr]:
    """Return the right-hand side of a plain or annotated assignment.

    ``None`` is returned for any other statement, and for an annotated
    assignment that has no value.
    """
    if isinstance(statement, (ast.Assign, ast.AnnAssign)):
        return statement.value
    return None
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def _simple_call_name(call: ast.Call) -> Optional[str]:
    """Return the callee's name when it is a bare identifier, else ``None``.

    Attribute access such as ``dvcgen.dep(...)`` therefore yields ``None``.
    """
    callee = call.func
    return callee.id if isinstance(callee, ast.Name) else None
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def _path_declaration(target: str, call: ast.Call) -> Optional[PathDeclaration]:
    """Build a path declaration from a ``dep``/``out`` call, if well formed.

    The call must have exactly one positional argument, no keyword
    arguments, and that argument must be a string literal; otherwise
    ``None`` is returned.
    """
    if call.keywords or len(call.args) != 1:
        return None

    (argument,) = call.args
    literal_path = _literal(argument)
    if isinstance(literal_path, str):
        return PathDeclaration(target=target, path=literal_path, lineno=call.lineno)
    return None
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def _param_declaration(target: str, call: ast.Call) -> Optional[ParamDeclaration]:
    """Build a parameter declaration from a ``param`` call, if well formed.

    The call must have exactly two positional arguments and no keyword
    arguments. The first must be a string literal (the parameter name) and
    the second any literal (the default); otherwise ``None`` is returned.
    """
    if call.keywords or len(call.args) != 2:
        return None

    name_node, default_node = call.args

    param_name = _literal(name_node)
    if not isinstance(param_name, str):
        return None

    # Compared by identity against the sentinel, since ``None`` is itself a
    # legitimate default value.
    default_value = _literal(default_node)
    if default_value is _UNSUPPORTED:
        return None

    return ParamDeclaration(
        target=target,
        name=param_name,
        default=default_value,
        lineno=call.lineno,
    )
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
# Sentinel returned by ``_literal`` when a node is not a literal. A dedicated
# object is used because ``None`` is itself a valid literal value.
_UNSUPPORTED = object()
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def _literal(node: ast.AST) -> Any:
    """Evaluate *node* as a Python literal without executing any code.

    Returns the literal's value, or the ``_UNSUPPORTED`` sentinel when
    ``ast.literal_eval`` rejects the node with ``ValueError`` or
    ``TypeError``.
    """
    try:
        evaluated = ast.literal_eval(node)
    except (TypeError, ValueError):
        return _UNSUPPORTED
    return evaluated
|
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: dvcgen
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: Generate DVC pipeline files from Python declarations.
|
|
5
|
+
Author: pillyshi
|
|
6
|
+
License: MIT
|
|
7
|
+
Keywords: dvc,params,pipeline,yaml
|
|
8
|
+
Classifier: Development Status :: 2 - Pre-Alpha
|
|
9
|
+
Classifier: Environment :: Console
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
17
|
+
Classifier: Topic :: Software Development :: Build Tools
|
|
18
|
+
Requires-Python: >=3.10
|
|
19
|
+
Description-Content-Type: text/markdown
|
|
20
|
+
|
|
21
|
+
# dvcgen
|
|
22
|
+
|
|
23
|
+
Write your DVC pipeline once, in Python.
|
|
24
|
+
|
|
25
|
+
`dvcgen` is an early-stage command-line tool for generating DVC pipeline files
|
|
26
|
+
from lightweight declarations embedded in Python pipeline scripts.
|
|
27
|
+
|
|
28
|
+
## Current Status
|
|
29
|
+
|
|
30
|
+
Implemented:
|
|
31
|
+
|
|
32
|
+
- A Python package named `dvcgen`
|
|
33
|
+
- A `dvcgen` console command
|
|
34
|
+
- CLI argument parsing for pipeline script paths
|
|
35
|
+
- CLI input validation and overwrite protection
|
|
36
|
+
- Public declaration helpers: `dep()`, `out()`, and `param()`
|
|
37
|
+
- Python script inspection for top-level literal declarations
|
|
38
|
+
- `dvc.yaml` generation
|
|
39
|
+
- `params.yaml` generation
|
|
40
|
+
|
|
41
|
+
## Installation
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
uv tool install dvcgen
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
Or run without installing:
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
uvx dvcgen --help
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
## Usage
|
|
54
|
+
|
|
55
|
+
Show CLI help:
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
dvcgen --help
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
Generate DVC files from one or more Python pipeline scripts:
|
|
62
|
+
|
|
63
|
+
```bash
|
|
64
|
+
dvcgen pipeline/*.py
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
By default, the command writes `dvc.yaml` and `params.yaml` to the current directory.
|
|
68
|
+
Stage names are derived from input Python filenames. For example,
|
|
69
|
+
`pipeline/train.py` becomes the `train` stage.
|
|
70
|
+
|
|
71
|
+
By default, `dvcgen` refuses to overwrite existing `dvc.yaml` or `params.yaml`
|
|
72
|
+
files. Use `--force` when you intentionally want to replace them:
|
|
73
|
+
|
|
74
|
+
```bash
|
|
75
|
+
dvcgen --force pipeline/*.py
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
Write files to another directory with `--output-dir`:
|
|
79
|
+
|
|
80
|
+
```bash
|
|
81
|
+
dvcgen --output-dir generated pipeline/*.py
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
Bad inputs fail with an error message and a non-zero exit code. Successful runs
|
|
85
|
+
print the files that were written.
|
|
86
|
+
|
|
87
|
+
Inspect declarations from Python without executing the pipeline script:
|
|
88
|
+
|
|
89
|
+
```python
|
|
90
|
+
from dvcgen.inspect import inspect_file
|
|
91
|
+
|
|
92
|
+
declarations = inspect_file("pipeline/train.py")
|
|
93
|
+
print(declarations.deps)
|
|
94
|
+
print(declarations.outs)
|
|
95
|
+
print(declarations.params)
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
## Release
|
|
99
|
+
|
|
100
|
+
Publishing is intentionally manual while the project is early stage. Build and
|
|
101
|
+
validate artifacts before uploading anything:
|
|
102
|
+
|
|
103
|
+
```bash
|
|
104
|
+
uv run python -m build
|
|
105
|
+
uv run twine check dist/*
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
Use TestPyPI first when rehearsing a release. Create a TestPyPI API token, then
|
|
109
|
+
upload with the token as the password:
|
|
110
|
+
|
|
111
|
+
```bash
|
|
112
|
+
uv run twine upload --repository testpypi dist/*
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
Use the production PyPI repository only when the version, changelog, and package
|
|
116
|
+
name decision are ready:
|
|
117
|
+
|
|
118
|
+
```bash
|
|
119
|
+
uv run twine upload dist/*
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
For both repositories, use `__token__` as the username and the repository API
|
|
123
|
+
token as the password. Avoid committing tokens or storing them in project files.
|
|
124
|
+
|
|
125
|
+
Before the first production upload, decide whether to publish the current
|
|
126
|
+
minimal release to reserve the `dvcgen` package name on PyPI. Once a version is
|
|
127
|
+
uploaded to PyPI or TestPyPI, that exact version cannot be uploaded again; bump
|
|
128
|
+
the version before retrying with changed artifacts.
|
|
129
|
+
|
|
130
|
+
## Planned MVP
|
|
131
|
+
|
|
132
|
+
The intended MVP is:
|
|
133
|
+
|
|
134
|
+
1. Pipeline scripts declare dependencies, outputs, and parameters in Python.
|
|
135
|
+
2. `dvcgen` inspects those declarations without executing the scripts.
|
|
136
|
+
3. `dvcgen` writes `dvc.yaml` and `params.yaml`.
|
|
137
|
+
|
|
138
|
+
Example API:
|
|
139
|
+
|
|
140
|
+
```python
|
|
141
|
+
from dvcgen import dep, out, param
|
|
142
|
+
|
|
143
|
+
TRAIN_DATA = dep("data/processed.csv")
|
|
144
|
+
MODEL = out("models/model.pkl")
|
|
145
|
+
|
|
146
|
+
LR = param("train.lr", 0.001)
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
Running:
|
|
150
|
+
|
|
151
|
+
```bash
|
|
152
|
+
dvcgen pipeline/train.py
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
Generates `dvc.yaml`:
|
|
156
|
+
|
|
157
|
+
```yaml
|
|
158
|
+
"stages":
|
|
159
|
+
"train":
|
|
160
|
+
"cmd": "python pipeline/train.py"
|
|
161
|
+
"deps":
|
|
162
|
+
- "pipeline/train.py"
|
|
163
|
+
- "data/processed.csv"
|
|
164
|
+
"outs":
|
|
165
|
+
- "models/model.pkl"
|
|
166
|
+
"params":
|
|
167
|
+
- "train.lr"
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
And `params.yaml`:
|
|
171
|
+
|
|
172
|
+
```yaml
|
|
173
|
+
"train":
|
|
174
|
+
"lr": 0.001
|
|
175
|
+
```
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
dvcgen/__init__.py,sha256=Mlag_PpW7bf5LYn21xI__c3VgGEhT4sUXYLzOmxRK8w,460
|
|
2
|
+
dvcgen/cli.py,sha256=9kTsfZB0do6IydRHDo6UhSWQItiXUA2H_UbA-Q2o_Kw,3718
|
|
3
|
+
dvcgen/generate.py,sha256=jB0Wcm3RVvClXOpTIUiIzYoC7F_x6YGo1tt5b1lYDmc,4450
|
|
4
|
+
dvcgen/inspect.py,sha256=hU5DH_i3AYJIYuGw9Izmbj8wQGNYjuQdA-r6254KC20,4365
|
|
5
|
+
dvcgen-0.2.0.dist-info/METADATA,sha256=vkLfDAi2peIR8h1Bi5QVjWRYfvHG3HdE3N0gS0-xoXI,4141
|
|
6
|
+
dvcgen-0.2.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
|
|
7
|
+
dvcgen-0.2.0.dist-info/entry_points.txt,sha256=94FKuP8KZnWqZT3IOLj1TIrd4O_w8ycrt7dR_sJSUOk,43
|
|
8
|
+
dvcgen-0.2.0.dist-info/RECORD,,
|