dvcgen 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,60 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ pull_request:
6
+
7
+ jobs:
8
+ test:
9
+ name: Test Python ${{ matrix.python-version }}
10
+ runs-on: ubuntu-latest
11
+ strategy:
12
+ fail-fast: false
13
+ matrix:
14
+ python-version: ["3.10", "3.11", "3.12", "3.13"]
15
+
16
+ steps:
17
+ - name: Check out repository
18
+ uses: actions/checkout@v6
19
+
20
+ - name: Install uv
21
+ uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0
22
+ with:
23
+ enable-cache: true
24
+
25
+ - name: Install Python
26
+ run: uv python install ${{ matrix.python-version }}
27
+
28
+ - name: Install dependencies
29
+ run: uv sync --locked --dev
30
+
31
+ - name: Run tests
32
+ run: uv run pytest
33
+
34
+ quality:
35
+ name: Quality and package
36
+ runs-on: ubuntu-latest
37
+
38
+ steps:
39
+ - name: Check out repository
40
+ uses: actions/checkout@v6
41
+
42
+ - name: Install uv
43
+ uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0
44
+ with:
45
+ enable-cache: true
46
+
47
+ - name: Install Python
48
+ run: uv python install 3.12
49
+
50
+ - name: Install dependencies
51
+ run: uv sync --locked --dev
52
+
53
+ - name: Run ruff
54
+ run: uv run ruff check .
55
+
56
+ - name: Run mypy
57
+ run: uv run mypy src tests
58
+
59
+ - name: Build package
60
+ run: uv build
@@ -0,0 +1,8 @@
1
+ .venv/
2
+ .uv-cache/
3
+ __pycache__/
4
+ *.py[cod]
5
+ .pytest_cache/
6
+ dist/
7
+ build/
8
+ *.egg-info/
dvcgen-0.2.0/PKG-INFO ADDED
@@ -0,0 +1,175 @@
1
+ Metadata-Version: 2.4
2
+ Name: dvcgen
3
+ Version: 0.2.0
4
+ Summary: Generate DVC pipeline files from Python declarations.
5
+ Author: pillyshi
6
+ License: MIT
7
+ Keywords: dvc,params,pipeline,yaml
8
+ Classifier: Development Status :: 2 - Pre-Alpha
9
+ Classifier: Environment :: Console
10
+ Classifier: Intended Audience :: Developers
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Programming Language :: Python :: 3 :: Only
13
+ Classifier: Programming Language :: Python :: 3.10
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Programming Language :: Python :: 3.13
17
+ Classifier: Topic :: Software Development :: Build Tools
18
+ Requires-Python: >=3.10
19
+ Description-Content-Type: text/markdown
20
+
21
+ # dvcgen
22
+
23
+ Write your DVC pipeline once, in Python.
24
+
25
+ `dvcgen` is an early-stage command-line tool for generating DVC pipeline files
26
+ from lightweight declarations embedded in Python pipeline scripts.
27
+
28
+ ## Current Status
29
+
30
+ Implemented:
31
+
32
+ - A Python package named `dvcgen`
33
+ - A `dvcgen` console command
34
+ - CLI argument parsing for pipeline script paths
35
+ - CLI input validation and overwrite protection
36
+ - Public declaration helpers: `dep()`, `out()`, and `param()`
37
+ - Python script inspection for top-level literal declarations
38
+ - `dvc.yaml` generation
39
+ - `params.yaml` generation
40
+
41
+ ## Installation
42
+
43
+ ```bash
44
+ uv tool install dvcgen
45
+ ```
46
+
47
+ Or run without installing:
48
+
49
+ ```bash
50
+ uvx dvcgen --help
51
+ ```
52
+
53
+ ## Usage
54
+
55
+ Show CLI help:
56
+
57
+ ```bash
58
+ dvcgen --help
59
+ ```
60
+
61
+ Generate DVC files from one or more Python pipeline scripts:
62
+
63
+ ```bash
64
+ dvcgen pipeline/*.py
65
+ ```
66
+
67
+ The command writes `dvc.yaml` and `params.yaml` in the current directory.
68
+ Stage names are derived from input Python filenames. For example,
69
+ `pipeline/train.py` becomes the `train` stage.
70
+
71
+ By default, `dvcgen` refuses to overwrite existing `dvc.yaml` or `params.yaml`
72
+ files. Use `--force` when you intentionally want to replace them:
73
+
74
+ ```bash
75
+ dvcgen --force pipeline/*.py
76
+ ```
77
+
78
+ Write files to another directory with `--output-dir`:
79
+
80
+ ```bash
81
+ dvcgen --output-dir generated pipeline/*.py
82
+ ```
83
+
84
+ Bad inputs fail with an error message and a non-zero exit code. Successful runs
85
+ print the files that were written.
86
+
87
+ Inspect declarations from Python without executing the pipeline script:
88
+
89
+ ```python
90
+ from dvcgen.inspect import inspect_file
91
+
92
+ declarations = inspect_file("pipeline/train.py")
93
+ print(declarations.deps)
94
+ print(declarations.outs)
95
+ print(declarations.params)
96
+ ```
97
+
98
+ ## Release
99
+
100
+ Publishing is intentionally manual while the project is early stage. Build and
101
+ validate artifacts before uploading anything:
102
+
103
+ ```bash
104
+ uv run python -m build
105
+ uv run twine check dist/*
106
+ ```
107
+
108
+ Use TestPyPI first when rehearsing a release. Create a TestPyPI API token, then
109
+ upload with the token as the password:
110
+
111
+ ```bash
112
+ uv run twine upload --repository testpypi dist/*
113
+ ```
114
+
115
+ Use the production PyPI repository only when the version, changelog, and package
116
+ name decision are ready:
117
+
118
+ ```bash
119
+ uv run twine upload dist/*
120
+ ```
121
+
122
+ For both repositories, use `__token__` as the username and the repository API
123
+ token as the password. Avoid committing tokens or storing them in project files.
124
+
125
+ Before the first production upload, decide whether to publish the current
126
+ minimal release to reserve the `dvcgen` package name on PyPI. Once a version is
127
+ uploaded to PyPI or TestPyPI, that exact version cannot be uploaded again; bump
128
+ the version before retrying with changed artifacts.
129
+
130
+ ## Planned MVP
131
+
132
+ The intended MVP is:
133
+
134
+ 1. Pipeline scripts declare dependencies, outputs, and parameters in Python.
135
+ 2. `dvcgen` inspects those declarations without executing the scripts.
136
+ 3. `dvcgen` writes `dvc.yaml` and `params.yaml`.
137
+
138
+ Example API:
139
+
140
+ ```python
141
+ from dvcgen import dep, out, param
142
+
143
+ TRAIN_DATA = dep("data/processed.csv")
144
+ MODEL = out("models/model.pkl")
145
+
146
+ LR = param("train.lr", 0.001)
147
+ ```
148
+
149
+ Running:
150
+
151
+ ```bash
152
+ dvcgen pipeline/train.py
153
+ ```
154
+
155
+ Generates `dvc.yaml`:
156
+
157
+ ```yaml
158
+ "stages":
159
+ "train":
160
+ "cmd": "python pipeline/train.py"
161
+ "deps":
162
+ - "pipeline/train.py"
163
+ - "data/processed.csv"
164
+ "outs":
165
+ - "models/model.pkl"
166
+ "params":
167
+ - "train.lr"
168
+ ```
169
+
170
+ And `params.yaml`:
171
+
172
+ ```yaml
173
+ "train":
174
+ "lr": 0.001
175
+ ```
dvcgen-0.2.0/README.md ADDED
@@ -0,0 +1,155 @@
1
+ # dvcgen
2
+
3
+ Write your DVC pipeline once, in Python.
4
+
5
+ `dvcgen` is an early-stage command-line tool for generating DVC pipeline files
6
+ from lightweight declarations embedded in Python pipeline scripts.
7
+
8
+ ## Current Status
9
+
10
+ Implemented:
11
+
12
+ - A Python package named `dvcgen`
13
+ - A `dvcgen` console command
14
+ - CLI argument parsing for pipeline script paths
15
+ - CLI input validation and overwrite protection
16
+ - Public declaration helpers: `dep()`, `out()`, and `param()`
17
+ - Python script inspection for top-level literal declarations
18
+ - `dvc.yaml` generation
19
+ - `params.yaml` generation
20
+
21
+ ## Installation
22
+
23
+ ```bash
24
+ uv tool install dvcgen
25
+ ```
26
+
27
+ Or run without installing:
28
+
29
+ ```bash
30
+ uvx dvcgen --help
31
+ ```
32
+
33
+ ## Usage
34
+
35
+ Show CLI help:
36
+
37
+ ```bash
38
+ dvcgen --help
39
+ ```
40
+
41
+ Generate DVC files from one or more Python pipeline scripts:
42
+
43
+ ```bash
44
+ dvcgen pipeline/*.py
45
+ ```
46
+
47
+ The command writes `dvc.yaml` and `params.yaml` in the current directory.
48
+ Stage names are derived from input Python filenames. For example,
49
+ `pipeline/train.py` becomes the `train` stage.
50
+
51
+ By default, `dvcgen` refuses to overwrite existing `dvc.yaml` or `params.yaml`
52
+ files. Use `--force` when you intentionally want to replace them:
53
+
54
+ ```bash
55
+ dvcgen --force pipeline/*.py
56
+ ```
57
+
58
+ Write files to another directory with `--output-dir`:
59
+
60
+ ```bash
61
+ dvcgen --output-dir generated pipeline/*.py
62
+ ```
63
+
64
+ Bad inputs fail with an error message and a non-zero exit code. Successful runs
65
+ print the files that were written.
66
+
67
+ Inspect declarations from Python without executing the pipeline script:
68
+
69
+ ```python
70
+ from dvcgen.inspect import inspect_file
71
+
72
+ declarations = inspect_file("pipeline/train.py")
73
+ print(declarations.deps)
74
+ print(declarations.outs)
75
+ print(declarations.params)
76
+ ```
77
+
78
+ ## Release
79
+
80
+ Publishing is intentionally manual while the project is early stage. Build and
81
+ validate artifacts before uploading anything:
82
+
83
+ ```bash
84
+ uv run python -m build
85
+ uv run twine check dist/*
86
+ ```
87
+
88
+ Use TestPyPI first when rehearsing a release. Create a TestPyPI API token, then
89
+ upload with the token as the password:
90
+
91
+ ```bash
92
+ uv run twine upload --repository testpypi dist/*
93
+ ```
94
+
95
+ Use the production PyPI repository only when the version, changelog, and package
96
+ name decision are ready:
97
+
98
+ ```bash
99
+ uv run twine upload dist/*
100
+ ```
101
+
102
+ For both repositories, use `__token__` as the username and the repository API
103
+ token as the password. Avoid committing tokens or storing them in project files.
104
+
105
+ Before the first production upload, decide whether to publish the current
106
+ minimal release to reserve the `dvcgen` package name on PyPI. Once a version is
107
+ uploaded to PyPI or TestPyPI, that exact version cannot be uploaded again; bump
108
+ the version before retrying with changed artifacts.
109
+
110
+ ## Planned MVP
111
+
112
+ The intended MVP is:
113
+
114
+ 1. Pipeline scripts declare dependencies, outputs, and parameters in Python.
115
+ 2. `dvcgen` inspects those declarations without executing the scripts.
116
+ 3. `dvcgen` writes `dvc.yaml` and `params.yaml`.
117
+
118
+ Example API:
119
+
120
+ ```python
121
+ from dvcgen import dep, out, param
122
+
123
+ TRAIN_DATA = dep("data/processed.csv")
124
+ MODEL = out("models/model.pkl")
125
+
126
+ LR = param("train.lr", 0.001)
127
+ ```
128
+
129
+ Running:
130
+
131
+ ```bash
132
+ dvcgen pipeline/train.py
133
+ ```
134
+
135
+ Generates `dvc.yaml`:
136
+
137
+ ```yaml
138
+ "stages":
139
+ "train":
140
+ "cmd": "python pipeline/train.py"
141
+ "deps":
142
+ - "pipeline/train.py"
143
+ - "data/processed.csv"
144
+ "outs":
145
+ - "models/model.pkl"
146
+ "params":
147
+ - "train.lr"
148
+ ```
149
+
150
+ And `params.yaml`:
151
+
152
+ ```yaml
153
+ "train":
154
+ "lr": 0.001
155
+ ```
@@ -0,0 +1,42 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "dvcgen"
7
+ version = "0.2.0"
8
+ description = "Generate DVC pipeline files from Python declarations."
9
+ readme = "README.md"
10
+ requires-python = ">=3.10"
11
+ license = { text = "MIT" }
12
+ authors = [
13
+ { name = "pillyshi" },
14
+ ]
15
+ keywords = ["dvc", "pipeline", "params", "yaml"]
16
+ classifiers = [
17
+ "Development Status :: 2 - Pre-Alpha",
18
+ "Environment :: Console",
19
+ "Intended Audience :: Developers",
20
+ "Programming Language :: Python :: 3",
21
+ "Programming Language :: Python :: 3 :: Only",
22
+ "Programming Language :: Python :: 3.10",
23
+ "Programming Language :: Python :: 3.11",
24
+ "Programming Language :: Python :: 3.12",
25
+ "Programming Language :: Python :: 3.13",
26
+ "Topic :: Software Development :: Build Tools",
27
+ ]
28
+
29
+ [project.scripts]
30
+ dvcgen = "dvcgen.cli:main"
31
+
32
+ [tool.hatch.build.targets.wheel]
33
+ packages = ["src/dvcgen"]
34
+
35
+ [dependency-groups]
36
+ dev = [
37
+ "build>=1.5.0",
38
+ "mypy>=1.19.1",
39
+ "pytest>=8.4.2",
40
+ "ruff>=0.15.15",
41
+ "twine>=6.2.0",
42
+ ]
@@ -0,0 +1,21 @@
1
+ """Generate DVC pipeline files from Python declarations."""
2
+
3
# Version reported by ``dvcgen --version``; keep in sync with ``version`` in
# pyproject.toml (this release is 0.2.0).
__version__ = "0.2.0"
4
+
5
+
6
def dep(path):
    """Mark ``path`` as a pipeline dependency.

    The argument is handed back unchanged, so the declaration can be
    assigned to a name and used as an ordinary value at runtime.
    """
    return path
9
+
10
+
11
def out(path):
    """Mark ``path`` as a pipeline output.

    The argument is handed back unchanged, so the declaration can be
    assigned to a name and used as an ordinary value at runtime.
    """
    return path
14
+
15
+
16
def param(name, default):
    """Register a pipeline parameter called ``name``.

    At runtime only ``default`` matters: it is returned unchanged, and
    ``name`` is not used by this function.
    """
    return default
19
+
20
+
21
+ __all__ = ["__version__", "dep", "out", "param"]
@@ -0,0 +1,119 @@
1
+ """Command-line interface for dvcgen."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ from collections.abc import Sequence
7
+ from pathlib import Path
8
+ import sys
9
+ from typing import Optional, TextIO
10
+
11
+ from dvcgen import __version__
12
+ from dvcgen.generate import write_files
13
+ from dvcgen.inspect import inspect_files
14
+
15
+
16
def build_parser() -> argparse.ArgumentParser:
    """Create the argument parser for the ``dvcgen`` command.

    The parser accepts zero or more positional script paths, an output
    directory (``-o``/``--output-dir``, default ``"."``), an overwrite
    switch (``-f``/``--force``) and ``--version``.
    """
    cli = argparse.ArgumentParser(
        description="Generate dvc.yaml and params.yaml from Python pipeline scripts.",
        prog="dvcgen",
    )

    # Positional inputs: "*" lets the caller report a missing script itself.
    cli.add_argument("scripts", nargs="*", help="Python pipeline scripts to inspect.")

    # Where the generated files go.
    cli.add_argument(
        "-o",
        "--output-dir",
        help="Directory where dvc.yaml and params.yaml are written. Defaults to the current directory.",
        default=".",
    )

    # Opt-in overwrite of existing output files.
    cli.add_argument(
        "-f",
        "--force",
        help="Overwrite existing dvc.yaml and params.yaml files.",
        action="store_true",
    )

    version_text = f"%(prog)s {__version__}"
    cli.add_argument("--version", action="version", version=version_text)

    return cli
44
+
45
+
46
def main(
    argv: Optional[Sequence[str]] = None,
    stdout: Optional[TextIO] = None,
    stderr: Optional[TextIO] = None,
) -> int:
    """Run the ``dvcgen`` command line and return its exit status.

    Args:
        argv: Command-line arguments; ``None`` is passed through to
            ``argparse``.
        stdout: Stream for the success message; defaults to ``sys.stdout``.
        stderr: Stream for error messages; defaults to ``sys.stderr``.

    Returns:
        ``0`` when both files were written, ``2`` when no script was given,
        validation failed, or inspection/writing raised ``SyntaxError``,
        ``OSError`` or ``ValueError``.
    """
    out_stream = stdout if stdout is not None else sys.stdout
    err_stream = stderr if stderr is not None else sys.stderr

    def fail(message: str) -> int:
        # Every failure path shares the same prefix and exit status.
        print(f"dvcgen: error: {message}", file=err_stream)
        return 2

    args = build_parser().parse_args(argv)
    if not args.scripts:
        fail("provide at least one Python pipeline script")
        print("Try 'dvcgen --help' for usage.", file=err_stream)
        return 2

    scripts = tuple(map(Path, args.scripts))
    target_dir = Path(args.output_dir)
    dvc_file = target_dir / "dvc.yaml"
    params_file = target_dir / "params.yaml"

    problem = _validation_error(
        scripts,
        target_dir,
        (dvc_file, params_file),
        args.force,
    )
    if problem is not None:
        return fail(problem)

    try:
        found = inspect_files(scripts)
        write_files(found, dvc_path=dvc_file, params_path=params_file)
    except SyntaxError as parse_failure:
        return fail(f"failed to parse {parse_failure.filename}: {parse_failure.msg}")
    except (OSError, ValueError) as failure:
        return fail(str(failure))

    print(f"Wrote {dvc_file} and {params_file}", file=out_stream)
    return 0
93
+
94
+
95
def _validation_error(
    script_paths: Sequence[Path],
    output_dir: Path,
    output_paths: Sequence[Path],
    force: bool,
) -> Optional[str]:
    """Validate CLI inputs and make sure the output directory exists.

    Args:
        script_paths: Input scripts; each must exist, be a regular file and
            have a ``.py`` suffix.
        output_dir: Directory for the generated files. It is created
            (including parents) when missing.
        output_paths: Files that are about to be written.
        force: When false, any already-existing entry in ``output_paths``
            is reported as an error.

    Returns:
        A human-readable error message for the first problem found, or
        ``None`` when everything is valid.
    """
    for script_path in script_paths:
        if not script_path.exists():
            return f"input script not found: {script_path}"
        if not script_path.is_file():
            return f"input script is not a file: {script_path}"
        if script_path.suffix != ".py":
            return f"input script must be a .py file: {script_path}"

    if output_dir.exists() and not output_dir.is_dir():
        return f"output directory is not a directory: {output_dir}"
    try:
        output_dir.mkdir(parents=True, exist_ok=True)
    except OSError as mkdir_error:
        # The caller invokes this function outside its own try/except, so a
        # failed mkdir (permission denied, a parent that is a regular file,
        # ...) must be reported as a message rather than raised.
        return f"cannot create output directory {output_dir}: {mkdir_error}"

    if not force:
        existing_paths = [path for path in output_paths if path.exists()]
        if existing_paths:
            joined_paths = ", ".join(str(path) for path in existing_paths)
            return f"refusing to overwrite existing file(s): {joined_paths}; use --force to replace them"

    return None