dvcgen 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dvcgen-0.2.0/.github/workflows/ci.yml +60 -0
- dvcgen-0.2.0/.gitignore +8 -0
- dvcgen-0.2.0/PKG-INFO +175 -0
- dvcgen-0.2.0/README.md +155 -0
- dvcgen-0.2.0/pyproject.toml +42 -0
- dvcgen-0.2.0/src/dvcgen/__init__.py +21 -0
- dvcgen-0.2.0/src/dvcgen/cli.py +119 -0
- dvcgen-0.2.0/src/dvcgen/generate.py +145 -0
- dvcgen-0.2.0/src/dvcgen/inspect.py +160 -0
- dvcgen-0.2.0/tests/test_generate.py +347 -0
- dvcgen-0.2.0/tests/test_inspect.py +178 -0
- dvcgen-0.2.0/tests/test_runtime_api.py +18 -0
- dvcgen-0.2.0/uv.lock +930 -0
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
pull_request:
|
|
6
|
+
|
|
7
|
+
jobs:
|
|
8
|
+
test:
|
|
9
|
+
name: Test Python ${{ matrix.python-version }}
|
|
10
|
+
runs-on: ubuntu-latest
|
|
11
|
+
strategy:
|
|
12
|
+
fail-fast: false
|
|
13
|
+
matrix:
|
|
14
|
+
python-version: ["3.10", "3.11", "3.12", "3.13"]
|
|
15
|
+
|
|
16
|
+
steps:
|
|
17
|
+
- name: Check out repository
|
|
18
|
+
uses: actions/checkout@v6
|
|
19
|
+
|
|
20
|
+
- name: Install uv
|
|
21
|
+
uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0
|
|
22
|
+
with:
|
|
23
|
+
enable-cache: true
|
|
24
|
+
|
|
25
|
+
- name: Install Python
|
|
26
|
+
run: uv python install ${{ matrix.python-version }}
|
|
27
|
+
|
|
28
|
+
- name: Install dependencies
|
|
29
|
+
run: uv sync --locked --dev
|
|
30
|
+
|
|
31
|
+
- name: Run tests
|
|
32
|
+
run: uv run pytest
|
|
33
|
+
|
|
34
|
+
quality:
|
|
35
|
+
name: Quality and package
|
|
36
|
+
runs-on: ubuntu-latest
|
|
37
|
+
|
|
38
|
+
steps:
|
|
39
|
+
- name: Check out repository
|
|
40
|
+
uses: actions/checkout@v6
|
|
41
|
+
|
|
42
|
+
- name: Install uv
|
|
43
|
+
uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0
|
|
44
|
+
with:
|
|
45
|
+
enable-cache: true
|
|
46
|
+
|
|
47
|
+
- name: Install Python
|
|
48
|
+
run: uv python install 3.12
|
|
49
|
+
|
|
50
|
+
- name: Install dependencies
|
|
51
|
+
run: uv sync --locked --dev
|
|
52
|
+
|
|
53
|
+
- name: Run ruff
|
|
54
|
+
run: uv run ruff check .
|
|
55
|
+
|
|
56
|
+
- name: Run mypy
|
|
57
|
+
run: uv run mypy src tests
|
|
58
|
+
|
|
59
|
+
- name: Build package
|
|
60
|
+
run: uv build
|
dvcgen-0.2.0/.gitignore
ADDED
dvcgen-0.2.0/PKG-INFO
ADDED
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: dvcgen
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: Generate DVC pipeline files from Python declarations.
|
|
5
|
+
Author: pillyshi
|
|
6
|
+
License: MIT
|
|
7
|
+
Keywords: dvc,params,pipeline,yaml
|
|
8
|
+
Classifier: Development Status :: 2 - Pre-Alpha
|
|
9
|
+
Classifier: Environment :: Console
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
17
|
+
Classifier: Topic :: Software Development :: Build Tools
|
|
18
|
+
Requires-Python: >=3.10
|
|
19
|
+
Description-Content-Type: text/markdown
|
|
20
|
+
|
|
21
|
+
# dvcgen
|
|
22
|
+
|
|
23
|
+
Write your DVC pipeline once, in Python.
|
|
24
|
+
|
|
25
|
+
`dvcgen` is an early-stage command-line tool for generating DVC pipeline files
|
|
26
|
+
from lightweight declarations embedded in Python pipeline scripts.
|
|
27
|
+
|
|
28
|
+
## Current Status
|
|
29
|
+
|
|
30
|
+
Implemented:
|
|
31
|
+
|
|
32
|
+
- A Python package named `dvcgen`
|
|
33
|
+
- A `dvcgen` console command
|
|
34
|
+
- CLI argument parsing for pipeline script paths
|
|
35
|
+
- CLI input validation and overwrite protection
|
|
36
|
+
- Public declaration helpers: `dep()`, `out()`, and `param()`
|
|
37
|
+
- Python script inspection for top-level literal declarations
|
|
38
|
+
- `dvc.yaml` generation
|
|
39
|
+
- `params.yaml` generation
|
|
40
|
+
|
|
41
|
+
## Installation
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
uv tool install dvcgen
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
Or run without installing:
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
uvx dvcgen --help
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
## Usage
|
|
54
|
+
|
|
55
|
+
Show CLI help:
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
dvcgen --help
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
Generate DVC files from one or more Python pipeline scripts:
|
|
62
|
+
|
|
63
|
+
```bash
|
|
64
|
+
dvcgen pipeline/*.py
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
The command writes `dvc.yaml` and `params.yaml` in the current directory.
|
|
68
|
+
Stage names are derived from input Python filenames. For example,
|
|
69
|
+
`pipeline/train.py` becomes the `train` stage.
|
|
70
|
+
|
|
71
|
+
By default, `dvcgen` refuses to overwrite existing `dvc.yaml` or `params.yaml`
|
|
72
|
+
files. Use `--force` when you intentionally want to replace them:
|
|
73
|
+
|
|
74
|
+
```bash
|
|
75
|
+
dvcgen --force pipeline/*.py
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
Write files to another directory with `--output-dir`:
|
|
79
|
+
|
|
80
|
+
```bash
|
|
81
|
+
dvcgen --output-dir generated pipeline/*.py
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
Bad inputs fail with an error message and a non-zero exit code. Successful runs
|
|
85
|
+
print the files that were written.
|
|
86
|
+
|
|
87
|
+
Inspect declarations from Python without executing the pipeline script:
|
|
88
|
+
|
|
89
|
+
```python
|
|
90
|
+
from dvcgen.inspect import inspect_file
|
|
91
|
+
|
|
92
|
+
declarations = inspect_file("pipeline/train.py")
|
|
93
|
+
print(declarations.deps)
|
|
94
|
+
print(declarations.outs)
|
|
95
|
+
print(declarations.params)
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
## Release
|
|
99
|
+
|
|
100
|
+
Publishing is intentionally manual while the project is early stage. Build and
|
|
101
|
+
validate artifacts before uploading anything:
|
|
102
|
+
|
|
103
|
+
```bash
|
|
104
|
+
uv run python -m build
|
|
105
|
+
uv run twine check dist/*
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
Use TestPyPI first when rehearsing a release. Create a TestPyPI API token, then
|
|
109
|
+
upload with the token as the password:
|
|
110
|
+
|
|
111
|
+
```bash
|
|
112
|
+
uv run twine upload --repository testpypi dist/*
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
Use the production PyPI repository only when the version, changelog, and package
|
|
116
|
+
name decision are ready:
|
|
117
|
+
|
|
118
|
+
```bash
|
|
119
|
+
uv run twine upload dist/*
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
For both repositories, use `__token__` as the username and the repository API
|
|
123
|
+
token as the password. Avoid committing tokens or storing them in project files.
|
|
124
|
+
|
|
125
|
+
Before the first production upload, decide whether to publish the current
|
|
126
|
+
minimal release to reserve the `dvcgen` package name on PyPI. Once a version is
|
|
127
|
+
uploaded to PyPI or TestPyPI, that exact version cannot be uploaded again; bump
|
|
128
|
+
the version before retrying with changed artifacts.
|
|
129
|
+
|
|
130
|
+
## Planned MVP
|
|
131
|
+
|
|
132
|
+
The intended MVP is:
|
|
133
|
+
|
|
134
|
+
1. Pipeline scripts declare dependencies, outputs, and parameters in Python.
|
|
135
|
+
2. `dvcgen` inspects those declarations without executing the scripts.
|
|
136
|
+
3. `dvcgen` writes `dvc.yaml` and `params.yaml`.
|
|
137
|
+
|
|
138
|
+
Example API:
|
|
139
|
+
|
|
140
|
+
```python
|
|
141
|
+
from dvcgen import dep, out, param
|
|
142
|
+
|
|
143
|
+
TRAIN_DATA = dep("data/processed.csv")
|
|
144
|
+
MODEL = out("models/model.pkl")
|
|
145
|
+
|
|
146
|
+
LR = param("train.lr", 0.001)
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
Running:
|
|
150
|
+
|
|
151
|
+
```bash
|
|
152
|
+
dvcgen pipeline/train.py
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
Generates `dvc.yaml`:
|
|
156
|
+
|
|
157
|
+
```yaml
|
|
158
|
+
"stages":
|
|
159
|
+
"train":
|
|
160
|
+
"cmd": "python pipeline/train.py"
|
|
161
|
+
"deps":
|
|
162
|
+
- "pipeline/train.py"
|
|
163
|
+
- "data/processed.csv"
|
|
164
|
+
"outs":
|
|
165
|
+
- "models/model.pkl"
|
|
166
|
+
"params":
|
|
167
|
+
- "train.lr"
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
And `params.yaml`:
|
|
171
|
+
|
|
172
|
+
```yaml
|
|
173
|
+
"train":
|
|
174
|
+
"lr": 0.001
|
|
175
|
+
```
|
dvcgen-0.2.0/README.md
ADDED
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
# dvcgen
|
|
2
|
+
|
|
3
|
+
Write your DVC pipeline once, in Python.
|
|
4
|
+
|
|
5
|
+
`dvcgen` is an early-stage command-line tool for generating DVC pipeline files
|
|
6
|
+
from lightweight declarations embedded in Python pipeline scripts.
|
|
7
|
+
|
|
8
|
+
## Current Status
|
|
9
|
+
|
|
10
|
+
Implemented:
|
|
11
|
+
|
|
12
|
+
- A Python package named `dvcgen`
|
|
13
|
+
- A `dvcgen` console command
|
|
14
|
+
- CLI argument parsing for pipeline script paths
|
|
15
|
+
- CLI input validation and overwrite protection
|
|
16
|
+
- Public declaration helpers: `dep()`, `out()`, and `param()`
|
|
17
|
+
- Python script inspection for top-level literal declarations
|
|
18
|
+
- `dvc.yaml` generation
|
|
19
|
+
- `params.yaml` generation
|
|
20
|
+
|
|
21
|
+
## Installation
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
uv tool install dvcgen
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
Or run without installing:
|
|
28
|
+
|
|
29
|
+
```bash
|
|
30
|
+
uvx dvcgen --help
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
## Usage
|
|
34
|
+
|
|
35
|
+
Show CLI help:
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
dvcgen --help
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
Generate DVC files from one or more Python pipeline scripts:
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
dvcgen pipeline/*.py
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
By default, the command writes `dvc.yaml` and `params.yaml` to the current directory.
|
|
48
|
+
Stage names are derived from input Python filenames. For example,
|
|
49
|
+
`pipeline/train.py` becomes the `train` stage.
|
|
50
|
+
|
|
51
|
+
By default, `dvcgen` refuses to overwrite existing `dvc.yaml` or `params.yaml`
|
|
52
|
+
files. Use `--force` when you intentionally want to replace them:
|
|
53
|
+
|
|
54
|
+
```bash
|
|
55
|
+
dvcgen --force pipeline/*.py
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
Write files to another directory with `--output-dir`:
|
|
59
|
+
|
|
60
|
+
```bash
|
|
61
|
+
dvcgen --output-dir generated pipeline/*.py
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
Bad inputs fail with an error message and a non-zero exit code. Successful runs
|
|
65
|
+
print the files that were written.
|
|
66
|
+
|
|
67
|
+
Inspect declarations from Python without executing the pipeline script:
|
|
68
|
+
|
|
69
|
+
```python
|
|
70
|
+
from dvcgen.inspect import inspect_file
|
|
71
|
+
|
|
72
|
+
declarations = inspect_file("pipeline/train.py")
|
|
73
|
+
print(declarations.deps)
|
|
74
|
+
print(declarations.outs)
|
|
75
|
+
print(declarations.params)
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
## Release
|
|
79
|
+
|
|
80
|
+
Publishing is intentionally manual while the project is early stage. Build and
|
|
81
|
+
validate artifacts before uploading anything:
|
|
82
|
+
|
|
83
|
+
```bash
|
|
84
|
+
uv run python -m build
|
|
85
|
+
uv run twine check dist/*
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
Use TestPyPI first when rehearsing a release. Create a TestPyPI API token, then
|
|
89
|
+
upload with the token as the password:
|
|
90
|
+
|
|
91
|
+
```bash
|
|
92
|
+
uv run twine upload --repository testpypi dist/*
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
Use the production PyPI repository only when the version, changelog, and package
|
|
96
|
+
name decision are ready:
|
|
97
|
+
|
|
98
|
+
```bash
|
|
99
|
+
uv run twine upload dist/*
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
For both repositories, use `__token__` as the username and the repository API
|
|
103
|
+
token as the password. Avoid committing tokens or storing them in project files.
|
|
104
|
+
|
|
105
|
+
Before the first production upload, decide whether to publish the current
|
|
106
|
+
minimal release to reserve the `dvcgen` package name on PyPI. Once a version is
|
|
107
|
+
uploaded to PyPI or TestPyPI, that exact version cannot be uploaded again; bump
|
|
108
|
+
the version before retrying with changed artifacts.
|
|
109
|
+
|
|
110
|
+
## Planned MVP
|
|
111
|
+
|
|
112
|
+
The intended MVP is:
|
|
113
|
+
|
|
114
|
+
1. Pipeline scripts declare dependencies, outputs, and parameters in Python.
|
|
115
|
+
2. `dvcgen` inspects those declarations without executing the scripts.
|
|
116
|
+
3. `dvcgen` writes `dvc.yaml` and `params.yaml`.
|
|
117
|
+
|
|
118
|
+
Example API:
|
|
119
|
+
|
|
120
|
+
```python
|
|
121
|
+
from dvcgen import dep, out, param
|
|
122
|
+
|
|
123
|
+
TRAIN_DATA = dep("data/processed.csv")
|
|
124
|
+
MODEL = out("models/model.pkl")
|
|
125
|
+
|
|
126
|
+
LR = param("train.lr", 0.001)
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
Running:
|
|
130
|
+
|
|
131
|
+
```bash
|
|
132
|
+
dvcgen pipeline/train.py
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
Generates `dvc.yaml`:
|
|
136
|
+
|
|
137
|
+
```yaml
|
|
138
|
+
"stages":
|
|
139
|
+
"train":
|
|
140
|
+
"cmd": "python pipeline/train.py"
|
|
141
|
+
"deps":
|
|
142
|
+
- "pipeline/train.py"
|
|
143
|
+
- "data/processed.csv"
|
|
144
|
+
"outs":
|
|
145
|
+
- "models/model.pkl"
|
|
146
|
+
"params":
|
|
147
|
+
- "train.lr"
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
And `params.yaml`:
|
|
151
|
+
|
|
152
|
+
```yaml
|
|
153
|
+
"train":
|
|
154
|
+
"lr": 0.001
|
|
155
|
+
```
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "dvcgen"
|
|
7
|
+
version = "0.2.0"
|
|
8
|
+
description = "Generate DVC pipeline files from Python declarations."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.10"
|
|
11
|
+
license = { text = "MIT" }
|
|
12
|
+
authors = [
|
|
13
|
+
{ name = "pillyshi" },
|
|
14
|
+
]
|
|
15
|
+
keywords = ["dvc", "pipeline", "params", "yaml"]
|
|
16
|
+
classifiers = [
|
|
17
|
+
"Development Status :: 2 - Pre-Alpha",
|
|
18
|
+
"Environment :: Console",
|
|
19
|
+
"Intended Audience :: Developers",
|
|
20
|
+
"Programming Language :: Python :: 3",
|
|
21
|
+
"Programming Language :: Python :: 3 :: Only",
|
|
22
|
+
"Programming Language :: Python :: 3.10",
|
|
23
|
+
"Programming Language :: Python :: 3.11",
|
|
24
|
+
"Programming Language :: Python :: 3.12",
|
|
25
|
+
"Programming Language :: Python :: 3.13",
|
|
26
|
+
"Topic :: Software Development :: Build Tools",
|
|
27
|
+
]
|
|
28
|
+
|
|
29
|
+
[project.scripts]
|
|
30
|
+
dvcgen = "dvcgen.cli:main"
|
|
31
|
+
|
|
32
|
+
[tool.hatch.build.targets.wheel]
|
|
33
|
+
packages = ["src/dvcgen"]
|
|
34
|
+
|
|
35
|
+
[dependency-groups]
|
|
36
|
+
dev = [
|
|
37
|
+
"build>=1.5.0",
|
|
38
|
+
"mypy>=1.19.1",
|
|
39
|
+
"pytest>=8.4.2",
|
|
40
|
+
"ruff>=0.15.15",
|
|
41
|
+
"twine>=6.2.0",
|
|
42
|
+
]
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"""Generate DVC pipeline files from Python declarations."""
|
|
2
|
+
|
|
3
|
+
# Keep in sync with the `version` field in pyproject.toml; the CLI's
# `--version` flag prints this value.
__version__ = "0.2.0"
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def dep(path):
    """Declare ``path`` as a pipeline dependency.

    The argument is handed back unchanged, so the declaration doubles as the
    value the script uses at runtime.
    """
    return path
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def out(path):
    """Declare ``path`` as a pipeline output.

    The argument is handed back unchanged, so the declaration doubles as the
    value the script uses at runtime.
    """
    return path
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def param(name, default):
    """Declare a pipeline parameter called ``name``.

    ``name`` is not used at runtime; the function simply hands back
    ``default`` so the script can use the declared default value directly.
    """
    return default
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
__all__ = ["__version__", "dep", "out", "param"]
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
"""Command-line interface for dvcgen."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import argparse
|
|
6
|
+
from collections.abc import Sequence
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
import sys
|
|
9
|
+
from typing import Optional, TextIO
|
|
10
|
+
|
|
11
|
+
from dvcgen import __version__
|
|
12
|
+
from dvcgen.generate import write_files
|
|
13
|
+
from dvcgen.inspect import inspect_files
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def build_parser() -> argparse.ArgumentParser:
    """Construct the argument parser for the ``dvcgen`` command.

    Returns:
        A parser exposing the positional ``scripts`` list plus the
        ``-o/--output-dir``, ``-f/--force`` and ``--version`` options.
    """
    cli = argparse.ArgumentParser(
        description="Generate dvc.yaml and params.yaml from Python pipeline scripts.",
        prog="dvcgen",
    )
    # nargs="*" lets the parser accept an empty script list instead of
    # rejecting it itself.
    cli.add_argument("scripts", nargs="*", help="Python pipeline scripts to inspect.")
    cli.add_argument(
        "-o",
        "--output-dir",
        default=".",
        help="Directory where dvc.yaml and params.yaml are written. Defaults to the current directory.",
    )
    cli.add_argument(
        "-f",
        "--force",
        action="store_true",
        help="Overwrite existing dvc.yaml and params.yaml files.",
    )
    cli.add_argument("--version", action="version", version=f"%(prog)s {__version__}")
    return cli
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def main(
    argv: Optional[Sequence[str]] = None,
    stdout: Optional[TextIO] = None,
    stderr: Optional[TextIO] = None,
) -> int:
    """Run the ``dvcgen`` command line and return its exit status.

    Args:
        argv: Arguments handed to the parser's ``parse_args``.
        stdout: Stream for the success message; defaults to ``sys.stdout``.
        stderr: Stream for error messages; defaults to ``sys.stderr``.

    Returns:
        ``0`` once both files have been written, ``2`` for every error
        reported by this function.
    """
    if stdout is None:
        stdout = sys.stdout
    if stderr is None:
        stderr = sys.stderr

    options = build_parser().parse_args(argv)
    if not options.scripts:
        print("dvcgen: error: provide at least one Python pipeline script", file=stderr)
        print("Try 'dvcgen --help' for usage.", file=stderr)
        return 2

    output_dir = Path(options.output_dir)
    script_paths = tuple(Path(script) for script in options.scripts)
    dvc_path = output_dir / "dvc.yaml"
    params_path = output_dir / "params.yaml"

    problem = _validation_error(
        script_paths,
        output_dir,
        (dvc_path, params_path),
        options.force,
    )
    if problem is not None:
        print(f"dvcgen: error: {problem}", file=stderr)
        return 2

    try:
        write_files(
            inspect_files(script_paths),
            dvc_path=dvc_path,
            params_path=params_path,
        )
    except SyntaxError as error:
        print(
            f"dvcgen: error: failed to parse {error.filename}: {error.msg}",
            file=stderr,
        )
        return 2
    except (OSError, ValueError) as error:
        # Both kinds of failure are reported the same way.
        print(f"dvcgen: error: {error}", file=stderr)
        return 2

    print(f"Wrote {dvc_path} and {params_path}", file=stdout)
    return 0
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def _validation_error(
    script_paths: Sequence[Path],
    output_dir: Path,
    output_paths: Sequence[Path],
    force: bool,
) -> Optional[str]:
    """Check the CLI inputs and describe the first problem found.

    Side effect: once the scripts have been validated, ``output_dir`` is
    created (including parents) if it does not exist yet.

    Args:
        script_paths: Input scripts; each must be an existing ``.py`` file.
        output_dir: Directory that will receive the generated files.
        output_paths: Files that are about to be written.
        force: When true, existing ``output_paths`` are not treated as errors.

    Returns:
        A human-readable error message, or ``None`` when everything is valid.
    """
    for script_path in script_paths:
        if not script_path.exists():
            return f"input script not found: {script_path}"
        if not script_path.is_file():
            return f"input script is not a file: {script_path}"
        if script_path.suffix != ".py":
            return f"input script must be a .py file: {script_path}"

    if output_dir.exists() and not output_dir.is_dir():
        return f"output directory is not a directory: {output_dir}"
    try:
        output_dir.mkdir(parents=True, exist_ok=True)
    except OSError as error:
        # Report creation failures (permissions, a parent that is a regular
        # file, ...) as a validation message instead of letting the
        # exception escape as a traceback.
        return f"cannot create output directory {output_dir}: {error}"

    if not force:
        existing_paths = [path for path in output_paths if path.exists()]
        if existing_paths:
            joined_paths = ", ".join(str(path) for path in existing_paths)
            return f"refusing to overwrite existing file(s): {joined_paths}; use --force to replace them"

    return None
|