pypdown 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pypdown-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,89 @@
1
+ Metadata-Version: 2.1
2
+ Name: pypdown
3
+ Version: 0.1.0
4
+ Summary: A Pydantic model-based approach to data pipelining with file I/O linting.
5
+ Author-Email: Louis Maddox <louismmx@gmail.com>
6
+ License: MIT
7
+ Requires-Python: >=3.12
8
+ Requires-Dist: pydantic>=2.8.2
9
+ Description-Content-Type: text/markdown
10
+
11
+ # pypdown
12
+
13
+ A Pydantic model-based approach to data pipelining with file I/O linting.
14
+
15
+ [![PyPI version](https://badge.fury.io/py/pypdown.svg)](https://badge.fury.io/py/pypdown)
16
+ [![Python Versions](https://img.shields.io/pypi/pyversions/pypdown.svg)](https://pypi.org/project/pypdown/)
17
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
18
+ [![Documentation](https://img.shields.io/badge/docs-pypdown.vercel.app-blue)](https://pypdown.vercel.app/)
19
+ [![CI Status](https://github.com/lmmx/pypdown/actions/workflows/ci.yml/badge.svg)](https://github.com/lmmx/pypdown/actions/workflows/ci.yml)
20
+ [![pre-commit.ci status](https://results.pre-commit.ci/badge/github/lmmx/pypdown/master.svg)](https://results.pre-commit.ci/latest/github/lmmx/pypdown/master)
21
+
22
+ ## Features
23
+
24
+ - Pydantic model-based approach to data pipelining
25
+ - File I/O linting for robust pipeline execution
26
+ - Easy-to-use API for defining and running pipeline steps
27
+ - Support for callback functions and keyword argument-based file paths
28
+
29
+ ## Installation
30
+
31
+ ```bash
32
+ pip install pypdown
33
+ ```
34
+
35
+ ## Quick Start
36
+
37
+ ```python
38
+ from pypdown import run_step
39
+ from pypdown.models import Step
40
+ from pydantic import BaseModel
41
+ from pathlib import Path
42
+
43
+ class StepParams(BaseModel):
44
+ input_file: Path = "input.txt"
45
+ output_file: Path = "output.txt"
46
+ final_file: Path = "final.txt"
47
+
48
+ def process_input(input_file: Path, output_file: Path, config: StepParams):
49
+ """Process input file and create output file."""
50
+ output_file.write_text(input_file.read_text().upper())
51
+
52
+ def finalize_output(output_file: Path, final_file: Path, config: StepParams):
53
+ """Process output file and create final file."""
54
+ final_file.write_text(f"Processed: {output_file.read_text()}")
55
+
56
+ config = StepParams()
57
+
58
+ # Define your pipeline tasks
59
+ tasks = [
60
+ {
61
+ "src": {"input_file": config.input_file},
62
+ "dst": {"output_file": config.output_file},
63
+ "fn": process_input,
64
+ },
65
+ {
66
+ "src": {"output_file": config.output_file},
67
+ "dst": {"final_file": config.final_file},
68
+ "fn": finalize_output,
69
+ },
70
+ ]
71
+
72
+ # Create a Step
73
+ step = Step(name="Example Pipeline Step", tasks=tasks, config=config)
74
+
75
+ # Run the step
76
+ run_step(step)
77
+ ```
78
+
79
+ ## Documentation
80
+
81
+ For full documentation, please visit [pypdown.vercel.app](https://pypdown.vercel.app/).
82
+
83
+ ## Contributing
84
+
85
+ Contributions are welcome! Please feel free to submit a Pull Request.
86
+
87
+ ## License
88
+
89
+ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
@@ -0,0 +1,79 @@
1
+ # pypdown
2
+
3
+ A Pydantic model-based approach to data pipelining with file I/O linting.
4
+
5
+ [![PyPI version](https://badge.fury.io/py/pypdown.svg)](https://badge.fury.io/py/pypdown)
6
+ [![Python Versions](https://img.shields.io/pypi/pyversions/pypdown.svg)](https://pypi.org/project/pypdown/)
7
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
8
+ [![Documentation](https://img.shields.io/badge/docs-pypdown.vercel.app-blue)](https://pypdown.vercel.app/)
9
+ [![CI Status](https://github.com/lmmx/pypdown/actions/workflows/ci.yml/badge.svg)](https://github.com/lmmx/pypdown/actions/workflows/ci.yml)
10
+ [![pre-commit.ci status](https://results.pre-commit.ci/badge/github/lmmx/pypdown/master.svg)](https://results.pre-commit.ci/latest/github/lmmx/pypdown/master)
11
+
12
+ ## Features
13
+
14
+ - Pydantic model-based approach to data pipelining
15
+ - File I/O linting for robust pipeline execution
16
+ - Easy-to-use API for defining and running pipeline steps
17
+ - Support for callback functions and keyword argument-based file paths
18
+
19
+ ## Installation
20
+
21
+ ```bash
22
+ pip install pypdown
23
+ ```
24
+
25
+ ## Quick Start
26
+
27
+ ```python
28
+ from pypdown import run_step
29
+ from pypdown.models import Step
30
+ from pydantic import BaseModel
31
+ from pathlib import Path
32
+
33
+ class StepParams(BaseModel):
34
+ input_file: Path = "input.txt"
35
+ output_file: Path = "output.txt"
36
+ final_file: Path = "final.txt"
37
+
38
+ def process_input(input_file: Path, output_file: Path, config: StepParams):
39
+ """Process input file and create output file."""
40
+ output_file.write_text(input_file.read_text().upper())
41
+
42
+ def finalize_output(output_file: Path, final_file: Path, config: StepParams):
43
+ """Process output file and create final file."""
44
+ final_file.write_text(f"Processed: {output_file.read_text()}")
45
+
46
+ config = StepParams()
47
+
48
+ # Define your pipeline tasks
49
+ tasks = [
50
+ {
51
+ "src": {"input_file": config.input_file},
52
+ "dst": {"output_file": config.output_file},
53
+ "fn": process_input,
54
+ },
55
+ {
56
+ "src": {"output_file": config.output_file},
57
+ "dst": {"final_file": config.final_file},
58
+ "fn": finalize_output,
59
+ },
60
+ ]
61
+
62
+ # Create a Step
63
+ step = Step(name="Example Pipeline Step", tasks=tasks, config=config)
64
+
65
+ # Run the step
66
+ run_step(step)
67
+ ```
68
+
69
+ ## Documentation
70
+
71
+ For full documentation, please visit [pypdown.vercel.app](https://pypdown.vercel.app/).
72
+
73
+ ## Contributing
74
+
75
+ Contributions are welcome! Please feel free to submit a Pull Request.
76
+
77
+ ## License
78
+
79
+ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
@@ -0,0 +1,38 @@
1
+ [build-system]
2
+ requires = [
3
+ "pdm-backend",
4
+ ]
5
+ build-backend = "pdm.backend"
6
+
7
+ [project]
8
+ name = "pypdown"
9
+ version = "0.1.0"
10
+ description = "A Pydantic model-based approach to data pipelining with file I/O linting."
11
+ authors = [
12
+ { name = "Louis Maddox", email = "louismmx@gmail.com" },
13
+ ]
14
+ dependencies = [
15
+ "pydantic>=2.8.2",
16
+ ]
17
+ requires-python = ">=3.12"
18
+ readme = "README.md"
19
+
20
+ [project.license]
21
+ text = "MIT"
22
+
23
+ [tool.pdm]
24
+ distribution = true
25
+
26
+ [tool.pdm.dev-dependencies]
27
+ docs = [
28
+ "mkdocs-material[recommended,imaging]>=9.5.2",
29
+ "mkdocs-section-index>=0.3.8",
30
+ "mkdocs>=1.5.3",
31
+ "mkdocstrings[python]>=0.24.0",
32
+ ]
33
+ test = [
34
+ "pytest>=8.2.2",
35
+ ]
36
+ vercel = [
37
+ "urllib3<2",
38
+ ]
@@ -0,0 +1,3 @@
1
+ from .run import run_step
2
+
3
+ __all__ = ["run_step"]
@@ -0,0 +1,66 @@
1
+ """Pydantic models to represent the tasks within a step in a data pipeline."""
2
+
3
+ from pathlib import Path
4
+ from typing import TypeVar
5
+ from collections.abc import Callable
6
+
7
+ from pydantic import BaseModel, FilePath, NewPath, OnErrorOmit, TypeAdapter
8
+
9
+ __all__ = [
10
+ "AvailableTask",
11
+ "CompletedTask",
12
+ "Task",
13
+ "Step",
14
+ "AvailableTA",
15
+ "CompletedTA",
16
+ "RunContext",
17
+ ]
18
+
19
+
20
class Executable(BaseModel):
    """Common base for every task model: a task always carries a function."""

    # The function associated with the task.
    fn: Callable
24
+
25
+
26
class AvailableTask(Executable):
    """A task that is ready to run: its inputs exist and its outputs do not yet."""

    # Named input paths; `FilePath` only validates for paths that are existing files.
    src: dict[str, FilePath]
    # Named output paths; `NewPath` only validates for paths that do not exist yet.
    dst: dict[str, NewPath]
31
+
32
+
33
class CompletedTask(Executable):
    """A task whose outputs already exist; its inputs may or may not still exist."""

    # Named input paths; plain `Path`, so their existence is not checked.
    src: dict[str, Path]
    # Named output paths; `FilePath` only validates for paths that are existing files.
    dst: dict[str, FilePath]
38
+
39
+
40
class Task(Executable):
    """A task declaration: any number of named inputs and outputs (possibly none)."""

    # Named input paths; existence is not checked by this model.
    src: dict[str, Path]
    # Named output paths; existence is not checked by this model.
    dst: dict[str, Path]
45
+
46
+
47
# Type variable for a step's config object: any Pydantic model.
C = TypeVar("C", bound=BaseModel)


class Step(BaseModel):
    """One named stage of a data pipeline, made up of tasks with declared file I/O."""

    # Human-readable name of the step.
    name: str
    # The tasks that make up the step, in list order.
    tasks: list[Task]
    # The config model that accompanies the step.
    config: C
56
+
57
+
58
# Type adapters for lists of tasks. `OnErrorOmit` drops any item that fails validation,
# so validating a list yields only the entries that qualify as available / completed.
AvailableTA = TypeAdapter(list[OnErrorOmit[AvailableTask]])
CompletedTA = TypeAdapter(list[OnErrorOmit[CompletedTask]])
60
+
61
+
62
class RunContext(BaseModel):
    """What a task runner gets to see besides the task itself."""

    # The step the task belongs to.
    step: Step
    # An integer index; presumably the task's position within `step.tasks`.
    idx: int
@@ -0,0 +1,55 @@
1
+ """Control flow using the Pydantic runtime file I/O checks."""
2
+
3
+ from .models import AvailableTA, AvailableTask, CompletedTA, Step, RunContext
4
+
5
+ __all__ = ["run_step"]
6
+
7
+
8
def task_runner(task: AvailableTask, context: RunContext) -> None:
    """Print a summary of a task, then execute it.

    The task's function is called with every ``src`` and ``dst`` entry unpacked as a
    keyword argument (keyed by the entry's name), plus the step's config as ``config``.

    Args:
        task: The task to run.
        context: The run context; only ``context.step.config`` is read here.
    """
    # Pass a set of field names: that is the documented type for `exclude`, a bare
    # string is not. `fn` is left out of the JSON-mode dump (presumably because a
    # callable cannot be serialised to JSON).
    summary = task.model_dump(mode="json", exclude={"fn"})
    print(f"Hello world {summary}")
    task.fn(**task.src, **task.dst, config=context.step.config)
11
+
12
+
13
def run_step(step: Step):
    """Run a pipeline step's tasks based on the availability of task files.

    Tasks are iterated through, and the relevant in/output files' existence is checked
    when the task is reached in the loop (rather than at the start). This means that
    intermediate files can be created by tasks, and their existence will be checked
    when those output files become inputs to subsequent tasks.

    For each task after the first, the previous task is first re-checked for
    completion (all of its output files exist). The task is then either run (inputs
    exist, outputs do not), skipped (outputs already exist), or found to be missing a
    requisite.

    If the previous task turns out to be incomplete, or a task's requisites are
    missing, the step bails out: no further tasks will run, though each remaining task
    is still reported as skipped.

    Args:
        step: The step whose tasks should be run, in list order.

    Raises:
        ValueError: If the step has no tasks.
    """
    if not step.tasks:
        raise ValueError("No tasks were assigned")
    print(f"Running step {step.name!r} with {len(step.tasks)} tasks")

    bail = False
    for idx, task in enumerate(step.tasks):
        if idx > 0 and not bail:
            # Re-validate the previous task now that it has had its chance to run: an
            # empty result means at least one of its output files is still missing.
            prev_task = step.tasks[idx - 1]
            prev_completed = CompletedTA.validate_python([prev_task.model_dump()])
            if not prev_completed:
                bail = True
                print("(!) Incomplete previous task detected, bailing")

        # `include` takes a set of field names (the documented type), not a list.
        file_io = task.model_dump(include={"src", "dst"}, mode="json")
        task_repr = " --> ".join(map(str, file_io.values()))
        print(f"\n--- Task {idx + 1} --- {task_repr}")
        if bail:
            print("(-) Bailing out of step, skipping task")
            continue

        # Dump once and validate the same data against both task states. Each adapter
        # omits items that fail validation, so a non-empty result means "qualifies".
        task_data = task.model_dump()
        available = AvailableTA.validate_python([task_data])
        completed = CompletedTA.validate_python([task_data])

        if available:
            print("\033[92;1m>>>\033[0m Running available task")
            task_runner(task=task, context=RunContext(step=step, idx=idx))
        elif completed:
            print("(x) Task already completed, skipping")
        else:
            print("(!) Task requisite missing, bailing")
            bail = True
File without changes
@@ -0,0 +1,76 @@
1
+ from pypdown import run_step
2
+ from pypdown.models import Step
3
+ from pydantic import BaseModel
4
+ from pathlib import Path
5
+
6
+
7
def test_long_example():
    """Run a seven-task step, including tasks with no inputs and chained outputs.

    All paths are relative, so files are looked up and created relative to the current
    working directory. The test makes no assertions of its own beyond those inside the
    callbacks; it checks that building and running the step does not raise.
    """

    class StepParams(BaseModel):
        # Defaults are real `Path` objects: Pydantic does not validate default values
        # unless asked to, so a string default would stay a `str` despite the
        # `Path` annotation.
        n1_o: Path = Path("nil1.out")
        n2_o: Path = Path("nil2.out")
        a_i: Path = Path("a.in")
        a_o: Path = Path("a.out")
        b_o: Path = Path("b.out")
        c_o: Path = Path("c.out")
        d_i: Path = Path("d.in")
        d_o: Path = Path("d.out")
        e_i: Path = Path("e.in")
        e_o: Path = Path("e.out")

    config = StepParams()

    def cb_n1(n1_o: Path, config: StepParams):
        n1_o.touch()
        print(f"Touched {n1_o=}")

    def cb_a(a_i: Path, a_o: Path, config: StepParams):
        assert a_i.exists()
        a_o.touch()
        print(f"Touched {a_o=}")

    def cb_b(a_o: Path, b_o: Path, config: StepParams):
        assert a_o.exists()
        b_o.touch()
        print(f"Touched {b_o=}")

    def cb_c(a_o: Path, b_o: Path, c_o: Path, config: StepParams):
        assert a_o.exists() and b_o.exists()
        c_o.touch()
        print(f"Touched {c_o=}")

    def cb_d(d_i: Path, d_o: Path, config: StepParams):
        assert d_i.exists()
        d_o.touch()
        print(f"Touched {d_o=}")

    def cb_e(e_i: Path, e_o: Path, config: StepParams):
        assert e_i.exists()
        e_o.touch()
        print(f"Touched {e_o=}")

    def cb_n2(n2_o: Path, config: StepParams):
        n2_o.touch()
        print(f"Touched {n2_o=}")

    # (input field names, output field names, callback) for each task, in run order.
    task_fields = [
        ([], ["n1_o"], cb_n1),
        (["a_i"], ["a_o"], cb_a),
        (["a_o"], ["b_o"], cb_b),
        (["a_o", "b_o"], ["c_o"], cb_c),
        (["d_i"], ["d_o"], cb_d),
        (["e_i"], ["e_o"], cb_e),
        ([], ["n2_o"], cb_n2),
    ]

    # Turn the in/output lists into dicts keyed by config field name with filename values
    tasks = [
        {
            "src": {field: getattr(config, field) for field in inputs},
            "dst": {field: getattr(config, field) for field in outputs},
            "fn": func,
        }
        for inputs, outputs, func in task_fields
    ]

    step = Step(name="Demo Step", tasks=tasks, config=config)
    run_step(step)
@@ -0,0 +1,2 @@
1
def test_package():
    """Placeholder test: does nothing and therefore always passes."""
    return None
@@ -0,0 +1,2 @@
1
+ mypy
2
+ pytest
@@ -0,0 +1,43 @@
1
+ from pypdown import run_step
2
+ from pypdown.models import Step
3
+ from pydantic import BaseModel
4
+ from pathlib import Path
5
+
6
+
7
def test_simple_example():
    """Run a two-task step in which the second task consumes the first task's output.

    All paths are relative, so files are looked up and created relative to the current
    working directory. The test makes no assertions of its own beyond those inside the
    callbacks; it checks that building and running the step does not raise.
    """

    class StepParams(BaseModel):
        # Defaults are real `Path` objects: Pydantic does not validate default values
        # unless asked to, so a string default would stay a `str` despite the
        # `Path` annotation.
        a1_i: Path = Path("a1.in")
        a2_i: Path = Path("a2.in")
        a_o: Path = Path("a.out")
        b_i: Path = Path("b.in")
        b_o: Path = Path("b.out")

    def cb_a(a1_i: Path, a2_i: Path, a_o: Path, config: StepParams):
        assert a1_i.exists() and a2_i.exists()
        a_o.touch()
        print(f"Touched {a_o=}")

    def cb_b(a_o: Path, b_i: Path, b_o: Path, config: StepParams):
        assert a_o.exists() and b_i.exists()
        b_o.touch()
        print(f"Touched {b_o=}")

    # (input field names, output field names, callback) for each task, in run order.
    task_fields = [
        (["a1_i", "a2_i"], ["a_o"], cb_a),
        (["a_o", "b_i"], ["b_o"], cb_b),
    ]

    config = StepParams()

    # Turn the in/output lists into dicts keyed by config field name with filename values
    tasks = [
        {
            "src": {field: getattr(config, field) for field in inputs},
            "dst": {field: getattr(config, field) for field in outputs},
            "fn": func,
        }
        for inputs, outputs, func in task_fields
    ]

    step = Step(name="Demo Step", tasks=tasks, config=config)
    run_step(step)