PyPI - pypelite - Versions diffs - 0.1.0__tar.gz - Mend

pypelite 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

pypelite-0.1.0/LICENSE +21 -0
pypelite-0.1.0/PKG-INFO +136 -0
pypelite-0.1.0/README.md +118 -0
pypelite-0.1.0/pyproject.toml +30 -0
pypelite-0.1.0/setup.cfg +4 -0
pypelite-0.1.0/src/pypelite/__init__.py +326 -0
pypelite-0.1.0/src/pypelite.egg-info/PKG-INFO +136 -0
pypelite-0.1.0/src/pypelite.egg-info/SOURCES.txt +9 -0
pypelite-0.1.0/src/pypelite.egg-info/dependency_links.txt +1 -0
pypelite-0.1.0/src/pypelite.egg-info/top_level.txt +1 -0
pypelite-0.1.0/tests/test_pipeline.py +152 -0

pypelite-0.1.0/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2026 pypelite contributors
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

pypelite-0.1.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,136 @@
+Metadata-Version: 2.4
+Name: pypelite
+Version: 0.1.0
+Summary: Tiny pipeline library for ordinary Python scripts.
+Author: pypelite contributors
+License-Expression: MIT
+Keywords: pipeline,cache,workflow,data
+Classifier: Development Status :: 3 - Alpha
+Classifier: Intended Audience :: Developers
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
+Classifier: Topic :: Software Development :: Libraries :: Python Modules
+Requires-Python: >=3.12
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Dynamic: license-file
+# pypelite
+_Pypelite is a tiny pipeline library for ordinary Python scripts._
+```python
+import pypelite
+@pypelite.stage("load", batch="symbols", batch_size=50, workers=4)
+def load_prices(symbols):
+    return market_api.fetch_prices(symbols)
+@pypelite.stage("features")
+def build_features(prices_df):
+    return make_model_features(prices_df)
+@pypelite.stage("train")
+def train_model(features_df):
+    return fit_price_model(features_df)
+with pypelite.pipeline("runs/price-model"):
+    prices_df = load_prices(["AAPL", "MSFT", "NVDA"])
+    features_df = build_features(prices_df)
+    model = train_model(features_df)
+```
+Pipelines are resumable: completed stages load from disk, and selected stages
+can be refreshed when they need to run again. No DSL, no DAG boilerplate, no
+Airflow deployment. The Python code is the pipeline.
+## Installation
+```sh
+pip install pypelite
+```
+## Refresh, Skip, Clean
+Control a run from the pipeline context.
+```python
+with pypelite.pipeline(
+    "runs/experiment",
+    refresh=["features"],
+    skip=["train"],
+    clean=["predict"],
+    until="features",
+):
+    run_price_model()
+```
+- `refresh` recomputes named stages touched by the run.
+- `skip` returns a stage's `skip_value` without touching its cache.
+- `clean` removes old keyed artifacts not touched by a successful run.
+- `until` stops after the named stage completes.
+## Advanced Features
+### Item Caches
+Use `key` when each item should have its own saved result.
+```python
+@pypelite.stage("predict", key="symbol")
+def predict_price(symbol, feature_row):
+    return model.predict(feature_row)
+with pypelite.pipeline("runs/predictions"):
+    for symbol, feature_row in features_df.iterrows():
+        predict_price(symbol, feature_row)
+```
+### Fanout
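+Use `workers` when fanout should run in parallel. Pair it with `batch_size`:
+without one, all pending items go to the stage in a single call, which leaves
+only one chunk for the workers to run.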
+```python
+@pypelite.stage("features", key="symbol", batch="symbols", workers=4)
+def build_symbol_features(symbols):
+    prices_df = market_api.fetch_prices(symbols)
+    return make_model_features_by_symbol(prices_df)
+with pypelite.pipeline("runs/features"):
+    feature_rows = build_symbol_features(["AAPL", "MSFT", "NVDA"])
+```
+### Batches
+Use `batch` when work should run in chunks.
+```python
+@pypelite.stage("predict", key="symbol", batch="rows", batch_size=200)
+def predict_prices(rows):
+    return model_api.batch_predict(rows)
+with pypelite.pipeline("runs/predictions"):
+    predictions = predict_prices(feature_rows)
+```
+Batch stages can run chunks in parallel with `workers`. Unkeyed batches
+assemble one final artifact; keyed batches save one result per key.
+### Shared Archives
+Stages can choose named archives.
+```python
+@pypelite.stage("prices", archive="market", key=("date", "symbol"))
+def prices(date, symbol):
+    return market_api.price(date, symbol)
+with pypelite.pipeline(
+    archives={"default": "runs/model", "market": "archive/market"},
+):
+    run_price_model()
+```
+Shared archives make it easy for many experiments to reuse the same market
+data while keeping model outputs in their own run directories.

pypelite-0.1.0/README.md ADDED Viewed

@@ -0,0 +1,118 @@
+# pypelite
+_Pypelite is a tiny pipeline library for ordinary Python scripts._
+```python
+import pypelite
+@pypelite.stage("load", batch="symbols", batch_size=50, workers=4)
+def load_prices(symbols):
+    return market_api.fetch_prices(symbols)
+@pypelite.stage("features")
+def build_features(prices_df):
+    return make_model_features(prices_df)
+@pypelite.stage("train")
+def train_model(features_df):
+    return fit_price_model(features_df)
+with pypelite.pipeline("runs/price-model"):
+    prices_df = load_prices(["AAPL", "MSFT", "NVDA"])
+    features_df = build_features(prices_df)
+    model = train_model(features_df)
+```
+Pipelines are resumable: completed stages load from disk, and selected stages
+can be refreshed when they need to run again. No DSL, no DAG boilerplate, no
+Airflow deployment. The Python code is the pipeline.
+## Installation
+```sh
+pip install pypelite
+```
+## Refresh, Skip, Clean
+Control a run from the pipeline context.
+```python
+with pypelite.pipeline(
+    "runs/experiment",
+    refresh=["features"],
+    skip=["train"],
+    clean=["predict"],
+    until="features",
+):
+    run_price_model()
+```
+- `refresh` recomputes named stages touched by the run.
+- `skip` returns a stage's `skip_value` without touching its cache.
+- `clean` removes old keyed artifacts not touched by a successful run.
+- `until` stops after the named stage completes.
+## Advanced Features
+### Item Caches
+Use `key` when each item should have its own saved result.
+```python
+@pypelite.stage("predict", key="symbol")
+def predict_price(symbol, feature_row):
+    return model.predict(feature_row)
+with pypelite.pipeline("runs/predictions"):
+    for symbol, feature_row in features_df.iterrows():
+        predict_price(symbol, feature_row)
+```
+### Fanout
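+Use `workers` when fanout should run in parallel. Pair it with `batch_size`:
+without one, all pending items go to the stage in a single call, which leaves
+only one chunk for the workers to run.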
+```python
+@pypelite.stage("features", key="symbol", batch="symbols", workers=4)
+def build_symbol_features(symbols):
+    prices_df = market_api.fetch_prices(symbols)
+    return make_model_features_by_symbol(prices_df)
+with pypelite.pipeline("runs/features"):
+    feature_rows = build_symbol_features(["AAPL", "MSFT", "NVDA"])
+```
+### Batches
+Use `batch` when work should run in chunks.
+```python
+@pypelite.stage("predict", key="symbol", batch="rows", batch_size=200)
+def predict_prices(rows):
+    return model_api.batch_predict(rows)
+with pypelite.pipeline("runs/predictions"):
+    predictions = predict_prices(feature_rows)
+```
+Batch stages can run chunks in parallel with `workers`. Unkeyed batches
+assemble one final artifact; keyed batches save one result per key.
+### Shared Archives
+Stages can choose named archives.
+```python
+@pypelite.stage("prices", archive="market", key=("date", "symbol"))
+def prices(date, symbol):
+    return market_api.price(date, symbol)
+with pypelite.pipeline(
+    archives={"default": "runs/model", "market": "archive/market"},
+):
+    run_price_model()
+```
+Shared archives make it easy for many experiments to reuse the same market
+data while keeping model outputs in their own run directories.

pypelite-0.1.0/pyproject.toml ADDED Viewed

@@ -0,0 +1,30 @@
+[build-system]
+requires = ["setuptools>=77", "wheel"]
+build-backend = "setuptools.build_meta"
+[project]
+name = "pypelite"
+version = "0.1.0"
+description = "Tiny pipeline library for ordinary Python scripts."
+readme = "README.md"
+requires-python = ">=3.12"
+license = "MIT"
+license-files = ["LICENSE"]
+authors = [{ name = "pypelite contributors" }]
+dependencies = []
+keywords = ["pipeline", "cache", "workflow", "data"]
+classifiers = [
+    "Development Status :: 3 - Alpha",
+    "Intended Audience :: Developers",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.12",
+    "Programming Language :: Python :: 3.13",
+    "Topic :: Software Development :: Libraries :: Python Modules",
+]
+[tool.setuptools.packages.find]
+where = ["src"]
+[tool.pytest.ini_options]
+testpaths = ["tests"]
+pythonpath = ["src"]

pypelite-0.1.0/setup.cfg ADDED Viewed

@@ -0,0 +1,4 @@
+[egg_info]
+tag_build =
+tag_date = 0

pypelite-0.1.0/src/pypelite/__init__.py ADDED Viewed

@@ -0,0 +1,326 @@
+"""Tiny pipeline library for ordinary Python scripts."""
+import concurrent.futures
+import contextlib
+import contextvars
+import dataclasses
+import enum
+import functools
+import hashlib
+import inspect
+import itertools
+import pathlib
+import pickle
+import shutil
+import tempfile
+# Version string of the package.
+__version__ = "0.1.0"
+# Public names exported by ``from pypelite import *``.
+__all__ = ["MultiprocessingType", "pipeline", "stage"]
+# The Pipeline currently entered via ``pipeline()``; unset outside any context.
+_ACTIVE_PIPELINE = contextvars.ContextVar("pypelite_active_pipeline")
+class _PipelineComplete(Exception):
+    pass
+class MultiprocessingType(enum.Enum):
+    THREAD = "thread"
+    PROCESS = "process"
+    def pool(self, workers):
+        if self == self.THREAD:
+            return concurrent.futures.ThreadPoolExecutor(workers)
+        return concurrent.futures.ProcessPoolExecutor(workers)
+@dataclasses.dataclass
+class StageState:
+    stage: object
+    path: pathlib.Path
+    refresh: bool
+    touched: set
+    def artifact_dir(self, key):
+        return self.path / hashlib.sha256(pickle.dumps(key)).hexdigest()
+def _names(value):
+    return (
+        set()
+        if value is None
+        else {value} if isinstance(value, str) else set(value)
+    )
+def _dump(path, value):
+    path.parent.mkdir(parents=True, exist_ok=True)
+    with tempfile.NamedTemporaryFile(delete=False, dir=path.parent) as f:
+        pickle.dump(value, f, protocol=pickle.HIGHEST_PROTOCOL)
+        temp_path = f.name
+    pathlib.Path(temp_path).replace(path)
+class Pipeline:
+    def __init__(self, path=None, **config):
+        archives = config.get("archives") or {"default": path}
+        if not isinstance(archives, dict):
+            archives = {"default": archives}
+        self.archives = {
+            name: pathlib.Path(path) for name, path in archives.items()
+        }
+        self.refresh = _names(config.get("refresh"))
+        self.clean = _names(config.get("clean"))
+        self.skip = _names(config.get("skip"))
+        self.until = config.get("until")
+        self.stage_state = {}
+        for path in self.archives.values():
+            path.mkdir(parents=True, exist_ok=True)
+    def state_for(self, stage):
+        if state := self.stage_state.get(stage.name):
+            return state
+        self.stage_state[stage.name] = StageState(
+            stage,
+            self.archives[stage.archive] / stage.name,
+            "all" in self.refresh or stage.name in self.refresh,
+            set(),
+        )
+        return self.stage_state[stage.name]
+    def finish(self, completed):
+        if not completed:
+            return
+        for state in self.stage_state.values():
+            if state.touched and (
+                "all" in self.clean or state.stage.name in self.clean
+            ):
+                for artifact in state.path.glob("*"):
+                    if artifact.name not in state.touched:
+                        shutil.rmtree(artifact)
+class Stage:
+    def __init__(self, generate, **config):
+        functools.update_wrapper(self, generate)
+        self.generate = generate
+        self.name = config.get("name") or generate.__name__
+        self.archive = config.get("archive", "default")
+        self.key = config.get("key")
+        self.batch_size = config.get("batch_size")
+        self.skip_value = config.get("skip_value")
+        self.workers = config.get("workers")
+        self.executor = config.get("executor") or MultiprocessingType.THREAD
+        if not isinstance(self.executor, MultiprocessingType):
+            self.executor = MultiprocessingType(self.executor)
+        self.signature = inspect.signature(generate)
+        batch = config.get("batch")
+        self.batch = (
+            next(iter(self.signature.parameters)) if batch is True else batch
+        )
+    def __call__(self, *args, **kwargs):
+        ctx = _ACTIVE_PIPELINE.get(None)
+        if ctx is None:
+            raise RuntimeError("pypelite.stage called outside pipeline()")
+        if self.name in ctx.skip:
+            value = self.skip_value
+        else:
+            value = self.run(ctx.state_for(self), args, kwargs)
+        if ctx.until == self.name:
+            raise _PipelineComplete
+        return value
+    def bound(self, args, kwargs):
+        bound = self.signature.bind(*args, **kwargs)
+        bound.apply_defaults()
+        return bound.arguments
+    def run(self, state, args, kwargs):
+        artifact_dir = state.artifact_dir(self.bound(args, kwargs))
+        out = artifact_dir / "artifact.pkl"
+        if out.exists() and not state.refresh:
+            return pickle.loads(out.read_bytes())
+        value = self.generate(*args, **kwargs)
+        _dump(out, value)
+        return value
+class KeyedStage(Stage):
+    def key_value(self, args, kwargs):
+        arguments = self.bound(args, kwargs)
+        if callable(self.key):
+            return self.key(**arguments)
+        if isinstance(self.key, str):
+            return arguments[self.key]
+        return tuple(arguments[name] for name in self.key)
+    def run(self, state, args, kwargs):
+        key = self.key_value(args, kwargs)
+        artifact_dir = state.artifact_dir(key)
+        out = artifact_dir / "artifact.pkl"
+        if out.exists() and not state.refresh:
+            value = pickle.loads(out.read_bytes())
+        else:
+            value = self.generate(*args, **kwargs)
+            _dump(artifact_dir / "key.pkl", key)
+            _dump(out, value)
+        state.touched.add(artifact_dir.name)
+        return value
+class BatchStage(Stage):
+    def run_batches(self, chunks, args_d):
+        if not self.workers:
+            for chunk in chunks:
+                yield self.generate(**{**args_d, self.batch: list(chunk)})
+            return
+        with self.executor.pool(self.workers) as worker_pool:
+            futures = [
+                worker_pool.submit(
+                    self.generate, **{**args_d, self.batch: list(chunk)}
+                )
+                for chunk in chunks
+            ]
+            for future in futures:
+                yield future.result()
+class BatchCollectionStage(BatchStage):
+    def run(self, state, args, kwargs):
+        args_d = self.bound(args, kwargs)
+        artifact_dir = state.artifact_dir(args_d)
+        final = artifact_dir / "artifact.pkl"
+        if final.exists() and not state.refresh:
+            return pickle.loads(final.read_bytes())
+        rows = list(args_d[self.batch])
+        size = self.batch_size or len(rows) or 1
+        chunks = list(enumerate(itertools.batched(rows, size)))
+        values = [None] * len(chunks)
+        missing = []
+        for index, chunk in chunks:
+            part = artifact_dir / "chunks" / str(index)
+            out = part / "artifact.pkl"
+            if out.exists() and not state.refresh:
+                values[index] = pickle.loads(out.read_bytes())
+            else:
+                missing.append((index, chunk, part))
+        for (index, _chunk, part), batch_value in zip(
+            missing,
+            self.run_batches(
+                (chunk for _index, chunk, _part in missing), args_d
+            ),
+        ):
+            values[index] = batch_value
+            _dump(part / "artifact.pkl", batch_value)
+        value = list(itertools.chain.from_iterable(values))
+        _dump(final, value)
+        return value
+class BatchKeyedStage(BatchStage, KeyedStage):
+    def item_key(self, record):
+        if callable(self.key):
+            return self.key(record)
+        if isinstance(record, dict):
+            if isinstance(self.key, str):
+                return record[self.key]
+            return tuple(record[name] for name in self.key)
+        if isinstance(self.key, str):
+            return getattr(record, self.key)
+        return tuple(getattr(record, name) for name in self.key)
+    def run(self, state, args, kwargs):
+        args_d = self.bound(args, kwargs)
+        rows = list(args_d[self.batch])
+        keys = [self.item_key(row) for row in rows]
+        artifact_dirs = [state.artifact_dir(key) for key in keys]
+        results = [None] * len(rows)
+        missing = []
+        for index, artifact_dir in enumerate(artifact_dirs):
+            out = artifact_dir / "artifact.pkl"
+            if out.exists() and not state.refresh:
+                results[index] = pickle.loads(out.read_bytes())
+                state.touched.add(artifact_dir.name)
+            else:
+                missing.append((index, rows[index]))
+        size = self.batch_size or len(missing) or 1
+        for indexes, values in zip(
+            itertools.batched([index for index, _row in missing], size),
+            self.run_batches(
+                itertools.batched([row for _index, row in missing], size),
+                args_d,
+            ),
+        ):
+            if len(indexes) != len(values):
+                raise ValueError("batch result length must match input length")
+            for index, value in zip(indexes, values):
+                artifact_dir = artifact_dirs[index]
+                _dump(artifact_dir / "key.pkl", keys[index])
+                _dump(artifact_dir / "artifact.pkl", value)
+                state.touched.add(artifact_dir.name)
+                results[index] = value
+        return results
+def stage(
+    name=None,
+    *,
+    archive="default",
+    key=None,
+    batch=None,
+    batch_size=None,
+    skip_value=None,
+    workers=None,
+    executor=None,
+):
+    config = {
+        "archive": archive,
+        "key": key,
+        "batch": batch,
+        "batch_size": batch_size,
+        "skip_value": skip_value,
+        "workers": workers,
+        "executor": executor,
+    }
+    stage_class = Stage if key is None else KeyedStage
+    if batch is not None:
+        stage_class = BatchCollectionStage if key is None else BatchKeyedStage
+    if callable(name):
+        return stage_class(name)
+    def decorate(generate):
+        return stage_class(generate, name=name, **config)
+    return decorate
+@contextlib.contextmanager
+def pipeline(
+    path=None,
+    *,
+    archives=None,
+    refresh=None,
+    clean=None,
+    skip=None,
+    until=None,
+):
+    if _ACTIVE_PIPELINE.get(None) is not None:
+        raise RuntimeError("pypelite.pipeline contexts cannot be nested")
+    ctx = Pipeline(
+        path,
+        archives=archives,
+        refresh=refresh,
+        clean=clean,
+        skip=skip,
+        until=until,
+    )
+    token = _ACTIVE_PIPELINE.set(ctx)
+    completed = False
+    try:
+        yield ctx
+        completed = True
+    except _PipelineComplete:
+        completed = True
+    finally:
+        ctx.finish(completed)
+        _ACTIVE_PIPELINE.reset(token)

pypelite-0.1.0/src/pypelite.egg-info/PKG-INFO ADDED Viewed

@@ -0,0 +1,136 @@
+Metadata-Version: 2.4
+Name: pypelite
+Version: 0.1.0
+Summary: Tiny pipeline library for ordinary Python scripts.
+Author: pypelite contributors
+License-Expression: MIT
+Keywords: pipeline,cache,workflow,data
+Classifier: Development Status :: 3 - Alpha
+Classifier: Intended Audience :: Developers
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
+Classifier: Topic :: Software Development :: Libraries :: Python Modules
+Requires-Python: >=3.12
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Dynamic: license-file
+# pypelite
+_Pypelite is a tiny pipeline library for ordinary Python scripts._
+```python
+import pypelite
+@pypelite.stage("load", batch="symbols", batch_size=50, workers=4)
+def load_prices(symbols):
+    return market_api.fetch_prices(symbols)
+@pypelite.stage("features")
+def build_features(prices_df):
+    return make_model_features(prices_df)
+@pypelite.stage("train")
+def train_model(features_df):
+    return fit_price_model(features_df)
+with pypelite.pipeline("runs/price-model"):
+    prices_df = load_prices(["AAPL", "MSFT", "NVDA"])
+    features_df = build_features(prices_df)
+    model = train_model(features_df)
+```
+Pipelines are resumable: completed stages load from disk, and selected stages
+can be refreshed when they need to run again. No DSL, no DAG boilerplate, no
+Airflow deployment. The Python code is the pipeline.
+## Installation
+```sh
+pip install pypelite
+```
+## Refresh, Skip, Clean
+Control a run from the pipeline context.
+```python
+with pypelite.pipeline(
+    "runs/experiment",
+    refresh=["features"],
+    skip=["train"],
+    clean=["predict"],
+    until="features",
+):
+    run_price_model()
+```
+- `refresh` recomputes named stages touched by the run.
+- `skip` returns a stage's `skip_value` without touching its cache.
+- `clean` removes old keyed artifacts not touched by a successful run.
+- `until` stops after the named stage completes.
+## Advanced Features
+### Item Caches
+Use `key` when each item should have its own saved result.
+```python
+@pypelite.stage("predict", key="symbol")
+def predict_price(symbol, feature_row):
+    return model.predict(feature_row)
+with pypelite.pipeline("runs/predictions"):
+    for symbol, feature_row in features_df.iterrows():
+        predict_price(symbol, feature_row)
+```
+### Fanout
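+Use `workers` when fanout should run in parallel. Pair it with `batch_size`:
+without one, all pending items go to the stage in a single call, which leaves
+only one chunk for the workers to run.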
+```python
+@pypelite.stage("features", key="symbol", batch="symbols", workers=4)
+def build_symbol_features(symbols):
+    prices_df = market_api.fetch_prices(symbols)
+    return make_model_features_by_symbol(prices_df)
+with pypelite.pipeline("runs/features"):
+    feature_rows = build_symbol_features(["AAPL", "MSFT", "NVDA"])
+```
+### Batches
+Use `batch` when work should run in chunks.
+```python
+@pypelite.stage("predict", key="symbol", batch="rows", batch_size=200)
+def predict_prices(rows):
+    return model_api.batch_predict(rows)
+with pypelite.pipeline("runs/predictions"):
+    predictions = predict_prices(feature_rows)
+```
+Batch stages can run chunks in parallel with `workers`. Unkeyed batches
+assemble one final artifact; keyed batches save one result per key.
+### Shared Archives
+Stages can choose named archives.
+```python
+@pypelite.stage("prices", archive="market", key=("date", "symbol"))
+def prices(date, symbol):
+    return market_api.price(date, symbol)
+with pypelite.pipeline(
+    archives={"default": "runs/model", "market": "archive/market"},
+):
+    run_price_model()
+```
+Shared archives make it easy for many experiments to reuse the same market
+data while keeping model outputs in their own run directories.

pypelite-0.1.0/src/pypelite.egg-info/SOURCES.txt ADDED Viewed

@@ -0,0 +1,9 @@
+LICENSE
+README.md
+pyproject.toml
+src/pypelite/__init__.py
+src/pypelite.egg-info/PKG-INFO
+src/pypelite.egg-info/SOURCES.txt
+src/pypelite.egg-info/dependency_links.txt
+src/pypelite.egg-info/top_level.txt
+tests/test_pipeline.py

pypelite-0.1.0/src/pypelite.egg-info/dependency_links.txt ADDED Viewed

	@@ -0,0 +1 @@
1	+

pypelite-0.1.0/src/pypelite.egg-info/top_level.txt ADDED Viewed

	@@ -0,0 +1 @@
1	+ pypelite

pypelite-0.1.0/tests/test_pipeline.py ADDED Viewed

@@ -0,0 +1,152 @@
+import pypelite
+def test_basic_pipeline_reuses_saved_stage_output(tmp_path):
+    version = {"value": 1}
+    @pypelite.stage("load")
+    def load_records(path):
+        return f"{path}:v{version['value']}"
+    @pypelite.stage("build")
+    def build_table(records):
+        return records.upper()
+    with pypelite.pipeline(tmp_path):
+        records = load_records("records.json")
+        assert build_table(records) == "RECORDS.JSON:V1"
+    version["value"] = 2
+    with pypelite.pipeline(tmp_path):
+        records = load_records("records.json")
+        assert build_table(records) == "RECORDS.JSON:V1"
+def test_refresh_recomputes_named_stage(tmp_path):
+    version = {"value": 1}
+    @pypelite.stage("build")
+    def build_table(source):
+        return f"{source}:v{version['value']}"
+    with pypelite.pipeline(tmp_path):
+        assert build_table("records") == "records:v1"
+    version["value"] = 2
+    with pypelite.pipeline(tmp_path, refresh=["build"]):
+        assert build_table("records") == "records:v2"
+def test_skip_returns_stage_skip_value(tmp_path):
+    @pypelite.stage("history", skip_value=None)
+    def history(case_id):
+        return f"history {case_id}"
+    with pypelite.pipeline(tmp_path, skip=["history"]):
+        assert history("a") is None
+def test_clean_removes_old_keyed_results(tmp_path):
+    multiplier = {"value": 2}
+    @pypelite.stage("score", key="case_id")
+    def score(case_id, value):
+        return value * multiplier["value"]
+    with pypelite.pipeline(tmp_path):
+        assert score("a", 3) == 6
+        assert score("b", 4) == 8
+    with pypelite.pipeline(tmp_path, clean=["score"]):
+        assert score("b", 99) == 8
+    multiplier["value"] = 10
+    with pypelite.pipeline(tmp_path):
+        assert score("a", 3) == 30
+        assert score("b", 4) == 8
+def test_keyed_stage_reuses_saved_result_by_key(tmp_path):
+    multiplier = {"value": 2}
+    @pypelite.stage("score", key="case_id")
+    def score(case_id, value):
+        return value * multiplier["value"]
+    with pypelite.pipeline(tmp_path):
+        assert score("a", 3) == 6
+    multiplier["value"] = 10
+    with pypelite.pipeline(tmp_path):
+        assert score("a", 99) == 6
+        assert score("b", 4) == 40
+def test_batch_stage_splits_work_into_chunks(tmp_path):
+    calls = []
+    @pypelite.stage("judge", batch="records", batch_size=2)
+    def judge(records):
+        calls.append([record["case_id"] for record in records])
+        return [record["case_id"].upper() for record in records]
+    records = [
+        {"case_id": "a"},
+        {"case_id": "b"},
+        {"case_id": "c"},
+    ]
+    with pypelite.pipeline(tmp_path):
+        assert judge(records) == ["A", "B", "C"]
+    assert calls == [["a", "b"], ["c"]]
+def test_batch_keyed_stage_reuses_saved_items(tmp_path):
+    version = {"value": 1}
+    @pypelite.stage("judge", key="case_id", batch="records", batch_size=2)
+    def judge(records):
+        return [
+            f"{record['case_id'].upper()}:v{version['value']}"
+            for record in records
+        ]
+    with pypelite.pipeline(tmp_path):
+        assert judge([{"case_id": "a"}, {"case_id": "b"}]) == [
+            "A:v1",
+            "B:v1",
+        ]
+    version["value"] = 2
+    with pypelite.pipeline(tmp_path):
+        assert judge([{"case_id": "b"}, {"case_id": "c"}]) == [
+            "B:v1",
+            "C:v2",
+        ]
+def test_stage_can_use_shared_archive(tmp_path):
+    version = {"value": 1}
+    shared_archive = tmp_path / "shared"
+    @pypelite.stage("prices", archive="shared")
+    def prices(symbol):
+        return f"{symbol}:v{version['value']}"
+    with pypelite.pipeline(
+        archives={"default": tmp_path / "run-a", "shared": shared_archive}
+    ):
+        assert prices("ABC") == "ABC:v1"
+    version["value"] = 2
+    with pypelite.pipeline(
+        archives={"default": tmp_path / "run-b", "shared": shared_archive}
+    ):
+        assert prices("ABC") == "ABC:v1"