makeprov 0.4.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,143 @@
1
+ Metadata-Version: 2.4
2
+ Name: makeprov
3
+ Version: 0.4.1
4
+ Summary: A provenance tracking library for simple Python workflows
5
+ Author-email: Benno Kruit <b.b.kruit@amsterdamumc.nl>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/bennokr/makeprov
8
+ Project-URL: Documentation, https://makeprov.readthedocs.io
9
+ Project-URL: Issue Tracker, https://github.com/bennokr/makeprov/issues
10
+ Keywords: provenance,prov,workflow,python
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: License :: OSI Approved :: MIT License
13
+ Classifier: Operating System :: OS Independent
14
+ Description-Content-Type: text/markdown
15
+ Requires-Dist: parse>=1.20
16
+ Provides-Extra: dev
17
+ Requires-Dist: defopt>=6; extra == "dev"
18
+ Requires-Dist: pytest; extra == "dev"
19
+ Requires-Dist: rdflib>=6.0; extra == "dev"
20
+ Requires-Dist: pyshacl>=0.20; extra == "dev"
21
+ Provides-Extra: docs
22
+ Requires-Dist: sphinx>=7; extra == "docs"
23
+ Requires-Dist: myst-parser[linkify]; extra == "docs"
24
+ Requires-Dist: sphinx-rtd-theme; extra == "docs"
25
+ Requires-Dist: sphinx-autodoc-typehints; extra == "docs"
26
+ Requires-Dist: tomli; python_version < "3.11" and extra == "docs"
27
+
28
+ # makeprov: Pythonic Provenance Tracking
29
+
30
+ This library provides a way to track file provenance in Python workflows using PROV (W3C Provenance) semantics. Decorators declare inputs and outputs, provenance is written automatically, and templated targets can be resolved on demand.
31
+
32
+ ## Features
33
+
34
+ - Use decorators to define rules for workflows.
35
+ - Resolve templated targets (``results/{sample}.txt``) via ``parse``-style patterns.
36
+ - Support phony/meta rules for orchestration alongside file-producing rules.
37
+ - Automatically generate RDF-based provenance metadata.
38
+ - Handles input and output streams.
39
+ - Integrates with Python's type hints for easy configuration.
40
+ - Outputs provenance data in TRIG format if `rdflib` is installed; otherwise outputs json-ld.
41
+
42
+ ## Installation
43
+
44
+ You can install the module directly from PyPI:
45
+
46
+ ```bash
47
+ pip install makeprov
48
+ ```
49
+
50
+ ## Usage
51
+
52
+ Here’s an example of how to use this package in your Python scripts:
53
+
54
+ ```python
55
+ from makeprov import rule, InPath, OutPath, build
56
+
57
+ @rule()
58
+ def process_data(
59
+ sample: int | None = None,
60
+ input_file: InPath = InPath('data/{sample:d}.txt'),
61
+ output_file: OutPath = OutPath('results/{sample:d}.txt')
62
+ ):
63
+ with input_file.open('r') as infile, output_file.open('w') as outfile:
64
+ data = infile.read()
65
+ outfile.write(data.upper())
66
+
67
+ if __name__ == '__main__':
68
+ # Build a specific templated target and its prerequisites
69
+ from makeprov import build
70
+ build('results/1.txt')
71
+
72
+ # Or expose rules via a command line interface
73
+ import defopt
74
+ defopt.run(process_data)
75
+ ```
76
+
77
+ You can execute `example.py` via the CLI like so:
78
+
79
+ ```bash
80
+ python example.py build-all
81
+
82
+ # Or set configuration through the CLI
83
+ python example.py build-all --conf='{"base_iri": "http://mybaseiri.org/", "prov_dir": "my_prov_directory"}' --force --input_file input.txt --output_file final_output.txt
84
+
85
+ # Or set configuration through a TOML file
86
+ python example.py build-all --conf=@my_config.toml
87
+
88
+ # Inspect dependency resolution without executing rules
89
+ python example.py --explain results/1.txt
90
+ python example.py --to-dot results/1.txt
91
+ ```
92
+
93
+ ### Complex CSV-to-RDF Workflow
94
+
95
+ For a more involved scenario, see [`complex_example.py`](complex_example.py). It creates multiple CSV files, aggregates their contents, and emits an RDF graph that is both serialized to disk and embedded into the provenance dataset because the function returns an `rdflib.Graph`.
96
+
97
+ ```python
98
+ @rule()
99
+ def export_totals_graph(
100
+ totals_csv: InPath = InPath("data/region_totals.csv"),
101
+ graph_ttl: OutPath = OutPath("data/region_totals.ttl"),
102
+ ) -> Graph:
103
+ graph = Graph()
104
+ graph.bind("sales", SALES)
105
+
106
+ with totals_csv.open("r", newline="") as handle:
107
+ for row in csv.DictReader(handle):
108
+ region_key = row["region"].lower().replace(" ", "-")
109
+ subject = SALES[f"region/{region_key}"]
110
+
111
+ graph.add((subject, RDF.type, SALES.RegionTotal))
112
+ graph.add((subject, SALES.regionName, Literal(row["region"])))
113
+ graph.add((subject, SALES.totalUnits, Literal(row["total_units"], datatype=XSD.integer)))
114
+ graph.add((subject, SALES.totalRevenue, Literal(row["total_revenue"], datatype=XSD.decimal)))
115
+
116
+ with graph_ttl.open("w") as handle:
117
+ handle.write(graph.serialize(format="turtle"))
118
+
119
+ return graph
120
+ ```
121
+
122
+ Run the entire workflow, including CSV generation and RDF export, with:
123
+
124
+ ```bash
125
+ python complex_example.py build-sales-report
126
+ ```
127
+
128
+ ### Configuration
129
+
130
+ You can customize the provenance tracking with the following options:
131
+
132
+ - `base_iri` (str): Base IRI for new resources
133
+ - `prov_dir` (str): Directory for writing PROV `.json-ld` or `.trig` files
134
+ - `force` (bool): Force running of dependencies
135
+ - `dry_run` (bool): Only check workflow, don't run anything
136
+
137
+ ## Contributing
138
+
139
+ Contributions are welcome! Please open an issue or submit a pull request.
140
+
141
+ ## License
142
+
143
+ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
@@ -0,0 +1,116 @@
1
+ # makeprov: Pythonic Provenance Tracking
2
+
3
+ This library provides a way to track file provenance in Python workflows using PROV (W3C Provenance) semantics. Decorators declare inputs and outputs, provenance is written automatically, and templated targets can be resolved on demand.
4
+
5
+ ## Features
6
+
7
+ - Use decorators to define rules for workflows.
8
+ - Resolve templated targets (``results/{sample}.txt``) via ``parse``-style patterns.
9
+ - Support phony/meta rules for orchestration alongside file-producing rules.
10
+ - Automatically generate RDF-based provenance metadata.
11
+ - Handles input and output streams.
12
+ - Integrates with Python's type hints for easy configuration.
13
+ - Outputs provenance data in TRIG format if `rdflib` is installed; otherwise outputs json-ld.
14
+
15
+ ## Installation
16
+
17
+ You can install the module directly from PyPI:
18
+
19
+ ```bash
20
+ pip install makeprov
21
+ ```
22
+
23
+ ## Usage
24
+
25
+ Here’s an example of how to use this package in your Python scripts:
26
+
27
+ ```python
28
+ from makeprov import rule, InPath, OutPath, build
29
+
30
+ @rule()
31
+ def process_data(
32
+ sample: int | None = None,
33
+ input_file: InPath = InPath('data/{sample:d}.txt'),
34
+ output_file: OutPath = OutPath('results/{sample:d}.txt')
35
+ ):
36
+ with input_file.open('r') as infile, output_file.open('w') as outfile:
37
+ data = infile.read()
38
+ outfile.write(data.upper())
39
+
40
+ if __name__ == '__main__':
41
+ # Build a specific templated target and its prerequisites
42
+ from makeprov import build
43
+ build('results/1.txt')
44
+
45
+ # Or expose rules via a command line interface
46
+ import defopt
47
+ defopt.run(process_data)
48
+ ```
49
+
50
+ You can execute `example.py` via the CLI like so:
51
+
52
+ ```bash
53
+ python example.py build-all
54
+
55
+ # Or set configuration through the CLI
56
+ python example.py build-all --conf='{"base_iri": "http://mybaseiri.org/", "prov_dir": "my_prov_directory"}' --force --input_file input.txt --output_file final_output.txt
57
+
58
+ # Or set configuration through a TOML file
59
+ python example.py build-all --conf=@my_config.toml
60
+
61
+ # Inspect dependency resolution without executing rules
62
+ python example.py --explain results/1.txt
63
+ python example.py --to-dot results/1.txt
64
+ ```
65
+
66
+ ### Complex CSV-to-RDF Workflow
67
+
68
+ For a more involved scenario, see [`complex_example.py`](complex_example.py). It creates multiple CSV files, aggregates their contents, and emits an RDF graph that is both serialized to disk and embedded into the provenance dataset because the function returns an `rdflib.Graph`.
69
+
70
+ ```python
71
+ @rule()
72
+ def export_totals_graph(
73
+ totals_csv: InPath = InPath("data/region_totals.csv"),
74
+ graph_ttl: OutPath = OutPath("data/region_totals.ttl"),
75
+ ) -> Graph:
76
+ graph = Graph()
77
+ graph.bind("sales", SALES)
78
+
79
+ with totals_csv.open("r", newline="") as handle:
80
+ for row in csv.DictReader(handle):
81
+ region_key = row["region"].lower().replace(" ", "-")
82
+ subject = SALES[f"region/{region_key}"]
83
+
84
+ graph.add((subject, RDF.type, SALES.RegionTotal))
85
+ graph.add((subject, SALES.regionName, Literal(row["region"])))
86
+ graph.add((subject, SALES.totalUnits, Literal(row["total_units"], datatype=XSD.integer)))
87
+ graph.add((subject, SALES.totalRevenue, Literal(row["total_revenue"], datatype=XSD.decimal)))
88
+
89
+ with graph_ttl.open("w") as handle:
90
+ handle.write(graph.serialize(format="turtle"))
91
+
92
+ return graph
93
+ ```
94
+
95
+ Run the entire workflow, including CSV generation and RDF export, with:
96
+
97
+ ```bash
98
+ python complex_example.py build-sales-report
99
+ ```
100
+
101
+ ### Configuration
102
+
103
+ You can customize the provenance tracking with the following options:
104
+
105
+ - `base_iri` (str): Base IRI for new resources
106
+ - `prov_dir` (str): Directory for writing PROV `.json-ld` or `.trig` files
107
+ - `force` (bool): Force running of dependencies
108
+ - `dry_run` (bool): Only check workflow, don't run anything
109
+
110
+ ## Contributing
111
+
112
+ Contributions are welcome! Please open an issue or submit a pull request.
113
+
114
+ ## License
115
+
116
+ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
@@ -0,0 +1,41 @@
1
+ [build-system]
2
+ requires = ["setuptools", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "makeprov"
7
+ version = "0.4.1"
8
+ description = "A provenance tracking library for simple Python workflows"
9
+ readme = "README.md"
10
+ license = { text = "MIT" }
11
+ authors = [{ name = "Benno Kruit", email = "b.b.kruit@amsterdamumc.nl" }]
12
+ keywords = ["provenance", "prov", "workflow", "python"]
13
+ classifiers = [
14
+ "Programming Language :: Python :: 3",
15
+ "License :: OSI Approved :: MIT License",
16
+ "Operating System :: OS Independent",
17
+ ]
18
+ dependencies = ["parse>=1.20"]
19
+
20
+ [project.optional-dependencies]
21
+ dev = [
22
+ "defopt>=6",
23
+ "pytest",
24
+ "rdflib>=6.0",
25
+ "pyshacl>=0.20"
26
+ ]
27
+ docs = [
28
+ "sphinx>=7",
29
+ "myst-parser[linkify]",
30
+ "sphinx-rtd-theme",
31
+ "sphinx-autodoc-typehints",
32
+ "tomli; python_version<'3.11'",
33
+ ]
34
+
35
+ [project.urls]
36
+ "Homepage" = "https://github.com/bennokr/makeprov"
37
+ "Documentation" = "https://makeprov.readthedocs.io"
38
+ "Issue Tracker" = "https://github.com/bennokr/makeprov/issues"
39
+
40
+ [tool.pytape]
41
+ test = "tests/test_makeprov.py"
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,44 @@
1
+ """Track file provenance in Python workflows using PROV semantics"""
2
+ from __future__ import annotations
3
+
4
+ from .config import ProvenanceConfig, GLOBAL_CONFIG, main
5
+ from .paths import ProvPath, InPath, OutPath
6
+ from .core import (
7
+ COMMANDS,
8
+ build,
9
+ build_all,
10
+ dry_run_build,
11
+ explain,
12
+ list_rules,
13
+ list_targets,
14
+ needs_update,
15
+ plan,
16
+ resolve_target,
17
+ root_targets,
18
+ rule,
19
+ to_dot,
20
+ )
21
+ from .rdfmixin import RDFMixin
22
+
23
+ __all__ = [
24
+ "ProvenanceConfig",
25
+ "GLOBAL_CONFIG",
26
+ "main",
27
+ "ProvPath",
28
+ "InPath",
29
+ "OutPath",
30
+ "rule",
31
+ "needs_update",
32
+ "build",
33
+ "build_all",
34
+ "COMMANDS",
35
+ "resolve_target",
36
+ "plan",
37
+ "explain",
38
+ "to_dot",
39
+ "list_rules",
40
+ "list_targets",
41
+ "root_targets",
42
+ "dry_run_build",
43
+ "RDFMixin",
44
+ ]
@@ -0,0 +1,180 @@
1
+ from __future__ import annotations
2
+ from dataclasses import dataclass, fields, is_dataclass
3
+ from typing import Literal
4
+ import sys, logging, tomllib as toml, defopt
5
+ import argparse
6
+
7
# Allowed on-disk provenance serializations: "json" (JSON-LD) or "trig" (TriG).
ProvFormat = Literal["json", "trig"]


@dataclass
class ProvenanceConfig:
    """Runtime configuration for provenance generation.

    Args:
        base_iri: Default base IRI used when constructing provenance identifiers.
        prov_dir: Directory where provenance documents are written by default.
        prov_path: Explicit provenance output path that overrides ``prov_dir``.
        force: When ``True``, rebuild rules regardless of input/output freshness.
        merge: When ``True``, provenance from multiple rules is buffered and
            merged into a single document.
        dry_run: When ``True``, log rule execution without running the wrapped
            function.
        out_fmt: Output format for provenance files (``"json"`` or ``"trig"``).
        context: Whether JSON-LD outputs include the context inline.

    Examples:
        .. code-block:: python

            from makeprov.config import GLOBAL_CONFIG

            # Mutate the shared config in place. Rebinding the GLOBAL_CONFIG
            # name in your own module would NOT affect the library's settings.
            GLOBAL_CONFIG.prov_dir = "artifacts/prov"
            GLOBAL_CONFIG.out_fmt = "trig"
    """

    base_iri: str | None = None
    prov_dir: str = "prov"
    prov_path: str | None = None
    force: bool = False
    merge: bool = True
    dry_run: bool = False
    out_fmt: ProvFormat = "json"
    context: bool = False


# Process-wide default configuration; updated in place by apply_config()/main().
GLOBAL_CONFIG = ProvenanceConfig()
47
+
48
+
49
+ def apply_config(conf_obj, toml_ref):
50
+ """Update a dataclass configuration from TOML content.
51
+
52
+ Args:
53
+ conf_obj (dataclass): Configuration object to mutate in place.
54
+ toml_ref (str): Either a TOML string or an ``@``-prefixed path to a
55
+ TOML file.
56
+
57
+ Raises:
58
+ FileNotFoundError: If ``toml_ref`` points to a missing file.
59
+ tomllib.TOMLDecodeError: If TOML content cannot be parsed.
60
+
61
+ Examples:
62
+ Load configuration overrides from a file and apply them to the global
63
+ settings:
64
+
65
+ .. code-block:: python
66
+
67
+ from makeprov.config import GLOBAL_CONFIG, apply_config
68
+
69
+ apply_config(GLOBAL_CONFIG, "@config/provenance.toml")
70
+ """
71
+
72
+ def set_conf(dc, params):
73
+ for f in fields(dc):
74
+ if f.name in params:
75
+ cur, new = getattr(dc, f.name), params[f.name]
76
+ if is_dataclass(cur) and isinstance(new, dict):
77
+ set_conf(cur, new)
78
+ else:
79
+ setattr(dc, f.name, new)
80
+
81
+ logging.debug(f"Parsing config {toml_ref}")
82
+ t = toml_ref
83
+ param = toml.load(open(t[1:], "rb")) if t.startswith("@") else toml.loads(t)
84
+ logging.debug(f"Setting config {param}")
85
+ set_conf(conf_obj, param)
86
+
87
+
88
def main(subcommands=None, conf_obj=None, argparse_kwargs=None, **kwargs):
    """Entry point for running registered CLI subcommands.

    Args:
        subcommands (Iterable[Callable] | None): Functions decorated with
            :func:`makeprov.core.rule` to expose on the command line; defaults to
            registered commands.
        conf_obj (ProvenanceConfig | None): Configuration to update from command
            line flags; defaults to :data:`GLOBAL_CONFIG`.
        argparse_kwargs (dict | None): Extra keyword arguments forwarded to the
            parser built by :func:`defopt.run`. Defaults to no extras.
        **kwargs: Additional keyword arguments forwarded to :func:`defopt.run`.

    Examples:
        Expose decorated rules as CLI commands and honor configuration flags:

        .. code-block:: bash

            python -m makeprov --conf @config/provenance.toml --verbose my_rule arg1
    """

    # Imported lazily to avoid a circular import between config and core.
    from .core import COMMANDS, flush_prov_buffer, start_prov_buffer
    from .core import build, build_all, explain, to_dot

    subcommands = subcommands or COMMANDS
    conf_obj = conf_obj or GLOBAL_CONFIG
    # Copy rather than sharing a mutable default dict between calls.
    argparse_kwargs = dict(argparse_kwargs) if argparse_kwargs else {}

    parent = argparse.ArgumentParser(add_help=False)
    parent.add_argument(
        "-c",
        "--conf",
        action="append",
        default=[],
        help="Set config param from TOML snippet or @file.toml",
    )
    parent.add_argument(
        "-v", "--verbose", action="count", default=0,
        help="Show more logging output (-vv for even more)",
    )
    parent.add_argument(
        "-a", "--build-all", action="store_true",
        help="Build all concrete targets that have no dependents",
    )
    parent.add_argument(
        "-b", "--build",
        help="Recursively build a TARGET and its prerequisites",
        metavar="TARGET",
    )
    parent.add_argument(
        "-e", "--explain",
        help="Show dependency resolution for TARGET without running rules",
        metavar="TARGET",
    )
    parent.add_argument(
        "-d", "--to-dot",
        help="Render dependency graph for TARGET in DOT format",
        metavar="TARGET",
    )

    # Parse the shared global flags exactly once and apply their side effects
    # (logging level, config overrides) before any command runs.
    ns, _ = parent.parse_known_args(sys.argv[1:])
    level = ("WARNING", "INFO", "DEBUG")[min(max(ns.verbose, 0), 2)]
    logging.basicConfig(level=getattr(logging, level))
    for toml_ref in ns.conf:
        apply_config(conf_obj, toml_ref)
    logging.debug("Config: %s", conf_obj)

    # Start buffering before ANY command executes so the flush in `finally`
    # is always paired with a start (the shortcut flags also run rules).
    if conf_obj.merge:
        start_prov_buffer()
    try:
        if ns.build_all:
            build_all()
        elif ns.build:
            build(ns.build)
        elif ns.explain:
            explain(ns.explain)
        elif ns.to_dot:
            print(to_dot(ns.to_dot))
        else:
            defopt.run(
                subcommands,
                argv=sys.argv[1:],
                argparse_kwargs={"parents": [parent], **argparse_kwargs},
                **kwargs,
            )
    finally:
        if conf_obj.merge:
            flush_prov_buffer()