makeprov 0.4.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,143 @@
1
+ Metadata-Version: 2.4
2
+ Name: makeprov
3
+ Version: 0.4.1
4
+ Summary: A provenance tracking library for simple Python workflows
5
+ Author-email: Benno Kruit <b.b.kruit@amsterdamumc.nl>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/bennokr/makeprov
8
+ Project-URL: Documentation, https://makeprov.readthedocs.io
9
+ Project-URL: Issue Tracker, https://github.com/bennokr/makeprov/issues
10
+ Keywords: provenance,prov,workflow,python
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: License :: OSI Approved :: MIT License
13
+ Classifier: Operating System :: OS Independent
14
+ Description-Content-Type: text/markdown
15
+ Requires-Dist: parse>=1.20
16
+ Provides-Extra: dev
17
+ Requires-Dist: defopt>=6; extra == "dev"
18
+ Requires-Dist: pytest; extra == "dev"
19
+ Requires-Dist: rdflib>=6.0; extra == "dev"
20
+ Requires-Dist: pyshacl>=0.20; extra == "dev"
21
+ Provides-Extra: docs
22
+ Requires-Dist: sphinx>=7; extra == "docs"
23
+ Requires-Dist: myst-parser[linkify]; extra == "docs"
24
+ Requires-Dist: sphinx-rtd-theme; extra == "docs"
25
+ Requires-Dist: sphinx-autodoc-typehints; extra == "docs"
26
+ Requires-Dist: tomli; python_version < "3.11" and extra == "docs"
27
+
28
+ # makeprov: Pythonic Provenance Tracking
29
+
30
+ This library provides a way to track file provenance in Python workflows using PROV (W3C Provenance) semantics. Decorators declare inputs and outputs, provenance is written automatically, and templated targets can be resolved on demand.
31
+
32
+ ## Features
33
+
34
+ - Use decorators to define rules for workflows.
35
+ - Resolve templated targets (``results/{sample}.txt``) via ``parse``-style patterns.
36
+ - Support phony/meta rules for orchestration alongside file-producing rules.
37
+ - Automatically generate RDF-based provenance metadata.
38
+ - Handles input and output streams.
39
+ - Integrates with Python's type hints for easy configuration.
40
+ - Outputs provenance data in TRIG format if `rdflib` is installed; otherwise outputs json-ld.
41
+
42
+ ## Installation
43
+
44
+ You can install the module directly from PyPI:
45
+
46
+ ```bash
47
+ pip install makeprov
48
+ ```
49
+
50
+ ## Usage
51
+
52
+ Here’s an example of how to use this package in your Python scripts:
53
+
54
+ ```python
55
+ from makeprov import rule, InPath, OutPath, build
56
+
57
+ @rule()
58
+ def process_data(
59
+ sample: int | None = None,
60
+ input_file: InPath = InPath('data/{sample:d}.txt'),
61
+ output_file: OutPath = OutPath('results/{sample:d}.txt')
62
+ ):
63
+ with input_file.open('r') as infile, output_file.open('w') as outfile:
64
+ data = infile.read()
65
+ outfile.write(data.upper())
66
+
67
+ if __name__ == '__main__':
68
+ # Build a specific templated target and its prerequisites
69
+ from makeprov import build
70
+ build('results/1.txt')
71
+
72
+ # Or expose rules via a command line interface
73
+ import defopt
74
+ defopt.run(process_data)
75
+ ```
76
+
77
+ You can execute `example.py` via the CLI like so:
78
+
79
+ ```bash
80
+ python example.py build-all
81
+
82
+ # Or set configuration through the CLI
83
+ python example.py build-all --conf='{"base_iri": "http://mybaseiri.org/", "prov_dir": "my_prov_directory"}' --force --input_file input.txt --output_file final_output.txt
84
+
85
+ # Or set configuration through a TOML file
86
+ python example.py build-all --conf=@my_config.toml
87
+
88
+ # Inspect dependency resolution without executing rules
89
+ python example.py --explain results/1.txt
90
+ python example.py --to-dot results/1.txt
91
+ ```
92
+
93
+ ### Complex CSV-to-RDF Workflow
94
+
95
+ For a more involved scenario, see [`complex_example.py`](complex_example.py). It creates multiple CSV files, aggregates their contents, and emits an RDF graph that is both serialized to disk and embedded into the provenance dataset because the function returns an `rdflib.Graph`.
96
+
97
+ ```python
98
+ @rule()
99
+ def export_totals_graph(
100
+ totals_csv: InPath = InPath("data/region_totals.csv"),
101
+ graph_ttl: OutPath = OutPath("data/region_totals.ttl"),
102
+ ) -> Graph:
103
+ graph = Graph()
104
+ graph.bind("sales", SALES)
105
+
106
+ with totals_csv.open("r", newline="") as handle:
107
+ for row in csv.DictReader(handle):
108
+ region_key = row["region"].lower().replace(" ", "-")
109
+ subject = SALES[f"region/{region_key}"]
110
+
111
+ graph.add((subject, RDF.type, SALES.RegionTotal))
112
+ graph.add((subject, SALES.regionName, Literal(row["region"])))
113
+ graph.add((subject, SALES.totalUnits, Literal(row["total_units"], datatype=XSD.integer)))
114
+ graph.add((subject, SALES.totalRevenue, Literal(row["total_revenue"], datatype=XSD.decimal)))
115
+
116
+ with graph_ttl.open("w") as handle:
117
+ handle.write(graph.serialize(format="turtle"))
118
+
119
+ return graph
120
+ ```
121
+
122
+ Run the entire workflow, including CSV generation and RDF export, with:
123
+
124
+ ```bash
125
+ python complex_example.py build-sales-report
126
+ ```
127
+
128
+ ### Configuration
129
+
130
+ You can customize the provenance tracking with the following options:
131
+
132
+ - `base_iri` (str): Base IRI for new resources
133
+ - `prov_dir` (str): Directory for writing PROV `.json-ld` or `.trig` files
134
+ - `force` (bool): Force running of dependencies
135
+ - `dry_run` (bool): Only check workflow, don't run anything
136
+
137
+ ## Contributing
138
+
139
+ Contributions are welcome! Please open an issue or submit a pull request.
140
+
141
+ ## License
142
+
143
+ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
@@ -0,0 +1,116 @@
1
+ # makeprov: Pythonic Provenance Tracking
2
+
3
+ This library provides a way to track file provenance in Python workflows using PROV (W3C Provenance) semantics. Decorators declare inputs and outputs, provenance is written automatically, and templated targets can be resolved on demand.
4
+
5
+ ## Features
6
+
7
+ - Use decorators to define rules for workflows.
8
+ - Resolve templated targets (``results/{sample}.txt``) via ``parse``-style patterns.
9
+ - Support phony/meta rules for orchestration alongside file-producing rules.
10
+ - Automatically generate RDF-based provenance metadata.
11
+ - Handles input and output streams.
12
+ - Integrates with Python's type hints for easy configuration.
13
+ - Outputs provenance data in TRIG format if `rdflib` is installed; otherwise outputs json-ld.
14
+
15
+ ## Installation
16
+
17
+ You can install the module directly from PyPI:
18
+
19
+ ```bash
20
+ pip install makeprov
21
+ ```
22
+
23
+ ## Usage
24
+
25
+ Here’s an example of how to use this package in your Python scripts:
26
+
27
+ ```python
28
+ from makeprov import rule, InPath, OutPath, build
29
+
30
+ @rule()
31
+ def process_data(
32
+ sample: int | None = None,
33
+ input_file: InPath = InPath('data/{sample:d}.txt'),
34
+ output_file: OutPath = OutPath('results/{sample:d}.txt')
35
+ ):
36
+ with input_file.open('r') as infile, output_file.open('w') as outfile:
37
+ data = infile.read()
38
+ outfile.write(data.upper())
39
+
40
+ if __name__ == '__main__':
41
+ # Build a specific templated target and its prerequisites
42
+ from makeprov import build
43
+ build('results/1.txt')
44
+
45
+ # Or expose rules via a command line interface
46
+ import defopt
47
+ defopt.run(process_data)
48
+ ```
49
+
50
+ You can execute `example.py` via the CLI like so:
51
+
52
+ ```bash
53
+ python example.py build-all
54
+
55
+ # Or set configuration through the CLI
56
+ python example.py build-all --conf='{"base_iri": "http://mybaseiri.org/", "prov_dir": "my_prov_directory"}' --force --input_file input.txt --output_file final_output.txt
57
+
58
+ # Or set configuration through a TOML file
59
+ python example.py build-all --conf=@my_config.toml
60
+
61
+ # Inspect dependency resolution without executing rules
62
+ python example.py --explain results/1.txt
63
+ python example.py --to-dot results/1.txt
64
+ ```
65
+
66
+ ### Complex CSV-to-RDF Workflow
67
+
68
+ For a more involved scenario, see [`complex_example.py`](complex_example.py). It creates multiple CSV files, aggregates their contents, and emits an RDF graph that is both serialized to disk and embedded into the provenance dataset because the function returns an `rdflib.Graph`.
69
+
70
+ ```python
71
+ @rule()
72
+ def export_totals_graph(
73
+ totals_csv: InPath = InPath("data/region_totals.csv"),
74
+ graph_ttl: OutPath = OutPath("data/region_totals.ttl"),
75
+ ) -> Graph:
76
+ graph = Graph()
77
+ graph.bind("sales", SALES)
78
+
79
+ with totals_csv.open("r", newline="") as handle:
80
+ for row in csv.DictReader(handle):
81
+ region_key = row["region"].lower().replace(" ", "-")
82
+ subject = SALES[f"region/{region_key}"]
83
+
84
+ graph.add((subject, RDF.type, SALES.RegionTotal))
85
+ graph.add((subject, SALES.regionName, Literal(row["region"])))
86
+ graph.add((subject, SALES.totalUnits, Literal(row["total_units"], datatype=XSD.integer)))
87
+ graph.add((subject, SALES.totalRevenue, Literal(row["total_revenue"], datatype=XSD.decimal)))
88
+
89
+ with graph_ttl.open("w") as handle:
90
+ handle.write(graph.serialize(format="turtle"))
91
+
92
+ return graph
93
+ ```
94
+
95
+ Run the entire workflow, including CSV generation and RDF export, with:
96
+
97
+ ```bash
98
+ python complex_example.py build-sales-report
99
+ ```
100
+
101
+ ### Configuration
102
+
103
+ You can customize the provenance tracking with the following options:
104
+
105
+ - `base_iri` (str): Base IRI for new resources
106
+ - `prov_dir` (str): Directory for writing PROV `.json-ld` or `.trig` files
107
+ - `force` (bool): Force running of dependencies
108
+ - `dry_run` (bool): Only check workflow, don't run anything
109
+
110
+ ## Contributing
111
+
112
+ Contributions are welcome! Please open an issue or submit a pull request.
113
+
114
+ ## License
115
+
116
+ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
@@ -0,0 +1,41 @@
1
+ [build-system]
2
+ requires = ["setuptools", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "makeprov"
7
+ version = "0.4.1"
8
+ description = "A provenance tracking library for simple Python workflows"
9
+ readme = "README.md"
10
+ license = { text = "MIT" }
11
+ authors = [{ name = "Benno Kruit", email = "b.b.kruit@amsterdamumc.nl" }]
12
+ keywords = ["provenance", "prov", "workflow", "python"]
13
+ classifiers = [
14
+ "Programming Language :: Python :: 3",
15
+ "License :: OSI Approved :: MIT License",
16
+ "Operating System :: OS Independent",
17
+ ]
18
+ dependencies = ["parse>=1.20"]
19
+
20
+ [project.optional-dependencies]
21
+ dev = [
22
+ "defopt>=6",
23
+ "pytest",
24
+ "rdflib>=6.0",
25
+ "pyshacl>=0.20"
26
+ ]
27
+ docs = [
28
+ "sphinx>=7",
29
+ "myst-parser[linkify]",
30
+ "sphinx-rtd-theme",
31
+ "sphinx-autodoc-typehints",
32
+ "tomli; python_version<'3.11'",
33
+ ]
34
+
35
+ [project.urls]
36
+ "Homepage" = "https://github.com/bennokr/makeprov"
37
+ "Documentation" = "https://makeprov.readthedocs.io"
38
+ "Issue Tracker" = "https://github.com/bennokr/makeprov/issues"
39
+
40
+ [tool.pytape]
41
+ test = "tests/test_makeprov.py"
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,44 @@
1
+ """Track file provenance in Python workflows using PROV semantics"""
2
+ from __future__ import annotations
3
+
4
+ from .config import ProvenanceConfig, GLOBAL_CONFIG, main
5
+ from .paths import ProvPath, InPath, OutPath
6
+ from .core import (
7
+ COMMANDS,
8
+ build,
9
+ build_all,
10
+ dry_run_build,
11
+ explain,
12
+ list_rules,
13
+ list_targets,
14
+ needs_update,
15
+ plan,
16
+ resolve_target,
17
+ root_targets,
18
+ rule,
19
+ to_dot,
20
+ )
21
+ from .rdfmixin import RDFMixin
22
+
23
+ __all__ = [
24
+ "ProvenanceConfig",
25
+ "GLOBAL_CONFIG",
26
+ "main",
27
+ "ProvPath",
28
+ "InPath",
29
+ "OutPath",
30
+ "rule",
31
+ "needs_update",
32
+ "build",
33
+ "build_all",
34
+ "COMMANDS",
35
+ "resolve_target",
36
+ "plan",
37
+ "explain",
38
+ "to_dot",
39
+ "list_rules",
40
+ "list_targets",
41
+ "root_targets",
42
+ "dry_run_build",
43
+ "RDFMixin",
44
+ ]
@@ -0,0 +1,180 @@
1
+ from __future__ import annotations
2
+ from dataclasses import dataclass, fields, is_dataclass
3
+ from typing import Literal
4
+ import sys, logging, tomllib as toml, defopt
5
+ import argparse
6
+
7
# Allowed on-disk provenance serializations: "json" (JSON-LD) or "trig" (TriG).
ProvFormat = Literal["json", "trig"]


@dataclass
class ProvenanceConfig:
    """Runtime configuration for provenance generation.

    Args:
        base_iri: Default base IRI used when constructing provenance identifiers.
        prov_dir: Directory where provenance documents are written by default.
        prov_path: Explicit provenance output path that overrides ``prov_dir``.
        force: When ``True``, rebuild rules regardless of input/output freshness.
        merge: When ``True``, provenance from multiple rules is buffered and
            merged into a single document.
        dry_run: When ``True``, log rule execution without running the wrapped
            function.
        out_fmt: Output format for provenance files (``"json"`` or ``"trig"``).
        context: Whether JSON-LD outputs include the context inline.

    Examples:
        .. code-block:: python

            from makeprov.config import GLOBAL_CONFIG

            # Mutate the shared config in place. Rebinding the GLOBAL_CONFIG
            # name in your own module would NOT affect the library's settings.
            GLOBAL_CONFIG.prov_dir = "artifacts/prov"
            GLOBAL_CONFIG.out_fmt = "trig"
    """

    base_iri: str | None = None
    prov_dir: str = "prov"
    prov_path: str | None = None
    force: bool = False
    merge: bool = True
    dry_run: bool = False
    out_fmt: ProvFormat = "json"
    context: bool = False


# Process-wide default configuration; updated in place by apply_config()/main().
GLOBAL_CONFIG = ProvenanceConfig()
47
+
48
+
49
+ def apply_config(conf_obj, toml_ref):
50
+ """Update a dataclass configuration from TOML content.
51
+
52
+ Args:
53
+ conf_obj (dataclass): Configuration object to mutate in place.
54
+ toml_ref (str): Either a TOML string or an ``@``-prefixed path to a
55
+ TOML file.
56
+
57
+ Raises:
58
+ FileNotFoundError: If ``toml_ref`` points to a missing file.
59
+ tomllib.TOMLDecodeError: If TOML content cannot be parsed.
60
+
61
+ Examples:
62
+ Load configuration overrides from a file and apply them to the global
63
+ settings:
64
+
65
+ .. code-block:: python
66
+
67
+ from makeprov.config import GLOBAL_CONFIG, apply_config
68
+
69
+ apply_config(GLOBAL_CONFIG, "@config/provenance.toml")
70
+ """
71
+
72
+ def set_conf(dc, params):
73
+ for f in fields(dc):
74
+ if f.name in params:
75
+ cur, new = getattr(dc, f.name), params[f.name]
76
+ if is_dataclass(cur) and isinstance(new, dict):
77
+ set_conf(cur, new)
78
+ else:
79
+ setattr(dc, f.name, new)
80
+
81
+ logging.debug(f"Parsing config {toml_ref}")
82
+ t = toml_ref
83
+ param = toml.load(open(t[1:], "rb")) if t.startswith("@") else toml.loads(t)
84
+ logging.debug(f"Setting config {param}")
85
+ set_conf(conf_obj, param)
86
+
87
+
88
def main(subcommands=None, conf_obj=None, argparse_kwargs=None, **kwargs):
    """Entry point for running registered CLI subcommands.

    Args:
        subcommands (Iterable[Callable] | None): Functions decorated with
            :func:`makeprov.core.rule` to expose on the command line; defaults to
            registered commands.
        conf_obj (ProvenanceConfig | None): Configuration to update from command
            line flags; defaults to :data:`GLOBAL_CONFIG`.
        argparse_kwargs (dict | None): Extra keyword arguments forwarded to the
            parser built by :func:`defopt.run`. Defaults to no extras.
        **kwargs: Additional keyword arguments forwarded to :func:`defopt.run`.

    Examples:
        Expose decorated rules as CLI commands and honor configuration flags:

        .. code-block:: bash

            python -m makeprov --conf @config/provenance.toml --verbose my_rule arg1
    """

    # Imported lazily to avoid a circular import between config and core.
    from .core import COMMANDS, flush_prov_buffer, start_prov_buffer
    from .core import build, build_all, explain, to_dot

    subcommands = subcommands or COMMANDS
    conf_obj = conf_obj or GLOBAL_CONFIG
    # Copy rather than sharing a mutable default dict between calls.
    argparse_kwargs = dict(argparse_kwargs) if argparse_kwargs else {}

    parent = argparse.ArgumentParser(add_help=False)
    parent.add_argument(
        "-c",
        "--conf",
        action="append",
        default=[],
        help="Set config param from TOML snippet or @file.toml",
    )
    parent.add_argument(
        "-v", "--verbose", action="count", default=0,
        help="Show more logging output (-vv for even more)",
    )
    parent.add_argument(
        "-a", "--build-all", action="store_true",
        help="Build all concrete targets that have no dependents",
    )
    parent.add_argument(
        "-b", "--build",
        help="Recursively build a TARGET and its prerequisites",
        metavar="TARGET",
    )
    parent.add_argument(
        "-e", "--explain",
        help="Show dependency resolution for TARGET without running rules",
        metavar="TARGET",
    )
    parent.add_argument(
        "-d", "--to-dot",
        help="Render dependency graph for TARGET in DOT format",
        metavar="TARGET",
    )

    # Parse the shared global flags exactly once and apply their side effects
    # (logging level, config overrides) before any command runs.
    ns, _ = parent.parse_known_args(sys.argv[1:])
    level = ("WARNING", "INFO", "DEBUG")[min(max(ns.verbose, 0), 2)]
    logging.basicConfig(level=getattr(logging, level))
    for toml_ref in ns.conf:
        apply_config(conf_obj, toml_ref)
    logging.debug("Config: %s", conf_obj)

    # Start buffering before ANY command executes so the flush in `finally`
    # is always paired with a start (the shortcut flags also run rules).
    if conf_obj.merge:
        start_prov_buffer()
    try:
        if ns.build_all:
            build_all()
        elif ns.build:
            build(ns.build)
        elif ns.explain:
            explain(ns.explain)
        elif ns.to_dot:
            print(to_dot(ns.to_dot))
        else:
            defopt.run(
                subcommands,
                argv=sys.argv[1:],
                argparse_kwargs={"parents": [parent], **argparse_kwargs},
                **kwargs,
            )
    finally:
        if conf_obj.merge:
            flush_prov_buffer()