makeprov 0.2.0__tar.gz → 0.2.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: makeprov
3
- Version: 0.2.0
3
+ Version: 0.2.2
4
4
  Summary: An provenance tracking library for simple Python workflows
5
5
  Author-email: Benno Kruit <b.b.kruit@amsterdamumc.nl>
6
6
  License: MIT
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "makeprov"
7
- version = "0.2.0"
7
+ version = "0.2.2"
8
8
  description = "An provenance tracking library for simple Python workflows"
9
9
  readme = "README.md"
10
10
  license = { text = "MIT" }
@@ -10,14 +10,16 @@ ProvFormat = Literal["json", "trig"]
10
10
  class ProvenanceConfig:
11
11
  base_iri: str = "http://example.org/"
12
12
  prov_dir: str = "prov"
13
+ prov_path: str | None = None
13
14
  force: bool = False
14
15
  dry_run: bool = False
15
16
  out_fmt: ProvFormat = "json"
17
+ jsonld_with_context: bool = False
16
18
 
17
19
  GLOBAL_CONFIG = ProvenanceConfig()
18
20
 
19
21
  def main(subcommands=None, conf_obj=None, parsers=None):
20
- from .core import COMMANDS
22
+ from .core import COMMANDS, flush_prov_buffer, start_prov_buffer
21
23
 
22
24
  subcommands = subcommands or COMMANDS
23
25
  conf_obj = conf_obj or GLOBAL_CONFIG
@@ -53,11 +55,25 @@ def main(subcommands=None, conf_obj=None, parsers=None):
53
55
  logging.debug(f"Setting config {p}")
54
56
  conf(conf_obj, p)
55
57
 
56
- apply_globals(sys.argv[1:]) # apply effects early
57
- logging.debug(f"Config: {conf_obj}")
58
- defopt.run(
59
- subcommands,
60
- parsers=parsers or {},
61
- argv=sys.argv[1:],
62
- argparse_kwargs={"parents": [parent]},
58
+ return ns
59
+
60
+ parent.add_argument(
61
+ "--merge-prov",
62
+ action="store_true",
63
+ help="Merge provenance from invoked commands into a single output",
63
64
  )
65
+
66
+ ns = apply_globals(sys.argv[1:]) # apply effects early
67
+ logging.debug(f"Config: {conf_obj}")
68
+ try:
69
+ if ns.merge_prov:
70
+ start_prov_buffer()
71
+ defopt.run(
72
+ subcommands,
73
+ parsers=parsers or {},
74
+ argv=sys.argv[1:],
75
+ argparse_kwargs={"parents": [parent]},
76
+ )
77
+ finally:
78
+ if ns.merge_prov:
79
+ flush_prov_buffer()
@@ -10,7 +10,7 @@ from collections.abc import Callable
10
10
 
11
11
  from .config import ProvenanceConfig, ProvFormat, GLOBAL_CONFIG
12
12
  from .paths import InPath, OutPath
13
- from .prov import Prov
13
+ from .prov import Prov, ProvResult, write_combined_prov
14
14
 
15
15
  try:
16
16
  import rdflib # optional
@@ -21,6 +21,29 @@ except Exception:
21
21
  RULES: dict[str, dict[str, Any]] = {}
22
22
  COMMANDS: set[Callable] = set()
23
23
 
24
+
25
+ PROV_BUFFER: list[ProvResult] | None = None
26
+
27
+
28
+ def start_prov_buffer() -> None:
29
+ global PROV_BUFFER
30
+ PROV_BUFFER = []
31
+
32
+
33
+ def flush_prov_buffer() -> None:
34
+ global PROV_BUFFER
35
+ try:
36
+ if PROV_BUFFER:
37
+ write_combined_prov(
38
+ PROV_BUFFER,
39
+ prov_path=GLOBAL_CONFIG.prov_path or Path(GLOBAL_CONFIG.prov_dir)
40
+ / "combined",
41
+ fmt=GLOBAL_CONFIG.out_fmt,
42
+ jsonld_with_context=GLOBAL_CONFIG.jsonld_with_context,
43
+ )
44
+ finally:
45
+ PROV_BUFFER = None
46
+
24
47
  def needs_update(outputs, deps) -> bool:
25
48
  """Return True if any output missing or older than any dependency."""
26
49
  out_paths = [Path(o) for o in outputs]
@@ -43,6 +66,7 @@ def build(target, _seen=None):
43
66
  Recursively build target after its dependencies, if needed.
44
67
  `target` is a path (string/Path). Only rules with default OutPath are in DAG.
45
68
  """
69
+ top_level = _seen is None
46
70
  if _seen is None:
47
71
  _seen = set()
48
72
  target = str(target)
@@ -50,12 +74,18 @@ def build(target, _seen=None):
50
74
  raise RuntimeError(f"Cycle in build graph at {target!r}")
51
75
  _seen.add(target)
52
76
 
77
+ if top_level:
78
+ start_prov_buffer()
79
+
53
80
  rule = RULES[target]
54
81
  for dep in rule["deps"]:
55
82
  if dep in RULES:
56
83
  build(dep, _seen)
57
84
  rule["func"]()
58
85
 
86
+ if top_level:
87
+ flush_prov_buffer()
88
+
59
89
  def _is_kind_annotation(ann: Any, cls: type) -> bool:
60
90
  if ann is cls:
61
91
  return True
@@ -74,7 +104,7 @@ def rule(
74
104
  dry_run: bool | None = None,
75
105
  out_fmt: ProvFormat | None = None,
76
106
  config: ProvenanceConfig | None = None,
77
- jsonld_with_context: bool = False,
107
+ jsonld_with_context: bool | None = None,
78
108
  ):
79
109
  """
80
110
  Decorator that infers inputs/outputs from type annotations
@@ -84,9 +114,11 @@ def rule(
84
114
  rule_config = ProvenanceConfig(
85
115
  base_iri=base_iri if base_iri is not None else base_config.base_iri,
86
116
  prov_dir=prov_dir if prov_dir is not None else base_config.prov_dir,
117
+ prov_path=base_config.prov_path,
87
118
  force=force if force is not None else base_config.force,
88
119
  dry_run=dry_run if dry_run is not None else base_config.dry_run,
89
120
  out_fmt=out_fmt if out_fmt is not None else base_config.out_fmt,
121
+ jsonld_with_context=base_config.jsonld_with_context,
90
122
  )
91
123
 
92
124
  def decorator(func):
@@ -136,6 +168,12 @@ def rule(
136
168
  bound = sig.bind_partial(*args, **kwargs)
137
169
  bound.apply_defaults()
138
170
 
171
+ effective_jsonld_with_context = (
172
+ jsonld_with_context
173
+ if jsonld_with_context is not None
174
+ else rule_config.jsonld_with_context
175
+ )
176
+
139
177
  in_files: list[Path] = []
140
178
  out_files: list[Path] = []
141
179
 
@@ -199,14 +237,20 @@ def rule(
199
237
  )
200
238
  if prov_path is not None:
201
239
  rule_prov_path = prov_path
240
+ elif rule_config.prov_path is not None:
241
+ rule_prov_path = rule_config.prov_path
202
242
  else:
203
243
  rule_prov_path = Path(rule_config.prov_dir) / logical_name
204
- prov.write(
205
- rule_prov_path,
206
- fmt=rule_config.out_fmt,
207
- result=result,
208
- jsonld_with_context=jsonld_with_context
209
- )
244
+
245
+ if PROV_BUFFER is not None:
246
+ PROV_BUFFER.append(ProvResult(prov, result))
247
+ else:
248
+ prov.write(
249
+ rule_prov_path,
250
+ fmt=rule_config.out_fmt,
251
+ result=result,
252
+ jsonld_with_context=effective_jsonld_with_context,
253
+ )
210
254
  except Exception as prov_exc: # noqa: BLE001
211
255
  logging.warning("Failed to write provenance for %s: %s", logical_name, prov_exc)
212
256
 
@@ -221,4 +265,4 @@ def rule(
221
265
 
222
266
  return wrapped
223
267
 
224
- return decorator
268
+ return decorator
@@ -1,21 +1,24 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import os
3
4
  import sys
4
5
  from pathlib import Path
5
6
 
6
7
  # Platform-appropriate base class for Path subclassing
7
8
  _BasePath = type(Path())
8
9
 
10
+
9
11
  class ProvPath(_BasePath):
10
12
  """
11
13
  A Path subclass that understands '-' as a special stream path.
12
14
  For subclasses InPath and OutPath, '-' maps to stdin/stdout, respectively.
13
15
  """
14
16
 
15
- def __new__(cls, path: str | bytes | "ProvPath"):
16
- self = super().__new__(cls, path)
17
+ def __new__(cls, *paths: str | bytes | "ProvPath"):
18
+ raw_paths = [os.fspath(p) for p in paths]
19
+ self = super().__new__(cls, *paths)
17
20
  # We store stream flags on the instance. Path is immutable, but allows attributes.
18
- self._is_stream = str(self) == "-"
21
+ self._is_stream = len(raw_paths) == 1 and raw_paths[0] == "-"
19
22
  self._stream_name = None
20
23
  return self
21
24
 
@@ -47,8 +50,8 @@ class ProvPath(_BasePath):
47
50
 
48
51
  class InPath(ProvPath):
49
52
  """Marker for input paths. '-' means stdin."""
50
- def __new__(cls, path: str | bytes | ProvPath):
51
- self = super().__new__(cls, path)
53
+ def __new__(cls, *paths: str | bytes | ProvPath):
54
+ self = super().__new__(cls, *paths)
52
55
  if self.is_stream:
53
56
  self._stream_name = "stdin"
54
57
  return self
@@ -61,8 +64,8 @@ class InPath(ProvPath):
61
64
 
62
65
  class OutPath(ProvPath):
63
66
  """Marker for output paths. '-' means stdout."""
64
- def __new__(cls, path: str | bytes | ProvPath):
65
- self = super().__new__(cls, path)
67
+ def __new__(cls, *paths: str | bytes | ProvPath):
68
+ self = super().__new__(cls, *paths)
66
69
  if self.is_stream:
67
70
  self._stream_name = "stdout"
68
71
  return self
@@ -93,6 +93,12 @@ class ProvDoc(JSONLDMixin):
93
93
  provenance: list[Any] = field(default_factory=list)
94
94
  __context__ = COMMON_CONTEXT
95
95
 
96
+
97
+ @dataclass
98
+ class ProvResult:
99
+ prov: "Prov"
100
+ result: Any | None = None
101
+
96
102
  # ---------- helpers ----------
97
103
 
98
104
  def _safe_cmd(argv: list[str]) -> str | None:
@@ -275,7 +281,7 @@ class Prov:
275
281
  norm = pep503_normalize(pkg_name)
276
282
  dep_iri = f"https://pypi.org/project/{norm}/"
277
283
  reqs.append(DepNode(id=dep_iri, type="rdfs:Resource", label=spec_str))
278
- self.env_node = EnvNode(
284
+ self.env_node = EnvNode(
279
285
  id=env_id,
280
286
  type=["prov:Entity", "prov:Collection"],
281
287
  label="Python environment",
@@ -288,44 +294,110 @@ class Prov:
288
294
  self.activity.used = []
289
295
  self.activity.used.append(env_id)
290
296
 
297
+ def to_doc(self, *, include_graph_meta: bool = False) -> ProvDoc:
298
+ provenance: list[Any] = [
299
+ self.activity,
300
+ self.agent,
301
+ *self.output_nodes,
302
+ *([self.env_node] if self.env_node else []),
303
+ ]
304
+
305
+ if include_graph_meta:
306
+ provenance.append(self.graph_meta)
307
+
308
+ return ProvDoc(provenance=provenance)
309
+
310
+ def to_dataset(self, result=None):
311
+ import rdflib
312
+
313
+ ds = rdflib.Dataset()
314
+ ds.bind("", self.base_iri)
315
+ default_graph = ds.default_context
316
+
317
+ for triple in self.to_doc(include_graph_meta=True).to_graph():
318
+ default_graph.add(triple)
319
+
320
+ if result is not None and isinstance(result, (rdflib.Graph, rdflib.Dataset)):
321
+ gx = ds.get_context(self.graph_id)
322
+ for triple in result:
323
+ gx.add(triple)
324
+
325
+ return ds
291
326
 
292
327
  def write(self, prov_path: str | Path, result=None, fmt="json", jsonld_with_context=False) -> Path:
293
328
  out = Path(prov_path)
294
329
  out.parent.mkdir(parents=True, exist_ok=True)
295
- # Assemble document
296
- doc = ProvDoc(
297
- provenance=[
298
- self.activity,
299
- self.agent,
300
- *self.output_nodes,
301
- *([self.env_node] if self.env_node else [])
302
- ]
303
- )
304
330
  if fmt == "json":
305
- data = doc.to_jsonld(with_context=jsonld_with_context)
331
+ data = self.to_doc().to_jsonld(with_context=jsonld_with_context)
332
+ if result is not None and isinstance(result, JSONLDMixin):
333
+ data["result"] = [result.to_jsonld(with_context=jsonld_with_context)]
306
334
  final = out.with_suffix(".json")
307
335
  logging.info("Writing JSON-LD provenance %s", final)
308
336
  final.write_text(json.dumps(data, indent=2), encoding="utf-8")
309
337
  return final
310
338
  elif fmt == "trig":
311
- import rdflib
312
-
313
- ds = rdflib.Dataset()
314
- ds.bind("", self.base_iri)
315
- D = ds.default_context
316
- doc.provenance.append(self.graph_meta)
317
- for triple in doc.to_graph():
318
- D.add(triple)
319
-
320
- if result is not None:
321
- if isinstance(result, (rdflib.Graph, rdflib.Dataset)):
322
- gx = ds.get_context(self.graph_id)
323
- for triple in result:
324
- gx.add(triple)
339
+ ds = self.to_dataset(result=result)
325
340
 
326
341
  final = out.with_suffix(".trig")
327
342
  logging.info("Writing TRIG provenance %s", final)
328
343
  ds.serialize(final, format="trig")
344
+ return final
329
345
 
330
346
  else:
331
- raise Exception(f"No handler to write Prov object in format '{fmt}'")
347
+ raise Exception(f"No handler to write Prov object in format '{fmt}'")
348
+
349
+
350
+ def write_combined_prov(
351
+ provs: list[ProvResult],
352
+ prov_path: str | Path,
353
+ fmt: str = "json",
354
+ jsonld_with_context: bool = False,
355
+ ):
356
+ if not provs:
357
+ raise ValueError("No provenance objects provided for combination")
358
+
359
+ out = Path(prov_path)
360
+ out.parent.mkdir(parents=True, exist_ok=True)
361
+
362
+ if fmt == "json":
363
+ combined_doc = ProvDoc(provenance=[])
364
+ for prov_result in provs:
365
+ combined_doc.provenance.extend(prov_result.prov.to_doc().provenance)
366
+
367
+ data = combined_doc.to_jsonld(with_context=jsonld_with_context)
368
+ data["result"] = []
369
+
370
+ for prov_result in provs:
371
+ if isinstance(prov_result.result, JSONLDMixin):
372
+ data["result"].append(
373
+ prov_result.result.to_jsonld(with_context=jsonld_with_context)
374
+ )
375
+
376
+ final = out.with_suffix(".json")
377
+ logging.info("Writing combined JSON-LD provenance %s", final)
378
+ final.write_text(json.dumps(data, indent=2), encoding="utf-8")
379
+ return final
380
+
381
+ if fmt == "trig":
382
+ import rdflib
383
+
384
+ ds = rdflib.Dataset()
385
+
386
+ for prov_result in provs:
387
+ ds.bind("", prov_result.prov.base_iri)
388
+ default_graph = ds.default_context
389
+
390
+ for triple in prov_result.prov.to_doc(include_graph_meta=True).to_graph():
391
+ default_graph.add(triple)
392
+
393
+ if isinstance(prov_result.result, (rdflib.Graph, rdflib.Dataset)):
394
+ gx = ds.get_context(prov_result.prov.graph_id)
395
+ for triple in prov_result.result:
396
+ gx.add(triple)
397
+
398
+ final = out.with_suffix(".trig")
399
+ logging.info("Writing combined TRIG provenance %s", final)
400
+ ds.serialize(final, format="trig")
401
+ return final
402
+
403
+ raise Exception(f"No handler to write combined Prov objects in format '{fmt}'")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: makeprov
3
- Version: 0.2.0
3
+ Version: 0.2.2
4
4
  Summary: An provenance tracking library for simple Python workflows
5
5
  Author-email: Benno Kruit <b.b.kruit@amsterdamumc.nl>
6
6
  License: MIT
@@ -0,0 +1,160 @@
1
+ import json
2
+ import sys
3
+ import tempfile
4
+ from pathlib import Path
5
+
6
+ from rdflib import Graph, Literal, Namespace
7
+ from rdflib.namespace import RDF, XSD
8
+
9
+ from makeprov import InPath, OutPath, ProvenanceConfig, build, main, rule
10
+
11
+ @rule(name="test_process_data")
12
+ def process_data(input_file: InPath, output_file: OutPath):
13
+ with input_file.open('r') as infile, output_file.open('w') as outfile:
14
+ data = infile.read()
15
+ outfile.write(data)
16
+
17
+
18
+ SALES_NS = Namespace("http://example.org/test/")
19
+ TEST_PROV_DIR = Path(tempfile.mkdtemp(prefix="makeprov-tests-"))
20
+ TEST_PROV_CONFIG = ProvenanceConfig(prov_dir=str(TEST_PROV_DIR))
21
+
22
+
23
+ @rule(name="test_totals_graph", config=TEST_PROV_CONFIG)
24
+ def totals_graph(input_csv: InPath, graph_out: OutPath) -> Graph:
25
+ graph = Graph()
26
+ graph.bind("sales", SALES_NS)
27
+
28
+
29
+ with input_csv.open('r') as handle:
30
+ for line in handle.read().strip().splitlines()[1:]:
31
+ region, units, revenue = line.split(',')
32
+ subject = SALES_NS[f"region/{region.lower()}"]
33
+ graph.add((subject, RDF.type, SALES_NS.RegionTotal))
34
+ graph.add((subject, SALES_NS.regionName, Literal(region)))
35
+ graph.add((subject, SALES_NS.totalUnits, Literal(units, datatype=XSD.integer)))
36
+ graph.add((subject, SALES_NS.totalRevenue, Literal(revenue, datatype=XSD.decimal)))
37
+
38
+ with graph_out.open('w') as handle:
39
+ handle.write(graph.serialize(format='turtle'))
40
+
41
+ return graph
42
+
43
+
44
+ def test_process_data(tmp_path):
45
+ input_file = tmp_path / "input.txt"
46
+ output_file = tmp_path / "output.txt"
47
+
48
+ input_file.write_text("Hello, world!")
49
+
50
+ # Run the process_data function
51
+ result = process_data(InPath(str(input_file)), OutPath(str(output_file)))
52
+
53
+ # Check that the output file was created and contains the correct data
54
+ assert output_file.exists()
55
+ assert output_file.read_text() == "Hello, world!"
56
+
57
+
58
+ def test_rule_returns_graph(tmp_path):
59
+ input_csv = tmp_path / "region_totals.csv"
60
+ graph_ttl = tmp_path / "region_totals.ttl"
61
+ input_csv.write_text("region,total_units,total_revenue\nNorth,6,119.94\n")
62
+
63
+ result = totals_graph(InPath(str(input_csv)), OutPath(str(graph_ttl)))
64
+
65
+ assert isinstance(result, Graph)
66
+ assert graph_ttl.exists()
67
+ assert "North" in graph_ttl.read_text()
68
+ print(*TEST_PROV_DIR.glob('*'))
69
+ assert list(TEST_PROV_DIR.glob('*'))
70
+
71
+
72
+ def test_build_combines_provenance(tmp_path, monkeypatch):
73
+ prov_dir = tmp_path / "prov"
74
+ config = ProvenanceConfig(prov_dir=str(prov_dir))
75
+
76
+ @rule(name="combine_step_one", config=config)
77
+ def step_one(
78
+ source: InPath = InPath("combine-source.txt"),
79
+ mid: OutPath = OutPath("combine-mid.txt"),
80
+ ):
81
+ with source.open("r") as src, mid.open("w") as dst:
82
+ dst.write(src.read() + " step1")
83
+
84
+ @rule(name="combine_step_two", config=config)
85
+ def step_two(
86
+ mid: InPath = InPath("combine-mid.txt"),
87
+ final: OutPath = OutPath("combine-final.txt"),
88
+ ):
89
+ with mid.open("r") as src, final.open("w") as dst:
90
+ dst.write(src.read() + " step2")
91
+
92
+ monkeypatch.chdir(tmp_path)
93
+ (tmp_path / "combine-source.txt").write_text("data")
94
+
95
+ build("combine-final.txt")
96
+
97
+ final_output = tmp_path / "combine-final.txt"
98
+ assert final_output.exists()
99
+ assert final_output.read_text() == "data step1 step2"
100
+
101
+ prov_files = list(prov_dir.glob("*"))
102
+ assert len(prov_files) == 1
103
+
104
+ prov_json = json.loads(prov_files[0].read_text())
105
+ activities = [
106
+ node
107
+ for node in prov_json["provenance"]
108
+ if node.get("type") == "prov:Activity"
109
+ or (
110
+ isinstance(node.get("type"), list)
111
+ and "prov:Activity" in node.get("type", [])
112
+ )
113
+ ]
114
+
115
+ assert len(activities) == 2
116
+
117
+
118
+ def test_cli_merge_prov(tmp_path, monkeypatch):
119
+ prov_dir = tmp_path / "prov"
120
+ intermediate = tmp_path / "cli-mid.txt"
121
+ final = tmp_path / "cli-final.txt"
122
+ config = ProvenanceConfig(prov_dir=str(prov_dir))
123
+
124
+ @rule(name="cli_merge_one", config=config)
125
+ def step_one(mid: OutPath = OutPath(intermediate)):
126
+ with mid.open("w") as dst:
127
+ dst.write("stage1")
128
+
129
+ @rule(name="cli_merge_two", config=config)
130
+ def step_two(mid: InPath = InPath(intermediate), final: OutPath = OutPath(final)):
131
+ with mid.open("r") as src, final.open("w") as dst:
132
+ dst.write(src.read() + " stage2")
133
+
134
+ def run_pipeline():
135
+ step_one()
136
+ step_two()
137
+
138
+ monkeypatch.chdir(tmp_path)
139
+ monkeypatch.setattr(sys, "argv", ["prog", "--merge-prov", "run-pipeline"])
140
+
141
+ main(subcommands=[run_pipeline])
142
+
143
+ assert final.exists()
144
+ assert final.read_text() == "stage1 stage2"
145
+
146
+ prov_files = list(prov_dir.glob("*"))
147
+ assert len(prov_files) == 1
148
+
149
+ prov_json = json.loads(prov_files[0].read_text())
150
+ activities = [
151
+ node
152
+ for node in prov_json["provenance"]
153
+ if node.get("type") == "prov:Activity"
154
+ or (
155
+ isinstance(node.get("type"), list)
156
+ and "prov:Activity" in node.get("type", [])
157
+ )
158
+ ]
159
+
160
+ assert len(activities) == 2
@@ -1,67 +0,0 @@
1
- import tempfile
2
- from pathlib import Path
3
-
4
- from rdflib import Graph, Literal, Namespace
5
- from rdflib.namespace import RDF, XSD
6
-
7
- from makeprov import InPath, OutPath, ProvenanceConfig, rule
8
-
9
- @rule(name="test_process_data")
10
- def process_data(input_file: InPath, output_file: OutPath):
11
- with input_file.open('r') as infile, output_file.open('w') as outfile:
12
- data = infile.read()
13
- outfile.write(data)
14
-
15
-
16
- SALES_NS = Namespace("http://example.org/test/")
17
- TEST_PROV_DIR = Path(tempfile.mkdtemp(prefix="makeprov-tests-"))
18
- TEST_PROV_CONFIG = ProvenanceConfig(prov_dir=str(TEST_PROV_DIR))
19
-
20
-
21
- @rule(name="test_totals_graph", config=TEST_PROV_CONFIG)
22
- def totals_graph(input_csv: InPath, graph_out: OutPath) -> Graph:
23
- graph = Graph()
24
- graph.bind("sales", SALES_NS)
25
-
26
-
27
- with input_csv.open('r') as handle:
28
- for line in handle.read().strip().splitlines()[1:]:
29
- region, units, revenue = line.split(',')
30
- subject = SALES_NS[f"region/{region.lower()}"]
31
- graph.add((subject, RDF.type, SALES_NS.RegionTotal))
32
- graph.add((subject, SALES_NS.regionName, Literal(region)))
33
- graph.add((subject, SALES_NS.totalUnits, Literal(units, datatype=XSD.integer)))
34
- graph.add((subject, SALES_NS.totalRevenue, Literal(revenue, datatype=XSD.decimal)))
35
-
36
- with graph_out.open('w') as handle:
37
- handle.write(graph.serialize(format='turtle'))
38
-
39
- return graph
40
-
41
-
42
- def test_process_data(tmp_path):
43
- input_file = tmp_path / "input.txt"
44
- output_file = tmp_path / "output.txt"
45
-
46
- input_file.write_text("Hello, world!")
47
-
48
- # Run the process_data function
49
- result = process_data(InPath(str(input_file)), OutPath(str(output_file)))
50
-
51
- # Check that the output file was created and contains the correct data
52
- assert output_file.exists()
53
- assert output_file.read_text() == "Hello, world!"
54
-
55
-
56
- def test_rule_returns_graph(tmp_path):
57
- input_csv = tmp_path / "region_totals.csv"
58
- graph_ttl = tmp_path / "region_totals.ttl"
59
- input_csv.write_text("region,total_units,total_revenue\nNorth,6,119.94\n")
60
-
61
- result = totals_graph(InPath(str(input_csv)), OutPath(str(graph_ttl)))
62
-
63
- assert isinstance(result, Graph)
64
- assert graph_ttl.exists()
65
- assert "North" in graph_ttl.read_text()
66
- print(*TEST_PROV_DIR.glob('*'))
67
- assert list(TEST_PROV_DIR.glob('*'))
File without changes
File without changes