cognite-neat 0.96.6__py3-none-any.whl → 0.97.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cognite-neat might be problematic. Click here for more details.

Files changed (68)
  1. cognite/neat/_constants.py +3 -1
  2. cognite/neat/_graph/extractors/__init__.py +3 -0
  3. cognite/neat/_graph/extractors/_base.py +1 -1
  4. cognite/neat/_graph/extractors/_classic_cdf/_assets.py +1 -1
  5. cognite/neat/_graph/extractors/_classic_cdf/_base.py +1 -1
  6. cognite/neat/_graph/extractors/_classic_cdf/_classic.py +1 -1
  7. cognite/neat/_graph/extractors/_classic_cdf/_data_sets.py +1 -1
  8. cognite/neat/_graph/extractors/_classic_cdf/_events.py +1 -1
  9. cognite/neat/_graph/extractors/_classic_cdf/_files.py +1 -1
  10. cognite/neat/_graph/extractors/_classic_cdf/_labels.py +1 -1
  11. cognite/neat/_graph/extractors/_classic_cdf/_relationships.py +1 -1
  12. cognite/neat/_graph/extractors/_classic_cdf/_sequences.py +1 -1
  13. cognite/neat/_graph/extractors/_classic_cdf/_timeseries.py +1 -1
  14. cognite/neat/_graph/extractors/_dexpi.py +1 -1
  15. cognite/neat/_graph/extractors/_dms.py +1 -1
  16. cognite/neat/_graph/extractors/_iodd.py +1 -1
  17. cognite/neat/_graph/extractors/_mock_graph_generator.py +1 -1
  18. cognite/neat/_graph/extractors/_rdf_file.py +1 -1
  19. cognite/neat/_graph/loaders/_rdf2dms.py +1 -1
  20. cognite/neat/_graph/queries/_base.py +1 -1
  21. cognite/neat/_graph/transformers/__init__.py +3 -1
  22. cognite/neat/_graph/transformers/_rdfpath.py +60 -1
  23. cognite/neat/_issues/errors/__init__.py +2 -0
  24. cognite/neat/_issues/errors/_properties.py +12 -0
  25. cognite/neat/_issues/warnings/__init__.py +2 -0
  26. cognite/neat/_issues/warnings/_models.py +11 -0
  27. cognite/neat/_rules/importers/__init__.py +11 -0
  28. cognite/neat/_rules/importers/_base.py +7 -0
  29. cognite/neat/_rules/importers/_dms2rules.py +12 -3
  30. cognite/neat/_rules/importers/_rdf/_inference2rules.py +17 -2
  31. cognite/neat/_rules/models/asset/_rules.py +6 -2
  32. cognite/neat/_rules/models/asset/_rules_input.py +6 -1
  33. cognite/neat/_rules/models/data_types.py +6 -0
  34. cognite/neat/_rules/models/dms/_rules.py +8 -1
  35. cognite/neat/_rules/models/dms/_rules_input.py +8 -0
  36. cognite/neat/_rules/models/dms/_validation.py +64 -2
  37. cognite/neat/_rules/models/domain.py +10 -0
  38. cognite/neat/_rules/models/entities/_loaders.py +3 -5
  39. cognite/neat/_rules/models/information/_rules.py +6 -2
  40. cognite/neat/_rules/models/information/_rules_input.py +6 -1
  41. cognite/neat/_rules/transformers/_base.py +7 -0
  42. cognite/neat/_rules/transformers/_converters.py +56 -4
  43. cognite/neat/_session/_base.py +94 -23
  44. cognite/neat/_session/_inspect.py +12 -4
  45. cognite/neat/_session/_prepare.py +144 -21
  46. cognite/neat/_session/_read.py +137 -30
  47. cognite/neat/_session/_set.py +22 -3
  48. cognite/neat/_session/_show.py +171 -45
  49. cognite/neat/_session/_state.py +79 -30
  50. cognite/neat/_session/_to.py +16 -17
  51. cognite/neat/_session/engine/__init__.py +4 -0
  52. cognite/neat/_session/engine/_import.py +7 -0
  53. cognite/neat/_session/engine/_interface.py +24 -0
  54. cognite/neat/_session/engine/_load.py +129 -0
  55. cognite/neat/_session/exceptions.py +13 -3
  56. cognite/neat/_shared.py +6 -1
  57. cognite/neat/_store/_base.py +3 -24
  58. cognite/neat/_store/_provenance.py +185 -42
  59. cognite/neat/_utils/rdf_.py +34 -1
  60. cognite/neat/_utils/reader/__init__.py +3 -0
  61. cognite/neat/_utils/reader/_base.py +162 -0
  62. cognite/neat/_version.py +2 -1
  63. {cognite_neat-0.96.6.dist-info → cognite_neat-0.97.0.dist-info}/METADATA +5 -3
  64. {cognite_neat-0.96.6.dist-info → cognite_neat-0.97.0.dist-info}/RECORD +67 -62
  65. cognite/neat/_graph/models.py +0 -7
  66. {cognite_neat-0.96.6.dist-info → cognite_neat-0.97.0.dist-info}/LICENSE +0 -0
  67. {cognite_neat-0.96.6.dist-info → cognite_neat-0.97.0.dist-info}/WHEEL +0 -0
  68. {cognite_neat-0.96.6.dist-info → cognite_neat-0.97.0.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,129 @@
1
+ import os
2
+ import re
3
+ import shutil
4
+ import sys
5
+ import tempfile
6
+ import warnings
7
+ from collections.abc import Callable
8
+ from pathlib import Path
9
+ from typing import Literal, cast
10
+
11
+ from cognite.client import CogniteClient
12
+ from packaging.version import Version
13
+ from packaging.version import parse as parse_version
14
+
15
+ from cognite.neat._issues.errors import NeatValueError
16
+ from cognite.neat._version import __engine__
17
+
18
+ ENVIRONMENT_VARIABLE = "NEATENGINE"
19
+ PACKAGE_NAME = "neatengine"
20
+ PYVERSION = f"{sys.version_info.major}{sys.version_info.minor}"
21
+
22
+
23
def load_neat_engine(client: CogniteClient | None, location: Literal["newest", "cache"]) -> str | None:
    """Locate a compatible NeatEngine archive, add it to ``sys.path`` and return its version.

    Candidates are zip archives named ``neatengine-<major>.<minor>.<patch>-<pyversion>.zip``
    whose version is at least the version in ``__engine__`` and below the next major version.
    The newest compatible archive is copied into a ``neatengine`` folder under the system
    temp directory (the cache), and the cached copy is appended to ``sys.path``.

    Args:
        client: Client used to look for archives in CDF. CDF is skipped when this is ``None``.
        location: ``"cache"`` uses archives already in the cache folder and only searches the
            other sources when the cache holds no compatible archive. ``"newest"`` always
            searches the Downloads folder, CDF and the path in the ``NEATENGINE``
            environment variable.

    Returns:
        The version string reported by the imported engine, or ``None`` when no compatible
        archive was found or the engine could not be imported.

    Raises:
        NeatValueError: If ``location`` is not one of the supported values.
        ValueError: If ``__engine__`` does not start with ``^`` (developer error).
    """
    if location not in ("newest", "cache"):
        raise NeatValueError(f"Cannot load engine from location: {location}")

    if not __engine__.startswith("^"):
        # Using value error as this is a developer error
        raise ValueError(f"Invalid engine version: {__engine__}")

    lower_bound = parse_version(__engine__[1:])
    upper_bound = Version(f"{lower_bound.major + 1}.0.0")

    cache_dir = Path(tempfile.gettempdir()) / PACKAGE_NAME
    cache_dir.mkdir(exist_ok=True)
    # The literal parts are escaped and the end is anchored so that only exact archive names
    # match (e.g. "neatengine-1.2.3-311.zip", but not "...-311.zip.bak" or "...-311xzip").
    pattern = re.compile(rf"{re.escape(PACKAGE_NAME)}-(\d+\.\d+\.\d+)-{re.escape(PYVERSION)}\.zip$")

    candidates: dict[Version, Callable[[], Path]] = {}
    if location == "cache":
        candidates = _load_from_path(cache_dir, pattern, lower_bound, upper_bound)

    if location == "newest" or not candidates:
        # Loading in reverse order of priority: for the same version, a later source
        # overwrites an earlier one.
        # 3. Downloads folder
        candidates = _load_from_path(Path.home() / "Downloads", pattern, lower_bound, upper_bound)
        # 2. CDF
        if client:
            candidates.update(_load_from_cdf(client, pattern, lower_bound, upper_bound, cache_dir))
        # 1. Environment variable
        if ENVIRONMENT_VARIABLE in os.environ:
            environ_path = Path(os.environ[ENVIRONMENT_VARIABLE])
            if environ_path.exists():
                candidates.update(_load_from_path(environ_path, pattern, lower_bound, upper_bound))
            else:
                warnings.warn(
                    f"Environment variable {ENVIRONMENT_VARIABLE} points to non-existing path: {environ_path}",
                    UserWarning,
                    stacklevel=2,
                )

    if not candidates:
        return None

    selected_version = max(candidates)
    # The candidate is a callable so that remote archives are only fetched once selected.
    source_path = candidates[selected_version]()
    destination_path = cache_dir / source_path.name
    if not destination_path.exists():
        shutil.copy(source_path, destination_path)

    # Avoid piling up duplicate entries when the engine is loaded more than once.
    engine_location = str(destination_path)
    if engine_location not in sys.path:
        sys.path.append(engine_location)
    try:
        from neatengine._version import __version__ as engine_version  # type: ignore[import-not-found]
    except ImportError:
        return None
    return engine_version
77
+
78
+
79
+ def _load_from_path(
80
+ path: Path, pattern: re.Pattern[str], lower_bound: Version, upper_bound: Version
81
+ ) -> dict[Version, Callable[[], Path]]:
82
+ if path.is_file() and (match := pattern.match(path.name)):
83
+ version = parse_version(match.group(1))
84
+ if lower_bound <= version < upper_bound:
85
+ return {parse_version(match.group(1)): lambda: path}
86
+ return {}
87
+ elif path.is_dir():
88
+ output: dict[Version, Callable[[], Path]] = {}
89
+ for candidate in path.iterdir():
90
+ if candidate.is_file() and (match := pattern.match(candidate.name)):
91
+ version = parse_version(match.group(1))
92
+ if lower_bound <= version < upper_bound:
93
+ # Setting default value to ensure we use the candidate from the current iteration
94
+ # If not set, the function will use the last candidate from the loop
95
+ def return_path(the_path: Path = candidate) -> Path:
96
+ return the_path
97
+
98
+ output[parse_version(match.group(1))] = return_path
99
+
100
+ return output
101
+ return {}
102
+
103
+
104
def _load_from_cdf(
    client: CogniteClient, pattern: re.Pattern[str], lower_bound: Version, upper_bound: Version, cache_dir: Path
) -> dict[Version, Callable[[], Path]]:
    """Collect engine archives stored as CDF files whose version lies in ``[lower_bound, upper_bound)``.

    Files are listed from the ``neatengine`` data set, filtered on the external id prefix and
    on the ``python_version`` metadata field. Nothing is downloaded here: each value in the
    result is a callable that downloads the file into ``cache_dir`` when invoked.

    Args:
        client: Client used to list and download the files.
        pattern: Pattern matched against the file name; group 1 must hold the version.
        lower_bound: Lowest accepted version (inclusive).
        upper_bound: First rejected version (exclusive).
        cache_dir: Directory the archive is downloaded into.

    Returns:
        A mapping from version to a zero-argument callable that downloads the archive and
        returns its local path.
    """
    listed = client.files.list(
        limit=-1,
        data_set_external_ids=PACKAGE_NAME,
        external_id_prefix=PACKAGE_NAME,
        metadata={"python_version": PYVERSION},
    )
    downloads: dict[Version, Callable[[], Path]] = {}
    for file in listed:
        name = cast(str, file.name)
        hit = pattern.match(name)
        if hit is None:
            continue
        version = parse_version(hit.group(1))
        if not (lower_bound <= version < upper_bound):
            continue

        # Lazy download: the defaults pin this iteration's id and name, otherwise every
        # callable would refer to the last file of the loop.
        def fetch_archive(file_id: int = file.id, filename: str = name) -> Path:
            client.files.download(cache_dir, file_id)
            return cache_dir / filename

        downloads[version] = fetch_archive

    return downloads
@@ -22,10 +22,20 @@ def _intercept_session_exceptions(func: Callable):
22
22
  try:
23
23
  return func(*args, **kwargs)
24
24
  except NeatSessionError as e:
25
- action = func.__name__
26
- if action == "__call__":
27
- action = func.__qualname__.removesuffix(".__call__").removesuffix("API")
25
+ action = _get_action()
28
26
  print(f"{_PREFIX} Cannot {action}: {e}")
27
+ except ModuleNotFoundError as e:
28
+ if e.name == "neatengine":
29
+ action = _get_action()
30
+ print(f"{_PREFIX} The functionality {action} requires the NeatEngine.")
31
+ else:
32
+ raise e
33
+
34
+ def _get_action():
35
+ action = func.__name__
36
+ if action == "__call__":
37
+ action = func.__qualname__.removesuffix(".__call__").removesuffix("API")
38
+ return action
29
39
 
30
40
  return wrapper
31
41
 
cognite/neat/_shared.py CHANGED
@@ -1,9 +1,10 @@
1
1
  from abc import abstractmethod
2
2
  from collections.abc import Hashable, Sequence
3
3
  from dataclasses import dataclass
4
- from typing import Any, TypeVar
4
+ from typing import Any, TypeAlias, TypeVar
5
5
 
6
6
  import pandas as pd
7
+ from rdflib import Literal, URIRef
7
8
 
8
9
  T_ID = TypeVar("T_ID", bound=Hashable)
9
10
 
@@ -49,3 +50,7 @@ class NeatList(list, Sequence[T_NeatObject]):
49
50
 
50
51
  def _repr_html_(self) -> str:
51
52
  return self.to_pandas()._repr_html_() # type: ignore[operator]
53
+
54
+
55
+ Triple: TypeAlias = tuple[URIRef, URIRef, Literal | URIRef]
56
+ InstanceType: TypeAlias = URIRef
@@ -12,13 +12,14 @@ from rdflib.plugins.stores.sparqlstore import SPARQLUpdateStore
12
12
 
13
13
  from cognite.neat._constants import DEFAULT_NAMESPACE
14
14
  from cognite.neat._graph.extractors import RdfFileExtractor, TripleExtractors
15
- from cognite.neat._graph.models import InstanceType, Triple
16
15
  from cognite.neat._graph.queries import Queries
17
16
  from cognite.neat._graph.transformers import Transformers
18
17
  from cognite.neat._rules.analysis import InformationAnalysis
19
18
  from cognite.neat._rules.models import InformationRules
20
19
  from cognite.neat._rules.models.entities import ClassEntity
20
+ from cognite.neat._shared import InstanceType, Triple
21
21
  from cognite.neat._utils.auxiliary import local_import
22
+ from cognite.neat._utils.rdf_ import add_triples_in_batch
22
23
 
23
24
  from ._provenance import Change, Provenance
24
25
 
@@ -288,29 +289,7 @@ class NeatGraphStore:
288
289
  batch_size: Batch size of triples per commit, by default 10_000
289
290
  verbose: Verbose mode, by default False
290
291
  """
291
-
292
- commit_counter = 0
293
- number_of_written_triples = 0
294
-
295
- def check_commit(force_commit: bool = False):
296
- """Commit nodes to the graph if batch counter is reached or if force_commit is True"""
297
- nonlocal commit_counter
298
- nonlocal number_of_written_triples
299
- if force_commit:
300
- number_of_written_triples += commit_counter
301
- self.graph.commit()
302
- return
303
- commit_counter += 1
304
- if commit_counter >= batch_size:
305
- number_of_written_triples += commit_counter
306
- self.graph.commit()
307
- commit_counter = 0
308
-
309
- for triple in triples:
310
- self.graph.add(triple)
311
- check_commit()
312
-
313
- check_commit(force_commit=True)
292
+ add_triples_in_batch(self.graph, triples, batch_size)
314
293
 
315
294
  def transform(self, transformer: Transformers) -> None:
316
295
  """Transforms the graph store using a transformer."""
@@ -1,22 +1,33 @@
1
- # we will use prov-o to represent the provenance of the neat graph store
2
- # basically tracking changes that occur in the graph store
3
- # prov-o use concepts of Agent, Activity and Entity to represent provenance
4
- # where in case of neat we have:
5
- # Agent: triples extractors, graph enhancers, contextualizers, etc.
6
- # Activity: write triple, add connection, etc.
7
- # Entity: neat graph store
1
+ """
2
+ We use prov-o to represent the provenance of instances and data models
3
+ basically tracking changes that occur.
4
+ prov-o use concepts of Agent, Activity and Entity to represent provenance
5
+ where in case of neat when dealing with instances we have:
8
6
 
7
+ * Agent: triples extractors, graph enhancers, contextualizers, etc.
8
+ * Activity: write/remove triples such as add connection, etc.
9
+ * Entity: neat graph store
10
+
11
+ and in case of data models we have:
12
+
13
+ * Agent: Rules importers, exporters, transformers, etc.
14
+ * Activity: convert, verify, etc.
15
+ * Entity: data model (aka Rules)
16
+
17
+ """
9
18
 
10
19
  import uuid
11
- from collections.abc import Sequence
12
- from dataclasses import dataclass
20
+ from collections.abc import Iterable, Sequence
21
+ from dataclasses import dataclass, field
13
22
  from datetime import datetime
14
- from typing import TypeVar
23
+ from typing import Optional, TypeVar
15
24
 
25
+ from cognite.client.data_classes.data_modeling import DataModelId, DataModelIdentifier
16
26
  from rdflib import PROV, RDF, Literal, URIRef
17
27
 
18
- from cognite.neat._constants import DEFAULT_NAMESPACE
19
- from cognite.neat._shared import FrozenNeatObject, NeatList
28
+ from cognite.neat._constants import CDF_NAMESPACE, DEFAULT_NAMESPACE
29
+ from cognite.neat._rules._shared import JustRules, ReadRules, VerifiedRules
30
+ from cognite.neat._shared import FrozenNeatObject, NeatList, Triple
20
31
 
21
32
 
22
33
  @dataclass(frozen=True)
@@ -24,61 +35,137 @@ class Agent:
24
35
  id_: URIRef = DEFAULT_NAMESPACE.agent
25
36
  acted_on_behalf_of: str = "NEAT"
26
37
 
27
- def as_triples(self):
38
+ def as_triples(self) -> list[Triple]:
28
39
  return [
29
40
  (self.id_, RDF.type, PROV[type(self).__name__]),
30
- (self.id_, PROV.actedOnBehalfOf, self.acted_on_behalf_of),
41
+ (self.id_, PROV.actedOnBehalfOf, Literal(self.acted_on_behalf_of)),
42
+ ]
43
+
44
+
45
+ CDF_AGENT = Agent(acted_on_behalf_of="UNKNOWN", id_=CDF_NAMESPACE["agent"])
46
+ NEAT_AGENT = Agent(acted_on_behalf_of="UNKNOWN", id_=DEFAULT_NAMESPACE["agent"])
47
+ UNKNOWN_AGENT = Agent(acted_on_behalf_of="UNKNOWN", id_=DEFAULT_NAMESPACE["unknown-agent"])
48
+
49
+
50
@dataclass(frozen=True)
class Entity:
    """PROV-O entity, identified by ``id_`` and attributed to an agent.

    ``was_generated_by`` optionally points to the activity that produced the entity and is
    left out of the dataclass ``repr``.
    """

    was_attributed_to: Agent
    was_generated_by: Optional["Activity"] = field(default=None, repr=False)
    id_: URIRef = DEFAULT_NAMESPACE["graph-store"]

    def as_triples(self) -> list[Triple]:
        """Return the entity as triples: its type, its agent and, if set, its generating activity."""
        triples: list[Triple] = [
            (self.id_, RDF.type, PROV[type(self).__name__]),
            (self.id_, PROV.wasAttributedTo, self.was_attributed_to.id_),
        ]

        generating_activity = self.was_generated_by
        if generating_activity:
            triples.append((self.id_, PROV.wasGeneratedBy, generating_activity.id_))

        return triples

    @classmethod
    def from_data_model_id(cls, data_model_id: DataModelIdentifier) -> "Entity":
        """Create an entity attributed to the CDF agent, with an id built from space, external id and version."""
        loaded = DataModelId.load(data_model_id)
        suffix = f"dms/data-model/{loaded.space}/{loaded.external_id}/{loaded.version}"
        return cls(was_attributed_to=CDF_AGENT, id_=CDF_NAMESPACE[suffix])

    @classmethod
    def from_rules(
        cls,
        rules: ReadRules | JustRules | VerifiedRules,
        agent: Agent | None = None,
        activity: "Activity | None" = None,
    ) -> "Entity":
        """Create an entity for a set of rules.

        The id is taken from the rules themselves, or from the wrapped rules for read/just
        rules that carry some. When no rules are available the id is ``unknown-entity``.
        A missing agent is replaced by the unknown agent.
        """
        agent = agent or UNKNOWN_AGENT

        if isinstance(rules, VerifiedRules):
            entity_id = rules.id_
        elif isinstance(rules, ReadRules | JustRules) and rules.rules is not None:
            entity_id = rules.rules.id_
        else:
            entity_id = DEFAULT_NAMESPACE["unknown-entity"]

        return cls(
            was_attributed_to=agent,
            was_generated_by=activity,
            id_=entity_id,
        )

    @classmethod
    def new_unknown_entity(cls) -> "Entity":
        """Create an entity with a fresh, uuid-based ``unknown-entity`` id, attributed to the unknown agent."""
        unique_id = DEFAULT_NAMESPACE[f"unknown-entity/{uuid.uuid4()}"]
        return cls(was_attributed_to=UNKNOWN_AGENT, id_=unique_id)
118
+
119
+
120
+ INSTANCES_ENTITY = Entity(was_attributed_to=NEAT_AGENT, id_=CDF_NAMESPACE["instances"])
121
+
33
122
 
34
123
  @dataclass(frozen=True)
35
124
  class Activity:
36
125
  was_associated_with: Agent
37
126
  ended_at_time: datetime
38
127
  started_at_time: datetime
39
- used: str # this would be set to for example Extractor, Enhancer, Contextualizer, etc.
40
- id_: URIRef = DEFAULT_NAMESPACE[f"activity-{uuid.uuid4()}"]
128
+ used: str | Entity | None = None
129
+ id_: URIRef = field(default_factory=lambda: DEFAULT_NAMESPACE[f"activity-{uuid.uuid4()}"])
41
130
 
42
- def as_triples(self):
43
- return [
131
+ def as_triples(self) -> list[Triple]:
132
+ output: list[tuple[URIRef, URIRef, Literal | URIRef]] = [
44
133
  (self.id_, RDF.type, PROV[type(self).__name__]),
45
134
  (self.id_, PROV.wasAssociatedWith, self.was_associated_with.id_),
46
135
  (self.id_, PROV.startedAtTime, Literal(self.started_at_time)),
47
136
  (self.id_, PROV.endedAtTime, Literal(self.ended_at_time)),
48
- (self.id_, PROV.used, self.used),
49
137
  ]
50
138
 
139
+ if self.used:
140
+ output.append(
141
+ (
142
+ self.id_,
143
+ PROV.used,
144
+ (self.used.id_ if isinstance(self.used, Entity) else Literal(self.used)),
145
+ )
146
+ )
51
147
 
52
- @dataclass(frozen=True)
53
- class Entity:
54
- was_generated_by: Activity
55
- was_attributed_to: Agent
56
- id_: URIRef = DEFAULT_NAMESPACE["graph-store"]
57
-
58
- def as_triples(self):
59
- return [
60
- (self.id_, RDF.type, PROV[type(self).__name__]),
61
- (self.id_, PROV.wasGeneratedBy, self.was_generated_by.id_),
62
- (self.id_, PROV.wasAttributedTo, self.was_attributed_to.id_),
63
- ]
148
+ return output
64
149
 
65
150
 
66
151
  @dataclass(frozen=True)
67
152
  class Change(FrozenNeatObject):
68
153
  agent: Agent
69
154
  activity: Activity
70
- entity: Entity
155
+ target_entity: Entity
71
156
  description: str
72
- # triples that were added to the graph store
73
- addition: list[tuple[URIRef, URIRef, URIRef | Literal]] | None = None
74
- # triples that were removed from the graph store
75
- subtraction: list[tuple[URIRef, URIRef, URIRef | Literal]] | None = None
157
+ source_entity: Entity = field(default_factory=Entity.new_unknown_entity)
76
158
 
77
- def as_triples(self):
78
- return self.agent.as_triples() + self.activity.as_triples() + self.entity.as_triples()
159
+ def as_triples(self) -> list[Triple]:
160
+ return (
161
+ self.source_entity.as_triples()
162
+ + self.agent.as_triples()
163
+ + self.activity.as_triples()
164
+ + self.target_entity.as_triples() # type: ignore[operator]
165
+ )
79
166
 
80
167
  @classmethod
81
- def record(cls, activity: str, start: datetime, end: datetime, description: str):
168
+ def record(cls, activity: str, start: datetime, end: datetime, description: str) -> "Change":
82
169
  """User friendly method to record a change that occurred in the graph store."""
83
170
  agent = Agent()
84
171
  activity = Activity(
@@ -87,14 +174,48 @@ class Change(FrozenNeatObject):
87
174
  started_at_time=start,
88
175
  ended_at_time=end,
89
176
  )
90
- entity = Entity(was_generated_by=activity, was_attributed_to=agent)
91
- return cls(agent, activity, entity, description)
177
+ target_entity = Entity(was_generated_by=activity, was_attributed_to=agent)
178
+ return cls(
179
+ agent=agent,
180
+ activity=activity,
181
+ target_entity=target_entity,
182
+ description=description,
183
+ )
184
+
185
+ @classmethod
186
+ def from_rules_activity(
187
+ cls,
188
+ rules: ReadRules | JustRules | VerifiedRules,
189
+ agent: Agent,
190
+ start: datetime,
191
+ end: datetime,
192
+ description: str,
193
+ source_entity: Entity | None = None,
194
+ ) -> "Change":
195
+ source_entity = source_entity or Entity.new_unknown_entity()
196
+ activity = Activity(
197
+ started_at_time=start,
198
+ ended_at_time=end,
199
+ was_associated_with=agent,
200
+ used=source_entity,
201
+ )
202
+
203
+ target_entity = Entity.from_rules(rules, agent, activity)
204
+
205
+ return cls(
206
+ agent=agent,
207
+ activity=activity,
208
+ target_entity=target_entity,
209
+ description=description,
210
+ source_entity=source_entity,
211
+ )
92
212
 
93
213
  def dump(self, aggregate: bool = True) -> dict[str, str]:
94
214
  return {
215
+ "Source Entity": self.source_entity.id_,
95
216
  "Agent": self.agent.id_,
96
217
  "Activity": self.activity.id_,
97
- "Entity": self.entity.id_,
218
+ "Target Entity": self.target_entity.id_,
98
219
  "Description": self.description,
99
220
  }
100
221
 
@@ -124,3 +245,25 @@ class Provenance(NeatList[Change]):
124
245
  text += "</ul>"
125
246
 
126
247
  return text
248
+
249
+ def activity(self, id_: URIRef) -> Activity | None:
250
+ return next((change.activity for change in self if change.activity.id_ == id_), None)
251
+
252
+ def agent(self, id_: URIRef) -> Agent | None:
253
+ return next((change.agent for change in self if change.agent.id_ == id_), None)
254
+
255
+ def target_entity(self, id_: URIRef) -> Entity | None:
256
+ return next(
257
+ (change.target_entity for change in self if change.target_entity.id_ == id_),
258
+ None,
259
+ )
260
+
261
+ def source_entity(self, id_: URIRef) -> Entity | None:
262
+ return next(
263
+ (change.source_entity for change in self if change.source_entity.id_ == id_),
264
+ None,
265
+ )
266
+
267
+ def as_triples(self) -> Iterable[Triple]:
268
+ for change in self:
269
+ yield from change.as_triples()
@@ -4,8 +4,8 @@ from typing import Any, Literal, TypeAlias, overload
4
4
 
5
5
  from cognite.client.utils.useful_types import SequenceNotStr
6
6
  from pydantic import HttpUrl, TypeAdapter, ValidationError
7
+ from rdflib import Graph, Namespace, URIRef
7
8
  from rdflib import Literal as RdfLiteral
8
- from rdflib import Namespace, URIRef
9
9
 
10
10
  Triple: TypeAlias = tuple[URIRef, URIRef, RdfLiteral | URIRef]
11
11
 
@@ -171,3 +171,36 @@ def get_inheritance_path(child: Any, child_parent: dict[Any, list[Any]]) -> list
171
171
  for parent in child_parent[child]:
172
172
  path.extend(get_inheritance_path(parent, child_parent))
173
173
  return path
174
+
175
+
176
def add_triples_in_batch(graph: Graph, triples: Iterable[Triple], batch_size: int = 10_000) -> None:
    """Adds triples to the graph, committing once per batch.

    Args:
        graph: Graph the triples are added to and committed on
        triples: Triples to be added to the graph
        batch_size: Number of triples per commit, by default 10_000
    """
    pending = 0
    for triple in triples:
        graph.add(triple)
        pending += 1
        if pending >= batch_size:
            graph.commit()
            pending = 0

    # Final commit for the triples added since the last full batch. It is issued
    # unconditionally, also when nothing is pending.
    graph.commit()
@@ -0,0 +1,3 @@
1
+ from ._base import GitHubReader, NeatReader, PathReader
2
+
3
+ __all__ = ["NeatReader", "PathReader", "GitHubReader"]