cognite-neat 0.96.5__py3-none-any.whl → 0.97.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cognite-neat might be problematic. Click here for more details.
- cognite/neat/_constants.py +4 -1
- cognite/neat/_graph/extractors/__init__.py +3 -0
- cognite/neat/_graph/extractors/_base.py +1 -1
- cognite/neat/_graph/extractors/_classic_cdf/_assets.py +1 -1
- cognite/neat/_graph/extractors/_classic_cdf/_base.py +1 -1
- cognite/neat/_graph/extractors/_classic_cdf/_classic.py +1 -1
- cognite/neat/_graph/extractors/_classic_cdf/_data_sets.py +1 -1
- cognite/neat/_graph/extractors/_classic_cdf/_events.py +1 -1
- cognite/neat/_graph/extractors/_classic_cdf/_files.py +1 -1
- cognite/neat/_graph/extractors/_classic_cdf/_labels.py +1 -1
- cognite/neat/_graph/extractors/_classic_cdf/_relationships.py +1 -1
- cognite/neat/_graph/extractors/_classic_cdf/_sequences.py +1 -1
- cognite/neat/_graph/extractors/_classic_cdf/_timeseries.py +1 -1
- cognite/neat/_graph/extractors/_dexpi.py +1 -1
- cognite/neat/_graph/extractors/_dms.py +1 -1
- cognite/neat/_graph/extractors/_iodd.py +1 -1
- cognite/neat/_graph/extractors/_mock_graph_generator.py +1 -1
- cognite/neat/_graph/extractors/_rdf_file.py +1 -1
- cognite/neat/_graph/loaders/_rdf2dms.py +1 -1
- cognite/neat/_graph/queries/_base.py +1 -1
- cognite/neat/_graph/transformers/__init__.py +3 -1
- cognite/neat/_graph/transformers/_rdfpath.py +60 -1
- cognite/neat/_issues/errors/__init__.py +2 -0
- cognite/neat/_issues/errors/_properties.py +12 -0
- cognite/neat/_issues/warnings/__init__.py +2 -0
- cognite/neat/_issues/warnings/_models.py +11 -0
- cognite/neat/_rules/importers/__init__.py +11 -0
- cognite/neat/_rules/importers/_base.py +7 -0
- cognite/neat/_rules/importers/_dms2rules.py +12 -3
- cognite/neat/_rules/importers/_rdf/_inference2rules.py +17 -2
- cognite/neat/_rules/importers/_spreadsheet2rules.py +5 -1
- cognite/neat/_rules/models/asset/_rules.py +6 -2
- cognite/neat/_rules/models/asset/_rules_input.py +6 -1
- cognite/neat/_rules/models/data_types.py +6 -0
- cognite/neat/_rules/models/dms/_exporter.py +16 -3
- cognite/neat/_rules/models/dms/_rules.py +37 -12
- cognite/neat/_rules/models/dms/_rules_input.py +8 -0
- cognite/neat/_rules/models/dms/_validation.py +64 -2
- cognite/neat/_rules/models/domain.py +10 -0
- cognite/neat/_rules/models/entities/_loaders.py +3 -5
- cognite/neat/_rules/models/information/_rules.py +6 -2
- cognite/neat/_rules/models/information/_rules_input.py +6 -1
- cognite/neat/_rules/transformers/_base.py +7 -0
- cognite/neat/_rules/transformers/_converters.py +56 -4
- cognite/neat/_session/_base.py +94 -23
- cognite/neat/_session/_inspect.py +12 -4
- cognite/neat/_session/_prepare.py +144 -21
- cognite/neat/_session/_read.py +137 -30
- cognite/neat/_session/_set.py +22 -3
- cognite/neat/_session/_show.py +171 -45
- cognite/neat/_session/_state.py +79 -30
- cognite/neat/_session/_to.py +16 -17
- cognite/neat/_session/engine/__init__.py +4 -0
- cognite/neat/_session/engine/_import.py +7 -0
- cognite/neat/_session/engine/_interface.py +24 -0
- cognite/neat/_session/engine/_load.py +129 -0
- cognite/neat/_session/exceptions.py +13 -3
- cognite/neat/_shared.py +6 -1
- cognite/neat/_store/_base.py +3 -24
- cognite/neat/_store/_provenance.py +185 -42
- cognite/neat/_utils/rdf_.py +34 -1
- cognite/neat/_utils/reader/__init__.py +3 -0
- cognite/neat/_utils/reader/_base.py +162 -0
- cognite/neat/_version.py +2 -1
- {cognite_neat-0.96.5.dist-info → cognite_neat-0.97.0.dist-info}/METADATA +5 -3
- {cognite_neat-0.96.5.dist-info → cognite_neat-0.97.0.dist-info}/RECORD +69 -64
- cognite/neat/_graph/models.py +0 -7
- {cognite_neat-0.96.5.dist-info → cognite_neat-0.97.0.dist-info}/LICENSE +0 -0
- {cognite_neat-0.96.5.dist-info → cognite_neat-0.97.0.dist-info}/WHEEL +0 -0
- {cognite_neat-0.96.5.dist-info → cognite_neat-0.97.0.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import re
|
|
3
|
+
import shutil
|
|
4
|
+
import sys
|
|
5
|
+
import tempfile
|
|
6
|
+
import warnings
|
|
7
|
+
from collections.abc import Callable
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Literal, cast
|
|
10
|
+
|
|
11
|
+
from cognite.client import CogniteClient
|
|
12
|
+
from packaging.version import Version
|
|
13
|
+
from packaging.version import parse as parse_version
|
|
14
|
+
|
|
15
|
+
from cognite.neat._issues.errors import NeatValueError
|
|
16
|
+
from cognite.neat._version import __engine__
|
|
17
|
+
|
|
18
|
+
# Name of the environment variable that may point at an engine zip file or at a directory containing engine zip files.
ENVIRONMENT_VARIABLE = "NEATENGINE"
|
|
19
|
+
# Base name shared by the engine zip files, the cache folder in the temp directory,
# and the CDF data set external id / file external-id prefix used when listing files.
PACKAGE_NAME = "neatengine"
|
|
20
|
+
# Running interpreter's major and minor version concatenated without a separator;
# it is part of the expected engine file name and of the CDF metadata filter.
PYVERSION = f"{sys.version_info.major}{sys.version_info.minor}"
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def load_neat_engine(client: CogniteClient | None, location: Literal["newest", "cache"]) -> str | None:
    """Find a compatible NeatEngine zip, put it on ``sys.path`` and return its version.

    Candidate files are named ``neatengine-<major>.<minor>.<patch>-<pyversion>.zip``.
    Only versions satisfying the caret requirement in ``__engine__`` are considered,
    i.e. ``lower bound <= version < next major``. The highest such version is copied
    into a cache folder below the system temp directory (unless a file with that name
    is already there) and the cached file is added to ``sys.path``.

    Args:
        client: Optional CDF client. When given, files listed in CDF are searched too.
        location: ``"cache"`` looks in the cache folder first and only searches the
            other sources when the cache holds no compatible file. ``"newest"``
            always searches the Downloads folder, CDF and the path given by the
            ``NEATENGINE`` environment variable.

    Returns:
        The ``__version__`` string of the imported engine, or ``None`` when no
        compatible file was found or the engine could not be imported.

    Raises:
        NeatValueError: If ``location`` is neither ``"newest"`` nor ``"cache"``.
        ValueError: If ``__engine__`` does not start with ``^``.
    """
    if location not in ["newest", "cache"]:
        raise NeatValueError(f"Cannot load engine from location: {location}")

    if not __engine__.startswith("^"):
        # Using value error as this is a developer error
        raise ValueError(f"Invalid engine version: {__engine__}")

    lower_bound = parse_version(__engine__[1:])
    upper_bound = Version(f"{lower_bound.major + 1}.0.0")

    cache_dir = Path(tempfile.gettempdir()) / PACKAGE_NAME
    cache_dir.mkdir(exist_ok=True)
    # The helpers use ``pattern.match``, which anchors only the start of the name.
    # Escape the literal parts and anchor the end so that names such as
    # "neatengine-1.0.0-311.zip.bak" or "neatengine-1.0.0-311xzip" are rejected.
    pattern = re.compile(rf"{re.escape(PACKAGE_NAME)}-(\d+\.\d+\.\d+)-{re.escape(PYVERSION)}\.zip\Z")

    candidates: dict[Version, Callable[[], Path]] = {}
    if location == "cache":
        candidates = _load_from_path(cache_dir, pattern, lower_bound, upper_bound)

    if location == "newest" or not candidates:
        # Loading in reverse order of priority: a later source overrides an
        # earlier one when both provide the same version.
        # 3. Downloads folder
        candidates = _load_from_path(Path.home() / "Downloads", pattern, lower_bound, upper_bound)
        # 2. CDF
        if client:
            candidates.update(_load_from_cdf(client, pattern, lower_bound, upper_bound, cache_dir))
        # 1. Environment variable
        if ENVIRONMENT_VARIABLE in os.environ:
            environ_path = Path(os.environ[ENVIRONMENT_VARIABLE])
            if environ_path.exists():
                candidates.update(_load_from_path(environ_path, pattern, lower_bound, upper_bound))
            else:
                warnings.warn(
                    f"Environment variable {ENVIRONMENT_VARIABLE} points to non-existing path: {environ_path}",
                    UserWarning,
                    stacklevel=2,
                )

    if not candidates:
        return None

    selected_version = max(candidates)
    # The stored callables are lazy so that only the selected file is fetched.
    source_path = candidates[selected_version]()
    destination_path = cache_dir / source_path.name
    if not destination_path.exists():
        shutil.copy(source_path, destination_path)

    # Avoid piling up duplicate entries when the engine is loaded more than once.
    engine_entry = str(destination_path)
    if engine_entry not in sys.path:
        sys.path.append(engine_entry)
    try:
        from neatengine._version import __version__ as engine_version  # type: ignore[import-not-found]
    except ImportError:
        return None
    return engine_version
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def _load_from_path(
    path: Path, pattern: re.Pattern[str], lower_bound: Version, upper_bound: Version
) -> dict[Version, Callable[[], Path]]:
    """Collect engine files found at ``path`` whose version lies in the allowed range.

    Args:
        path: Either a single file or a directory whose direct children are inspected.
        pattern: Compiled pattern matched against each file name; group 1 is the version.
        lower_bound: Lowest accepted version (inclusive).
        upper_bound: Upper limit for accepted versions (exclusive).

    Returns:
        Mapping from version to a zero-argument callable returning the file path.
        Empty when ``path`` is neither a file nor a directory or nothing qualifies.
    """
    if path.is_file():
        entries = [path]
    elif path.is_dir():
        entries = path.iterdir()
    else:
        return {}

    found: dict[Version, Callable[[], Path]] = {}
    for entry in entries:
        if not entry.is_file():
            continue
        hit = pattern.match(entry.name)
        if hit is None:
            continue
        entry_version = parse_version(hit.group(1))
        if not (lower_bound <= entry_version < upper_bound):
            continue
        # Bind the entry as a default value so each callable keeps its own path
        # rather than the last one seen by the loop.
        found[entry_version] = lambda the_path=entry: the_path

    return found
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def _load_from_cdf(
    client: CogniteClient, pattern: re.Pattern[str], lower_bound: Version, upper_bound: Version, cache_dir: Path
) -> dict[Version, Callable[[], Path]]:
    """Collect engine files listed in CDF whose version lies in the allowed range.

    Nothing is downloaded here. Each returned callable downloads its file into
    ``cache_dir`` when invoked and returns the resulting path.

    Args:
        client: CDF client used to list and later download the files.
        pattern: Compiled pattern matched against each file name; group 1 is the version.
        lower_bound: Lowest accepted version (inclusive).
        upper_bound: Upper limit for accepted versions (exclusive).
        cache_dir: Directory the selected file is downloaded into.

    Returns:
        Mapping from version to a zero-argument callable that downloads the file.
    """
    listed_files = client.files.list(
        limit=-1,
        data_set_external_ids=PACKAGE_NAME,
        external_id_prefix=PACKAGE_NAME,
        metadata={"python_version": PYVERSION},
    )

    downloads: dict[Version, Callable[[], Path]] = {}
    for item in listed_files:
        item_name = cast(str, item.name)
        hit = pattern.match(item_name)
        if hit is None:
            continue
        item_version = parse_version(hit.group(1))
        if not (lower_bound <= item_version < upper_bound):
            continue

        # Lazy download: the id and name are bound as default values so the
        # callable keeps the values of this iteration, not of the last one.
        def fetch(file_id: int = item.id, filename: str = item_name) -> Path:
            client.files.download(cache_dir, file_id)
            return cache_dir / filename

        downloads[item_version] = fetch

    return downloads
|
|
@@ -22,10 +22,20 @@ def _intercept_session_exceptions(func: Callable):
|
|
|
22
22
|
try:
|
|
23
23
|
return func(*args, **kwargs)
|
|
24
24
|
except NeatSessionError as e:
|
|
25
|
-
action =
|
|
26
|
-
if action == "__call__":
|
|
27
|
-
action = func.__qualname__.removesuffix(".__call__").removesuffix("API")
|
|
25
|
+
action = _get_action()
|
|
28
26
|
print(f"{_PREFIX} Cannot {action}: {e}")
|
|
27
|
+
except ModuleNotFoundError as e:
|
|
28
|
+
if e.name == "neatengine":
|
|
29
|
+
action = _get_action()
|
|
30
|
+
print(f"{_PREFIX} The functionality {action} requires the NeatEngine.")
|
|
31
|
+
else:
|
|
32
|
+
raise e
|
|
33
|
+
|
|
34
|
+
def _get_action():
|
|
35
|
+
action = func.__name__
|
|
36
|
+
if action == "__call__":
|
|
37
|
+
action = func.__qualname__.removesuffix(".__call__").removesuffix("API")
|
|
38
|
+
return action
|
|
29
39
|
|
|
30
40
|
return wrapper
|
|
31
41
|
|
cognite/neat/_shared.py
CHANGED
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
from abc import abstractmethod
|
|
2
2
|
from collections.abc import Hashable, Sequence
|
|
3
3
|
from dataclasses import dataclass
|
|
4
|
-
from typing import Any, TypeVar
|
|
4
|
+
from typing import Any, TypeAlias, TypeVar
|
|
5
5
|
|
|
6
6
|
import pandas as pd
|
|
7
|
+
from rdflib import Literal, URIRef
|
|
7
8
|
|
|
8
9
|
T_ID = TypeVar("T_ID", bound=Hashable)
|
|
9
10
|
|
|
@@ -49,3 +50,7 @@ class NeatList(list, Sequence[T_NeatObject]):
|
|
|
49
50
|
|
|
50
51
|
def _repr_html_(self) -> str:
|
|
51
52
|
return self.to_pandas()._repr_html_() # type: ignore[operator]
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
Triple: TypeAlias = tuple[URIRef, URIRef, Literal | URIRef]
|
|
56
|
+
InstanceType: TypeAlias = URIRef
|
cognite/neat/_store/_base.py
CHANGED
|
@@ -12,13 +12,14 @@ from rdflib.plugins.stores.sparqlstore import SPARQLUpdateStore
|
|
|
12
12
|
|
|
13
13
|
from cognite.neat._constants import DEFAULT_NAMESPACE
|
|
14
14
|
from cognite.neat._graph.extractors import RdfFileExtractor, TripleExtractors
|
|
15
|
-
from cognite.neat._graph.models import InstanceType, Triple
|
|
16
15
|
from cognite.neat._graph.queries import Queries
|
|
17
16
|
from cognite.neat._graph.transformers import Transformers
|
|
18
17
|
from cognite.neat._rules.analysis import InformationAnalysis
|
|
19
18
|
from cognite.neat._rules.models import InformationRules
|
|
20
19
|
from cognite.neat._rules.models.entities import ClassEntity
|
|
20
|
+
from cognite.neat._shared import InstanceType, Triple
|
|
21
21
|
from cognite.neat._utils.auxiliary import local_import
|
|
22
|
+
from cognite.neat._utils.rdf_ import add_triples_in_batch
|
|
22
23
|
|
|
23
24
|
from ._provenance import Change, Provenance
|
|
24
25
|
|
|
@@ -288,29 +289,7 @@ class NeatGraphStore:
|
|
|
288
289
|
batch_size: Batch size of triples per commit, by default 10_000
|
|
289
290
|
verbose: Verbose mode, by default False
|
|
290
291
|
"""
|
|
291
|
-
|
|
292
|
-
commit_counter = 0
|
|
293
|
-
number_of_written_triples = 0
|
|
294
|
-
|
|
295
|
-
def check_commit(force_commit: bool = False):
|
|
296
|
-
"""Commit nodes to the graph if batch counter is reached or if force_commit is True"""
|
|
297
|
-
nonlocal commit_counter
|
|
298
|
-
nonlocal number_of_written_triples
|
|
299
|
-
if force_commit:
|
|
300
|
-
number_of_written_triples += commit_counter
|
|
301
|
-
self.graph.commit()
|
|
302
|
-
return
|
|
303
|
-
commit_counter += 1
|
|
304
|
-
if commit_counter >= batch_size:
|
|
305
|
-
number_of_written_triples += commit_counter
|
|
306
|
-
self.graph.commit()
|
|
307
|
-
commit_counter = 0
|
|
308
|
-
|
|
309
|
-
for triple in triples:
|
|
310
|
-
self.graph.add(triple)
|
|
311
|
-
check_commit()
|
|
312
|
-
|
|
313
|
-
check_commit(force_commit=True)
|
|
292
|
+
add_triples_in_batch(self.graph, triples, batch_size)
|
|
314
293
|
|
|
315
294
|
def transform(self, transformer: Transformers) -> None:
|
|
316
295
|
"""Transforms the graph store using a transformer."""
|
|
@@ -1,22 +1,33 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
# Activity: write triple, add connection, etc.
|
|
7
|
-
# Entity: neat graph store
|
|
1
|
+
"""
|
|
2
|
+
We use prov-o to represent the provenance of instances and data models
|
|
3
|
+
basically tracking changes that occur.
|
|
4
|
+
prov-o uses the concepts of Agent, Activity and Entity to represent provenance
|
|
5
|
+
where, in the case of neat, when dealing with instances we have:
|
|
8
6
|
|
|
7
|
+
* Agent: triples extractors, graph enhancers, contextualizers, etc.
|
|
8
|
+
* Activity: write/remove triples such as add connection, etc.
|
|
9
|
+
* Entity: neat graph store
|
|
10
|
+
|
|
11
|
+
and in case of data models we have:
|
|
12
|
+
|
|
13
|
+
* Agent: Rules importers, exporters, transformers, etc.
|
|
14
|
+
* Activity: convert, verify, etc.
|
|
15
|
+
* Entity: data model (aka Rules)
|
|
16
|
+
|
|
17
|
+
"""
|
|
9
18
|
|
|
10
19
|
import uuid
|
|
11
|
-
from collections.abc import Sequence
|
|
12
|
-
from dataclasses import dataclass
|
|
20
|
+
from collections.abc import Iterable, Sequence
|
|
21
|
+
from dataclasses import dataclass, field
|
|
13
22
|
from datetime import datetime
|
|
14
|
-
from typing import TypeVar
|
|
23
|
+
from typing import Optional, TypeVar
|
|
15
24
|
|
|
25
|
+
from cognite.client.data_classes.data_modeling import DataModelId, DataModelIdentifier
|
|
16
26
|
from rdflib import PROV, RDF, Literal, URIRef
|
|
17
27
|
|
|
18
|
-
from cognite.neat._constants import DEFAULT_NAMESPACE
|
|
19
|
-
from cognite.neat._shared import
|
|
28
|
+
from cognite.neat._constants import CDF_NAMESPACE, DEFAULT_NAMESPACE
|
|
29
|
+
from cognite.neat._rules._shared import JustRules, ReadRules, VerifiedRules
|
|
30
|
+
from cognite.neat._shared import FrozenNeatObject, NeatList, Triple
|
|
20
31
|
|
|
21
32
|
|
|
22
33
|
@dataclass(frozen=True)
|
|
@@ -24,61 +35,137 @@ class Agent:
|
|
|
24
35
|
id_: URIRef = DEFAULT_NAMESPACE.agent
|
|
25
36
|
acted_on_behalf_of: str = "NEAT"
|
|
26
37
|
|
|
27
|
-
def as_triples(self):
|
|
38
|
+
def as_triples(self) -> list[Triple]:
|
|
28
39
|
return [
|
|
29
40
|
(self.id_, RDF.type, PROV[type(self).__name__]),
|
|
30
|
-
(self.id_, PROV.actedOnBehalfOf, self.acted_on_behalf_of),
|
|
41
|
+
(self.id_, PROV.actedOnBehalfOf, Literal(self.acted_on_behalf_of)),
|
|
42
|
+
]
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
CDF_AGENT = Agent(acted_on_behalf_of="UNKNOWN", id_=CDF_NAMESPACE["agent"])
|
|
46
|
+
NEAT_AGENT = Agent(acted_on_behalf_of="UNKNOWN", id_=DEFAULT_NAMESPACE["agent"])
|
|
47
|
+
UNKNOWN_AGENT = Agent(acted_on_behalf_of="UNKNOWN", id_=DEFAULT_NAMESPACE["unknown-agent"])
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
@dataclass(frozen=True)
class Entity:
    """PROV entity: something that is attributed to an agent and optionally generated by an activity."""

    # Agent the entity is attributed to.
    was_attributed_to: Agent
    # Activity that produced the entity, if any; left out of the repr.
    was_generated_by: Optional["Activity"] = field(default=None, repr=False)
    # Identifier of the entity; defaults to the graph store identifier.
    id_: URIRef = DEFAULT_NAMESPACE["graph-store"]

    def as_triples(self) -> list[Triple]:
        """Return the entity as triples; the generating activity is included only when set."""
        triples: list[tuple[URIRef, URIRef, Literal | URIRef]] = []
        triples.append((self.id_, RDF.type, PROV[type(self).__name__]))
        triples.append((self.id_, PROV.wasAttributedTo, self.was_attributed_to.id_))
        if not self.was_generated_by:
            return triples
        triples.append((self.id_, PROV.wasGeneratedBy, self.was_generated_by.id_))
        return triples

    @classmethod
    def from_data_model_id(cls, data_model_id: DataModelIdentifier) -> "Entity":
        """Create an entity attributed to the CDF agent, identified by the data model's space, external id and version."""
        loaded = DataModelId.load(data_model_id)
        entity_id = CDF_NAMESPACE[f"dms/data-model/{loaded.space}/{loaded.external_id}/{loaded.version}"]
        return cls(was_attributed_to=CDF_AGENT, id_=entity_id)

    @classmethod
    def from_rules(
        cls,
        rules: ReadRules | JustRules | VerifiedRules,
        agent: Agent | None = None,
        activity: "Activity | None" = None,
    ) -> "Entity":
        """Create an entity for the given rules.

        The identifier is taken from the rules when available, otherwise the
        ``unknown-entity`` identifier is used. A missing agent is replaced by
        the unknown agent.
        """
        if isinstance(rules, VerifiedRules):
            entity_id = rules.id_
        elif isinstance(rules, ReadRules | JustRules) and rules.rules is not None:
            entity_id = rules.rules.id_
        else:
            entity_id = DEFAULT_NAMESPACE["unknown-entity"]

        return cls(
            was_attributed_to=agent or UNKNOWN_AGENT,
            was_generated_by=activity,
            id_=entity_id,
        )

    @classmethod
    def new_unknown_entity(cls) -> "Entity":
        """Create an entity attributed to the unknown agent with a freshly generated identifier."""
        fresh_id = DEFAULT_NAMESPACE[f"unknown-entity/{uuid.uuid4()}"]
        return cls(was_attributed_to=UNKNOWN_AGENT, id_=fresh_id)
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
INSTANCES_ENTITY = Entity(was_attributed_to=NEAT_AGENT, id_=CDF_NAMESPACE["instances"])
|
|
121
|
+
|
|
33
122
|
|
|
34
123
|
@dataclass(frozen=True)
class Activity:
    """PROV activity: something an agent did over a time span, optionally using an entity."""

    # Agent the activity is associated with.
    was_associated_with: Agent
    ended_at_time: datetime
    started_at_time: datetime
    # What the activity used: an Entity, a plain string, or nothing.
    used: str | Entity | None = None
    # A default factory is used so every instance gets its own generated identifier.
    id_: URIRef = field(default_factory=lambda: DEFAULT_NAMESPACE[f"activity-{uuid.uuid4()}"])

    def as_triples(self) -> list[Triple]:
        """Return the activity as triples; the ``used`` triple is included only when ``used`` is truthy."""
        triples: list[tuple[URIRef, URIRef, Literal | URIRef]] = [
            (self.id_, RDF.type, PROV[type(self).__name__]),
            (self.id_, PROV.wasAssociatedWith, self.was_associated_with.id_),
            (self.id_, PROV.startedAtTime, Literal(self.started_at_time)),
            (self.id_, PROV.endedAtTime, Literal(self.ended_at_time)),
        ]
        if not self.used:
            return triples

        # An Entity is referenced by its identifier, anything else is stored as a literal.
        if isinstance(self.used, Entity):
            used_object = self.used.id_
        else:
            used_object = Literal(self.used)
        triples.append((self.id_, PROV.used, used_object))
        return triples
|
|
64
149
|
|
|
65
150
|
|
|
66
151
|
@dataclass(frozen=True)
|
|
67
152
|
class Change(FrozenNeatObject):
|
|
68
153
|
agent: Agent
|
|
69
154
|
activity: Activity
|
|
70
|
-
|
|
155
|
+
target_entity: Entity
|
|
71
156
|
description: str
|
|
72
|
-
|
|
73
|
-
addition: list[tuple[URIRef, URIRef, URIRef | Literal]] | None = None
|
|
74
|
-
# triples that were removed from the graph store
|
|
75
|
-
subtraction: list[tuple[URIRef, URIRef, URIRef | Literal]] | None = None
|
|
157
|
+
source_entity: Entity = field(default_factory=Entity.new_unknown_entity)
|
|
76
158
|
|
|
77
|
-
def as_triples(self):
|
|
78
|
-
return
|
|
159
|
+
def as_triples(self) -> list[Triple]:
|
|
160
|
+
return (
|
|
161
|
+
self.source_entity.as_triples()
|
|
162
|
+
+ self.agent.as_triples()
|
|
163
|
+
+ self.activity.as_triples()
|
|
164
|
+
+ self.target_entity.as_triples() # type: ignore[operator]
|
|
165
|
+
)
|
|
79
166
|
|
|
80
167
|
@classmethod
|
|
81
|
-
def record(cls, activity: str, start: datetime, end: datetime, description: str):
|
|
168
|
+
def record(cls, activity: str, start: datetime, end: datetime, description: str) -> "Change":
|
|
82
169
|
"""User friendly method to record a change that occurred in the graph store."""
|
|
83
170
|
agent = Agent()
|
|
84
171
|
activity = Activity(
|
|
@@ -87,14 +174,48 @@ class Change(FrozenNeatObject):
|
|
|
87
174
|
started_at_time=start,
|
|
88
175
|
ended_at_time=end,
|
|
89
176
|
)
|
|
90
|
-
|
|
91
|
-
return cls(
|
|
177
|
+
target_entity = Entity(was_generated_by=activity, was_attributed_to=agent)
|
|
178
|
+
return cls(
|
|
179
|
+
agent=agent,
|
|
180
|
+
activity=activity,
|
|
181
|
+
target_entity=target_entity,
|
|
182
|
+
description=description,
|
|
183
|
+
)
|
|
184
|
+
|
|
185
|
+
@classmethod
|
|
186
|
+
def from_rules_activity(
|
|
187
|
+
cls,
|
|
188
|
+
rules: ReadRules | JustRules | VerifiedRules,
|
|
189
|
+
agent: Agent,
|
|
190
|
+
start: datetime,
|
|
191
|
+
end: datetime,
|
|
192
|
+
description: str,
|
|
193
|
+
source_entity: Entity | None = None,
|
|
194
|
+
) -> "Change":
|
|
195
|
+
source_entity = source_entity or Entity.new_unknown_entity()
|
|
196
|
+
activity = Activity(
|
|
197
|
+
started_at_time=start,
|
|
198
|
+
ended_at_time=end,
|
|
199
|
+
was_associated_with=agent,
|
|
200
|
+
used=source_entity,
|
|
201
|
+
)
|
|
202
|
+
|
|
203
|
+
target_entity = Entity.from_rules(rules, agent, activity)
|
|
204
|
+
|
|
205
|
+
return cls(
|
|
206
|
+
agent=agent,
|
|
207
|
+
activity=activity,
|
|
208
|
+
target_entity=target_entity,
|
|
209
|
+
description=description,
|
|
210
|
+
source_entity=source_entity,
|
|
211
|
+
)
|
|
92
212
|
|
|
93
213
|
def dump(self, aggregate: bool = True) -> dict[str, str]:
|
|
94
214
|
return {
|
|
215
|
+
"Source Entity": self.source_entity.id_,
|
|
95
216
|
"Agent": self.agent.id_,
|
|
96
217
|
"Activity": self.activity.id_,
|
|
97
|
-
"Entity": self.
|
|
218
|
+
"Target Entity": self.target_entity.id_,
|
|
98
219
|
"Description": self.description,
|
|
99
220
|
}
|
|
100
221
|
|
|
@@ -124,3 +245,25 @@ class Provenance(NeatList[Change]):
|
|
|
124
245
|
text += "</ul>"
|
|
125
246
|
|
|
126
247
|
return text
|
|
248
|
+
|
|
249
|
+
def activity(self, id_: URIRef) -> Activity | None:
|
|
250
|
+
return next((change.activity for change in self if change.activity.id_ == id_), None)
|
|
251
|
+
|
|
252
|
+
def agent(self, id_: URIRef) -> Agent | None:
|
|
253
|
+
return next((change.agent for change in self if change.agent.id_ == id_), None)
|
|
254
|
+
|
|
255
|
+
def target_entity(self, id_: URIRef) -> Entity | None:
|
|
256
|
+
return next(
|
|
257
|
+
(change.target_entity for change in self if change.target_entity.id_ == id_),
|
|
258
|
+
None,
|
|
259
|
+
)
|
|
260
|
+
|
|
261
|
+
def source_entity(self, id_: URIRef) -> Entity | None:
|
|
262
|
+
return next(
|
|
263
|
+
(change.source_entity for change in self if change.source_entity.id_ == id_),
|
|
264
|
+
None,
|
|
265
|
+
)
|
|
266
|
+
|
|
267
|
+
def as_triples(self) -> Iterable[Triple]:
    """Yield the triples of every recorded change, in the order the changes are stored."""
    for change in self:
        for triple in change.as_triples():
            yield triple
|
cognite/neat/_utils/rdf_.py
CHANGED
|
@@ -4,8 +4,8 @@ from typing import Any, Literal, TypeAlias, overload
|
|
|
4
4
|
|
|
5
5
|
from cognite.client.utils.useful_types import SequenceNotStr
|
|
6
6
|
from pydantic import HttpUrl, TypeAdapter, ValidationError
|
|
7
|
+
from rdflib import Graph, Namespace, URIRef
|
|
7
8
|
from rdflib import Literal as RdfLiteral
|
|
8
|
-
from rdflib import Namespace, URIRef
|
|
9
9
|
|
|
10
10
|
Triple: TypeAlias = tuple[URIRef, URIRef, RdfLiteral | URIRef]
|
|
11
11
|
|
|
@@ -171,3 +171,36 @@ def get_inheritance_path(child: Any, child_parent: dict[Any, list[Any]]) -> list
|
|
|
171
171
|
for parent in child_parent[child]:
|
|
172
172
|
path.extend(get_inheritance_path(parent, child_parent))
|
|
173
173
|
return path
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def add_triples_in_batch(graph: Graph, triples: Iterable[Triple], batch_size: int = 10_000) -> None:
    """Add triples to a graph, committing once per full batch and once more at the end.

    Args:
        graph: Graph the triples are added to and committed on.
        triples: Triples to add; iterated exactly once.
        batch_size: Number of added triples between commits, by default 10_000.
            A value below 1 results in a commit after every triple.
    """
    pending = 0
    for triple in triples:
        graph.add(triple)
        pending += 1
        if pending >= batch_size:
            graph.commit()
            pending = 0

    # The final commit flushes a trailing partial batch. It is issued
    # unconditionally, so it also runs when no triples were given.
    graph.commit()
|