cognite-neat 0.98.0__py3-none-any.whl → 0.99.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cognite-neat might be problematic. Click here for more details.
- cognite/neat/_client/__init__.py +4 -0
- cognite/neat/_client/_api/data_modeling_loaders.py +512 -0
- cognite/neat/_client/_api/schema.py +50 -0
- cognite/neat/_client/_api_client.py +17 -0
- cognite/neat/_client/data_classes/__init__.py +0 -0
- cognite/neat/{_utils/cdf/data_classes.py → _client/data_classes/data_modeling.py} +8 -135
- cognite/neat/{_rules/models/dms/_schema.py → _client/data_classes/schema.py} +21 -281
- cognite/neat/_graph/_shared.py +14 -15
- cognite/neat/_graph/extractors/_classic_cdf/_assets.py +14 -154
- cognite/neat/_graph/extractors/_classic_cdf/_base.py +154 -7
- cognite/neat/_graph/extractors/_classic_cdf/_classic.py +23 -12
- cognite/neat/_graph/extractors/_classic_cdf/_data_sets.py +17 -92
- cognite/neat/_graph/extractors/_classic_cdf/_events.py +13 -162
- cognite/neat/_graph/extractors/_classic_cdf/_files.py +15 -179
- cognite/neat/_graph/extractors/_classic_cdf/_labels.py +32 -100
- cognite/neat/_graph/extractors/_classic_cdf/_relationships.py +27 -178
- cognite/neat/_graph/extractors/_classic_cdf/_sequences.py +14 -139
- cognite/neat/_graph/extractors/_classic_cdf/_timeseries.py +15 -173
- cognite/neat/_graph/extractors/_rdf_file.py +6 -7
- cognite/neat/_graph/queries/_base.py +17 -1
- cognite/neat/_graph/transformers/_classic_cdf.py +50 -134
- cognite/neat/_graph/transformers/_prune_graph.py +1 -1
- cognite/neat/_graph/transformers/_rdfpath.py +1 -1
- cognite/neat/_issues/warnings/__init__.py +6 -0
- cognite/neat/_issues/warnings/_external.py +8 -0
- cognite/neat/_issues/warnings/_properties.py +16 -0
- cognite/neat/_rules/_constants.py +7 -6
- cognite/neat/_rules/analysis/_base.py +8 -4
- cognite/neat/_rules/exporters/_base.py +3 -4
- cognite/neat/_rules/exporters/_rules2dms.py +29 -40
- cognite/neat/_rules/importers/_dms2rules.py +4 -5
- cognite/neat/_rules/importers/_rdf/_inference2rules.py +25 -33
- cognite/neat/_rules/models/__init__.py +1 -1
- cognite/neat/_rules/models/_base_rules.py +22 -12
- cognite/neat/_rules/models/dms/__init__.py +2 -2
- cognite/neat/_rules/models/dms/_exporter.py +15 -20
- cognite/neat/_rules/models/dms/_rules.py +48 -3
- cognite/neat/_rules/models/dms/_rules_input.py +52 -8
- cognite/neat/_rules/models/dms/_validation.py +10 -5
- cognite/neat/_rules/models/entities/_single_value.py +32 -4
- cognite/neat/_rules/models/information/_rules.py +0 -8
- cognite/neat/_rules/models/mapping/__init__.py +2 -3
- cognite/neat/_rules/models/mapping/_classic2core.py +36 -146
- cognite/neat/_rules/models/mapping/_classic2core.yaml +339 -0
- cognite/neat/_rules/transformers/__init__.py +2 -2
- cognite/neat/_rules/transformers/_converters.py +110 -11
- cognite/neat/_rules/transformers/_mapping.py +105 -30
- cognite/neat/_rules/transformers/_verification.py +5 -2
- cognite/neat/_session/_base.py +49 -8
- cognite/neat/_session/_drop.py +35 -0
- cognite/neat/_session/_inspect.py +17 -5
- cognite/neat/_session/_mapping.py +39 -0
- cognite/neat/_session/_prepare.py +218 -23
- cognite/neat/_session/_read.py +49 -12
- cognite/neat/_session/_to.py +3 -3
- cognite/neat/_store/_base.py +27 -24
- cognite/neat/_utils/rdf_.py +28 -1
- cognite/neat/_version.py +1 -1
- cognite/neat/_workflows/steps/lib/current/rules_exporter.py +8 -3
- cognite/neat/_workflows/steps/lib/current/rules_importer.py +4 -1
- cognite/neat/_workflows/steps/lib/current/rules_validator.py +3 -2
- {cognite_neat-0.98.0.dist-info → cognite_neat-0.99.0.dist-info}/METADATA +3 -3
- {cognite_neat-0.98.0.dist-info → cognite_neat-0.99.0.dist-info}/RECORD +67 -64
- cognite/neat/_rules/models/mapping/_base.py +0 -131
- cognite/neat/_utils/cdf/loaders/__init__.py +0 -25
- cognite/neat/_utils/cdf/loaders/_base.py +0 -54
- cognite/neat/_utils/cdf/loaders/_data_modeling.py +0 -339
- cognite/neat/_utils/cdf/loaders/_ingestion.py +0 -167
- /cognite/neat/{_utils/cdf → _client/_api}/__init__.py +0 -0
- {cognite_neat-0.98.0.dist-info → cognite_neat-0.99.0.dist-info}/LICENSE +0 -0
- {cognite_neat-0.98.0.dist-info → cognite_neat-0.99.0.dist-info}/WHEEL +0 -0
- {cognite_neat-0.98.0.dist-info → cognite_neat-0.99.0.dist-info}/entry_points.txt +0 -0
|
@@ -1,26 +1,44 @@
|
|
|
1
|
+
import copy
|
|
1
2
|
from collections.abc import Collection
|
|
2
3
|
from datetime import datetime, timezone
|
|
3
|
-
from typing import Literal
|
|
4
|
+
from typing import Literal, cast
|
|
4
5
|
|
|
5
6
|
from cognite.client.data_classes.data_modeling import DataModelIdentifier
|
|
6
7
|
from rdflib import URIRef
|
|
7
8
|
|
|
9
|
+
from cognite.neat._client import NeatClient
|
|
10
|
+
from cognite.neat._constants import DEFAULT_NAMESPACE
|
|
11
|
+
from cognite.neat._graph.transformers import RelationshipToSchemaTransformer
|
|
8
12
|
from cognite.neat._graph.transformers._rdfpath import MakeConnectionOnExactMatch
|
|
9
|
-
from cognite.neat._rules._shared import ReadRules
|
|
13
|
+
from cognite.neat._rules._shared import InputRules, ReadRules
|
|
14
|
+
from cognite.neat._rules.importers import DMSImporter
|
|
15
|
+
from cognite.neat._rules.models import DMSRules
|
|
10
16
|
from cognite.neat._rules.models.information._rules_input import InformationInputRules
|
|
11
|
-
from cognite.neat._rules.transformers import
|
|
17
|
+
from cognite.neat._rules.transformers import (
|
|
18
|
+
PrefixEntities,
|
|
19
|
+
ReduceCogniteModel,
|
|
20
|
+
ToCompliantEntities,
|
|
21
|
+
ToExtension,
|
|
22
|
+
VerifyDMSRules,
|
|
23
|
+
)
|
|
24
|
+
from cognite.neat._store._provenance import Agent as ProvenanceAgent
|
|
12
25
|
from cognite.neat._store._provenance import Change
|
|
13
26
|
|
|
14
27
|
from ._state import SessionState
|
|
15
28
|
from .exceptions import NeatSessionError, session_class_wrapper
|
|
16
29
|
|
|
30
|
+
try:
|
|
31
|
+
from rich import print
|
|
32
|
+
except ImportError:
|
|
33
|
+
...
|
|
34
|
+
|
|
17
35
|
|
|
18
36
|
@session_class_wrapper
|
|
19
37
|
class PrepareAPI:
|
|
20
|
-
def __init__(self, state: SessionState, verbose: bool) -> None:
|
|
38
|
+
def __init__(self, client: NeatClient | None, state: SessionState, verbose: bool) -> None:
|
|
21
39
|
self._state = state
|
|
22
40
|
self._verbose = verbose
|
|
23
|
-
self.data_model = DataModelPrepareAPI(state, verbose)
|
|
41
|
+
self.data_model = DataModelPrepareAPI(client, state, verbose)
|
|
24
42
|
self.instances = InstancePrepareAPI(state, verbose)
|
|
25
43
|
|
|
26
44
|
|
|
@@ -94,34 +112,77 @@ class InstancePrepareAPI:
|
|
|
94
112
|
raise NeatSessionError(f"Property {property_} is not defined for type {type_}. Cannot make connection")
|
|
95
113
|
return type_uri[0], property_uri[0]
|
|
96
114
|
|
|
115
|
+
def relationships_as_connections(self, limit: int = 1) -> None:
|
|
116
|
+
"""This assumes that you have read a classic CDF knowledge graph including relationships.
|
|
117
|
+
|
|
118
|
+
This transformer analyzes the relationships in the graph and modifies them to be part of the schema
|
|
119
|
+
for Assets, Events, Files, Sequences, and TimeSeries. Relationships without any properties
|
|
120
|
+
are replaced by a simple relationship between the source and target nodes. Relationships with
|
|
121
|
+
properties are replaced by a schema that contains the properties as attributes.
|
|
122
|
+
|
|
123
|
+
Args:
|
|
124
|
+
limit: The minimum number of relationships that need to be present for it
|
|
125
|
+
to be converted into a schema. Default is 1.
|
|
126
|
+
|
|
127
|
+
"""
|
|
128
|
+
transformer = RelationshipToSchemaTransformer(limit=limit)
|
|
129
|
+
self._state.instances.store.transform(transformer)
|
|
130
|
+
|
|
97
131
|
|
|
98
132
|
@session_class_wrapper
|
|
99
133
|
class DataModelPrepareAPI:
|
|
100
|
-
def __init__(self, state: SessionState, verbose: bool) -> None:
|
|
134
|
+
def __init__(self, client: NeatClient | None, state: SessionState, verbose: bool) -> None:
|
|
135
|
+
self._client = client
|
|
101
136
|
self._state = state
|
|
102
137
|
self._verbose = verbose
|
|
103
138
|
|
|
104
139
|
def cdf_compliant_external_ids(self) -> None:
|
|
105
140
|
"""Convert data model component external ids to CDF compliant entities."""
|
|
106
|
-
|
|
107
|
-
|
|
141
|
+
source_id, rules = self._state.data_model.last_info_unverified_rule
|
|
142
|
+
|
|
143
|
+
start = datetime.now(timezone.utc)
|
|
144
|
+
transformer = ToCompliantEntities()
|
|
145
|
+
output: ReadRules[InformationInputRules] = transformer.transform(rules)
|
|
146
|
+
end = datetime.now(timezone.utc)
|
|
147
|
+
|
|
148
|
+
change = Change.from_rules_activity(
|
|
149
|
+
output,
|
|
150
|
+
transformer.agent,
|
|
151
|
+
start,
|
|
152
|
+
end,
|
|
153
|
+
"Converted external ids to CDF compliant entities",
|
|
154
|
+
self._state.data_model.provenance.source_entity(source_id)
|
|
155
|
+
or self._state.data_model.provenance.target_entity(source_id),
|
|
156
|
+
)
|
|
108
157
|
|
|
109
|
-
|
|
110
|
-
transformer = ToCompliantEntities()
|
|
111
|
-
output: ReadRules[InformationInputRules] = transformer.transform(rules)
|
|
112
|
-
end = datetime.now(timezone.utc)
|
|
158
|
+
self._state.data_model.write(output, change)
|
|
113
159
|
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
transformer.agent,
|
|
117
|
-
start,
|
|
118
|
-
end,
|
|
119
|
-
"Converted external ids to CDF compliant entities",
|
|
120
|
-
self._state.data_model.provenance.source_entity(source_id)
|
|
121
|
-
or self._state.data_model.provenance.target_entity(source_id),
|
|
122
|
-
)
|
|
160
|
+
def prefix(self, prefix: str) -> None:
|
|
161
|
+
"""Prefix all views in the data model with the given prefix.
|
|
123
162
|
|
|
124
|
-
|
|
163
|
+
Args:
|
|
164
|
+
prefix: The prefix to add to the views in the data model.
|
|
165
|
+
|
|
166
|
+
"""
|
|
167
|
+
source_id, rules = self._state.data_model.last_unverified_rule
|
|
168
|
+
|
|
169
|
+
start = datetime.now(timezone.utc)
|
|
170
|
+
transformer = PrefixEntities(prefix)
|
|
171
|
+
new_rules = cast(InputRules, copy.deepcopy(rules.get_rules()))
|
|
172
|
+
output = transformer.transform(new_rules)
|
|
173
|
+
end = datetime.now(timezone.utc)
|
|
174
|
+
|
|
175
|
+
change = Change.from_rules_activity(
|
|
176
|
+
output,
|
|
177
|
+
transformer.agent,
|
|
178
|
+
start,
|
|
179
|
+
end,
|
|
180
|
+
"Added prefix to the data model views",
|
|
181
|
+
self._state.data_model.provenance.source_entity(source_id)
|
|
182
|
+
or self._state.data_model.provenance.target_entity(source_id),
|
|
183
|
+
)
|
|
184
|
+
|
|
185
|
+
self._state.data_model.write(output, change)
|
|
125
186
|
|
|
126
187
|
def to_enterprise(
|
|
127
188
|
self,
|
|
@@ -185,7 +246,7 @@ class DataModelPrepareAPI:
|
|
|
185
246
|
data_model_id: DataModelIdentifier,
|
|
186
247
|
org_name: str = "My",
|
|
187
248
|
mode: Literal["read", "write"] = "read",
|
|
188
|
-
dummy_property: str = "
|
|
249
|
+
dummy_property: str = "GUID",
|
|
189
250
|
) -> None:
|
|
190
251
|
"""Uses the current data model as a basis to create solution data model
|
|
191
252
|
|
|
@@ -235,6 +296,81 @@ class DataModelPrepareAPI:
|
|
|
235
296
|
|
|
236
297
|
self._state.data_model.write(output.rules, change)
|
|
237
298
|
|
|
299
|
+
def to_data_product(
|
|
300
|
+
self,
|
|
301
|
+
data_model_id: DataModelIdentifier,
|
|
302
|
+
org_name: str = "",
|
|
303
|
+
include: Literal["same-space", "all"] = "same-space",
|
|
304
|
+
) -> None:
|
|
305
|
+
"""Uses the current data model as a basis to create data product data model.
|
|
306
|
+
|
|
307
|
+
A data product model is a data model that ONLY maps to containers and does not use implements. This is
|
|
308
|
+
typically used for defining the data in a data product.
|
|
309
|
+
|
|
310
|
+
Args:
|
|
311
|
+
data_model_id: The data product data model id that is being created.
|
|
312
|
+
org_name: Organization name to use for the views in the new data model.
|
|
313
|
+
include: The views to include in the data product data model. Can be either "same-space" or "all".
|
|
314
|
+
If you set same-space, only the views in the same space as the data model will be included.
|
|
315
|
+
"""
|
|
316
|
+
source_id, rules = self._state.data_model.last_verified_dms_rules
|
|
317
|
+
|
|
318
|
+
dms_ref: DMSRules | None = None
|
|
319
|
+
view_ids, container_ids = rules.imported_views_and_containers_ids(include_model_views_with_no_properties=True)
|
|
320
|
+
if view_ids or container_ids:
|
|
321
|
+
if self._client is None:
|
|
322
|
+
raise NeatSessionError(
|
|
323
|
+
"No client provided. You are referencing unknown views and containers in your data model, "
|
|
324
|
+
"NEAT needs a client to lookup the definitions. "
|
|
325
|
+
"Please set the client in the session, NeatSession(client=client)."
|
|
326
|
+
)
|
|
327
|
+
schema = self._client.schema.retrieve(list(view_ids), list(container_ids))
|
|
328
|
+
|
|
329
|
+
importer = DMSImporter(schema)
|
|
330
|
+
reference_rules = importer.to_rules().rules
|
|
331
|
+
if reference_rules is not None:
|
|
332
|
+
imported = VerifyDMSRules("continue").transform(reference_rules)
|
|
333
|
+
if dms_ref := imported.rules:
|
|
334
|
+
rules = rules.model_copy(deep=True)
|
|
335
|
+
if rules.containers is None:
|
|
336
|
+
rules.containers = dms_ref.containers
|
|
337
|
+
else:
|
|
338
|
+
existing_containers = {c.container for c in rules.containers}
|
|
339
|
+
rules.containers.extend(
|
|
340
|
+
[c for c in dms_ref.containers or [] if c.container not in existing_containers]
|
|
341
|
+
)
|
|
342
|
+
existing_views = {v.view for v in rules.views}
|
|
343
|
+
rules.views.extend([v for v in dms_ref.views if v.view not in existing_views])
|
|
344
|
+
existing_properties = {(p.view, p.view_property) for p in rules.properties}
|
|
345
|
+
rules.properties.extend(
|
|
346
|
+
[p for p in dms_ref.properties if (p.view, p.view_property) not in existing_properties]
|
|
347
|
+
)
|
|
348
|
+
|
|
349
|
+
start = datetime.now(timezone.utc)
|
|
350
|
+
transformer = ToExtension(
|
|
351
|
+
new_model_id=data_model_id,
|
|
352
|
+
org_name=org_name,
|
|
353
|
+
type_="data_product",
|
|
354
|
+
include=include,
|
|
355
|
+
)
|
|
356
|
+
output = transformer.transform(rules)
|
|
357
|
+
end = datetime.now(timezone.utc)
|
|
358
|
+
|
|
359
|
+
change = Change.from_rules_activity(
|
|
360
|
+
output,
|
|
361
|
+
transformer.agent,
|
|
362
|
+
start,
|
|
363
|
+
end,
|
|
364
|
+
(
|
|
365
|
+
f"Prepared data model {data_model_id} to be data product model "
|
|
366
|
+
f"on top of {rules.metadata.as_data_model_id()}"
|
|
367
|
+
),
|
|
368
|
+
self._state.data_model.provenance.source_entity(source_id)
|
|
369
|
+
or self._state.data_model.provenance.target_entity(source_id),
|
|
370
|
+
)
|
|
371
|
+
|
|
372
|
+
self._state.data_model.write(output.rules, change)
|
|
373
|
+
|
|
238
374
|
def reduce(self, drop: Collection[Literal["3D", "Annotation", "BaseViews"] | str]) -> None:
|
|
239
375
|
"""This is a special method that allows you to drop parts of the data model.
|
|
240
376
|
This only applies to Cognite Data Models.
|
|
@@ -267,3 +403,62 @@ class DataModelPrepareAPI:
|
|
|
267
403
|
)
|
|
268
404
|
|
|
269
405
|
self._state.data_model.write(output.rules, change)
|
|
406
|
+
|
|
407
|
+
def include_referenced(self) -> None:
|
|
408
|
+
"""Include referenced views and containers in the data model."""
|
|
409
|
+
start = datetime.now(timezone.utc)
|
|
410
|
+
|
|
411
|
+
source_id, rules = self._state.data_model.last_verified_dms_rules
|
|
412
|
+
view_ids, container_ids = rules.imported_views_and_containers_ids(include_model_views_with_no_properties=True)
|
|
413
|
+
if not (view_ids or container_ids):
|
|
414
|
+
print(
|
|
415
|
+
f"Data model {rules.metadata.as_data_model_id()} does not have any referenced views or containers."
|
|
416
|
+
f"that is not already included in the data model."
|
|
417
|
+
)
|
|
418
|
+
return
|
|
419
|
+
if self._client is None:
|
|
420
|
+
raise NeatSessionError(
|
|
421
|
+
"No client provided. You are referencing unknown views and containers in your data model, "
|
|
422
|
+
"NEAT needs a client to lookup the definitions. "
|
|
423
|
+
"Please set the client in the session, NeatSession(client=client)."
|
|
424
|
+
)
|
|
425
|
+
schema = self._client.schema.retrieve(list(view_ids), list(container_ids))
|
|
426
|
+
copy_ = rules.model_copy(deep=True)
|
|
427
|
+
copy_.metadata.version = f"{rules.metadata.version}_completed"
|
|
428
|
+
importer = DMSImporter(schema)
|
|
429
|
+
imported = importer.to_rules()
|
|
430
|
+
if imported.rules is None:
|
|
431
|
+
self._state.data_model.issue_lists.append(imported.issues)
|
|
432
|
+
raise NeatSessionError(
|
|
433
|
+
"Could not import the referenced views and containers. "
|
|
434
|
+
"See `neat.inspect.issues()` for more information."
|
|
435
|
+
)
|
|
436
|
+
verified = VerifyDMSRules("continue", post_validate=False).transform(imported.rules)
|
|
437
|
+
if verified.rules is None:
|
|
438
|
+
self._state.data_model.issue_lists.append(verified.issues)
|
|
439
|
+
raise NeatSessionError(
|
|
440
|
+
"Could not verify the referenced views and containers. "
|
|
441
|
+
"See `neat.inspect.issues()` for more information."
|
|
442
|
+
)
|
|
443
|
+
if copy_.containers is None:
|
|
444
|
+
copy_.containers = verified.rules.containers
|
|
445
|
+
else:
|
|
446
|
+
existing_containers = {c.container for c in copy_.containers}
|
|
447
|
+
copy_.containers.extend(
|
|
448
|
+
[c for c in verified.rules.containers or [] if c.container not in existing_containers]
|
|
449
|
+
)
|
|
450
|
+
existing_views = {v.view for v in copy_.views}
|
|
451
|
+
copy_.views.extend([v for v in verified.rules.views if v.view not in existing_views])
|
|
452
|
+
end = datetime.now(timezone.utc)
|
|
453
|
+
|
|
454
|
+
change = Change.from_rules_activity(
|
|
455
|
+
copy_,
|
|
456
|
+
ProvenanceAgent(id_=DEFAULT_NAMESPACE["agent/"]),
|
|
457
|
+
start,
|
|
458
|
+
end,
|
|
459
|
+
(f"Included referenced views and containers in the data model {rules.metadata.as_data_model_id()}"),
|
|
460
|
+
self._state.data_model.provenance.source_entity(source_id)
|
|
461
|
+
or self._state.data_model.provenance.target_entity(source_id),
|
|
462
|
+
)
|
|
463
|
+
|
|
464
|
+
self._state.data_model.write(copy_, change)
|
cognite/neat/_session/_read.py
CHANGED
|
@@ -3,9 +3,9 @@ from datetime import datetime, timezone
|
|
|
3
3
|
from pathlib import Path
|
|
4
4
|
from typing import Any, Literal
|
|
5
5
|
|
|
6
|
-
from cognite.client import CogniteClient
|
|
7
6
|
from cognite.client.data_classes.data_modeling import DataModelId, DataModelIdentifier
|
|
8
7
|
|
|
8
|
+
from cognite.neat._client import NeatClient
|
|
9
9
|
from cognite.neat._constants import COGNITE_SPACES
|
|
10
10
|
from cognite.neat._graph import examples as instances_examples
|
|
11
11
|
from cognite.neat._graph import extractors
|
|
@@ -27,7 +27,7 @@ from .exceptions import NeatSessionError, session_class_wrapper
|
|
|
27
27
|
|
|
28
28
|
@session_class_wrapper
|
|
29
29
|
class ReadAPI:
|
|
30
|
-
def __init__(self, state: SessionState, client:
|
|
30
|
+
def __init__(self, state: SessionState, client: NeatClient | None, verbose: bool) -> None:
|
|
31
31
|
self._state = state
|
|
32
32
|
self._verbose = verbose
|
|
33
33
|
self.cdf = CDFReadAPI(state, client, verbose)
|
|
@@ -39,7 +39,7 @@ class ReadAPI:
|
|
|
39
39
|
|
|
40
40
|
@session_class_wrapper
|
|
41
41
|
class BaseReadAPI:
|
|
42
|
-
def __init__(self, state: SessionState, client:
|
|
42
|
+
def __init__(self, state: SessionState, client: NeatClient | None, verbose: bool) -> None:
|
|
43
43
|
self._state = state
|
|
44
44
|
self._verbose = verbose
|
|
45
45
|
self._client = client
|
|
@@ -67,12 +67,12 @@ class BaseReadAPI:
|
|
|
67
67
|
|
|
68
68
|
@session_class_wrapper
|
|
69
69
|
class CDFReadAPI(BaseReadAPI):
|
|
70
|
-
def __init__(self, state: SessionState, client:
|
|
70
|
+
def __init__(self, state: SessionState, client: NeatClient | None, verbose: bool) -> None:
|
|
71
71
|
super().__init__(state, client, verbose)
|
|
72
72
|
self.classic = CDFClassicAPI(state, client, verbose)
|
|
73
73
|
|
|
74
74
|
@property
|
|
75
|
-
def _get_client(self) ->
|
|
75
|
+
def _get_client(self) -> NeatClient:
|
|
76
76
|
if self._client is None:
|
|
77
77
|
raise NeatValueError("No client provided. Please provide a client to read a data model.")
|
|
78
78
|
return self._client
|
|
@@ -113,16 +113,53 @@ class CDFReadAPI(BaseReadAPI):
|
|
|
113
113
|
@session_class_wrapper
|
|
114
114
|
class CDFClassicAPI(BaseReadAPI):
|
|
115
115
|
@property
|
|
116
|
-
def _get_client(self) ->
|
|
116
|
+
def _get_client(self) -> NeatClient:
|
|
117
117
|
if self._client is None:
|
|
118
118
|
raise ValueError("No client provided. Please provide a client to read a data model.")
|
|
119
119
|
return self._client
|
|
120
120
|
|
|
121
|
-
def
|
|
122
|
-
|
|
121
|
+
def graph(self, root_asset_external_id: str) -> None:
|
|
122
|
+
"""Reads the classic knowledge graph from CDF.
|
|
123
|
+
|
|
124
|
+
The Classic Graph consists of the following core resource types.
|
|
125
|
+
|
|
126
|
+
Classic Node CDF Resources:
|
|
127
|
+
- Assets
|
|
128
|
+
- TimeSeries
|
|
129
|
+
- Sequences
|
|
130
|
+
- Events
|
|
131
|
+
- Files
|
|
132
|
+
|
|
133
|
+
All the classic node CDF resources can have one or more connections to one or more assets. This
|
|
134
|
+
will match a direct relationship in the data modeling of CDF.
|
|
135
|
+
|
|
136
|
+
In addition, you have relationships between the classic node CDF resources. This matches an edge
|
|
137
|
+
in the data modeling of CDF.
|
|
138
|
+
|
|
139
|
+
Finally, you have labels and data sets that are used to organize the graph. Data sets have a similar,
|
|
140
|
+
but different, role as a space in data modeling. Labels can be compared to node types in data modeling,
|
|
141
|
+
used to quickly filter and find nodes/edges.
|
|
142
|
+
|
|
143
|
+
This extractor will extract the classic CDF graph into Neat starting from either a data set or a root asset.
|
|
144
|
+
|
|
145
|
+
It works as follows:
|
|
146
|
+
|
|
147
|
+
1. Extract all core nodes (assets, time series, sequences, events, files) filtered by the given data set or
|
|
148
|
+
root asset.
|
|
149
|
+
2. Extract all relationships starting from any of the extracted core nodes.
|
|
150
|
+
3. Extract all core nodes that are targets of the relationships that are not already extracted.
|
|
151
|
+
4. Extract all labels that are connected to the extracted core nodes/relationships.
|
|
152
|
+
5. Extract all data sets that are connected to the extracted core nodes/relationships.
|
|
153
|
+
|
|
154
|
+
Args:
|
|
155
|
+
root_asset_external_id: The external id of the root asset
|
|
156
|
+
|
|
157
|
+
"""
|
|
158
|
+
extractor = extractors.ClassicGraphExtractor(self._get_client, root_asset_external_id=root_asset_external_id)
|
|
159
|
+
|
|
123
160
|
self._state.instances.store.write(extractor)
|
|
124
161
|
if self._verbose:
|
|
125
|
-
print(f"
|
|
162
|
+
print(f"Classic Graph {root_asset_external_id} read successfully")
|
|
126
163
|
|
|
127
164
|
|
|
128
165
|
@session_class_wrapper
|
|
@@ -145,7 +182,7 @@ class ExcelReadAPI(BaseReadAPI):
|
|
|
145
182
|
description=f"Excel file {reader!s} read as unverified data model",
|
|
146
183
|
)
|
|
147
184
|
self._store_rules(input_rules, change)
|
|
148
|
-
|
|
185
|
+
self._state.data_model.issue_lists.append(input_rules.issues)
|
|
149
186
|
return input_rules.issues
|
|
150
187
|
|
|
151
188
|
|
|
@@ -176,7 +213,7 @@ class YamlReadAPI(BaseReadAPI):
|
|
|
176
213
|
"NEAT needs a client to lookup the container definitions. "
|
|
177
214
|
"Please set the client in the session, NeatSession(client=client)."
|
|
178
215
|
)
|
|
179
|
-
system_containers = self._client.
|
|
216
|
+
system_containers = self._client.loaders.containers.retrieve(system_container_ids)
|
|
180
217
|
dms_importer.update_referenced_containers(system_containers)
|
|
181
218
|
|
|
182
219
|
importer = dms_importer
|
|
@@ -222,7 +259,7 @@ class CSVReadAPI(BaseReadAPI):
|
|
|
222
259
|
|
|
223
260
|
@session_class_wrapper
|
|
224
261
|
class RDFReadAPI(BaseReadAPI):
|
|
225
|
-
def __init__(self, state: SessionState, client:
|
|
262
|
+
def __init__(self, state: SessionState, client: NeatClient | None, verbose: bool) -> None:
|
|
226
263
|
super().__init__(state, client, verbose)
|
|
227
264
|
self.examples = RDFExamples(state)
|
|
228
265
|
|
cognite/neat/_session/_to.py
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
from pathlib import Path
|
|
2
2
|
from typing import Any, Literal, overload
|
|
3
3
|
|
|
4
|
-
from cognite.client import CogniteClient
|
|
5
4
|
from cognite.client.data_classes.data_modeling import SpaceApply
|
|
6
5
|
|
|
6
|
+
from cognite.neat._client import NeatClient
|
|
7
7
|
from cognite.neat._graph import loaders
|
|
8
8
|
from cognite.neat._issues import IssueList, catch_warnings
|
|
9
9
|
from cognite.neat._rules import exporters
|
|
@@ -17,7 +17,7 @@ from .exceptions import NeatSessionError, session_class_wrapper
|
|
|
17
17
|
|
|
18
18
|
@session_class_wrapper
|
|
19
19
|
class ToAPI:
|
|
20
|
-
def __init__(self, state: SessionState, client:
|
|
20
|
+
def __init__(self, state: SessionState, client: NeatClient | None, verbose: bool) -> None:
|
|
21
21
|
self._state = state
|
|
22
22
|
self._verbose = verbose
|
|
23
23
|
self.cdf = CDFToAPI(state, client, verbose)
|
|
@@ -75,7 +75,7 @@ class ToAPI:
|
|
|
75
75
|
|
|
76
76
|
@session_class_wrapper
|
|
77
77
|
class CDFToAPI:
|
|
78
|
-
def __init__(self, state: SessionState, client:
|
|
78
|
+
def __init__(self, state: SessionState, client: NeatClient | None, verbose: bool) -> None:
|
|
79
79
|
self._client = client
|
|
80
80
|
self._state = state
|
|
81
81
|
self._verbose = verbose
|
cognite/neat/_store/_base.py
CHANGED
|
@@ -7,10 +7,11 @@ from typing import cast
|
|
|
7
7
|
|
|
8
8
|
import pandas as pd
|
|
9
9
|
from pandas import Index
|
|
10
|
-
from rdflib import
|
|
10
|
+
from rdflib import Dataset, Namespace, URIRef
|
|
11
11
|
from rdflib.plugins.stores.sparqlstore import SPARQLUpdateStore
|
|
12
12
|
|
|
13
13
|
from cognite.neat._constants import DEFAULT_NAMESPACE
|
|
14
|
+
from cognite.neat._graph._shared import rdflib_to_oxi_type
|
|
14
15
|
from cognite.neat._graph.extractors import RdfFileExtractor, TripleExtractors
|
|
15
16
|
from cognite.neat._graph.queries import Queries
|
|
16
17
|
from cognite.neat._graph.transformers import Transformers
|
|
@@ -42,7 +43,7 @@ class NeatGraphStore:
|
|
|
42
43
|
|
|
43
44
|
def __init__(
|
|
44
45
|
self,
|
|
45
|
-
graph:
|
|
46
|
+
graph: Dataset,
|
|
46
47
|
rules: InformationRules | None = None,
|
|
47
48
|
):
|
|
48
49
|
self.rules: InformationRules | None = None
|
|
@@ -109,7 +110,7 @@ class NeatGraphStore:
|
|
|
109
110
|
|
|
110
111
|
@classmethod
|
|
111
112
|
def from_memory_store(cls, rules: InformationRules | None = None) -> "Self":
|
|
112
|
-
return cls(
|
|
113
|
+
return cls(Dataset(), rules)
|
|
113
114
|
|
|
114
115
|
@classmethod
|
|
115
116
|
def from_sparql_store(
|
|
@@ -127,7 +128,7 @@ class NeatGraphStore:
|
|
|
127
128
|
postAsEncoded=False,
|
|
128
129
|
autocommit=False,
|
|
129
130
|
)
|
|
130
|
-
graph =
|
|
131
|
+
graph = Dataset(store=store)
|
|
131
132
|
return cls(graph, rules)
|
|
132
133
|
|
|
133
134
|
@classmethod
|
|
@@ -150,9 +151,8 @@ class NeatGraphStore:
|
|
|
150
151
|
else:
|
|
151
152
|
raise Exception("Error initializing Oxigraph store")
|
|
152
153
|
|
|
153
|
-
graph =
|
|
154
|
+
graph = Dataset(
|
|
154
155
|
store=oxrdflib.OxigraphStore(store=oxi_store),
|
|
155
|
-
identifier=DEFAULT_NAMESPACE,
|
|
156
156
|
)
|
|
157
157
|
|
|
158
158
|
return cls(graph, rules)
|
|
@@ -162,7 +162,7 @@ class NeatGraphStore:
|
|
|
162
162
|
success = True
|
|
163
163
|
|
|
164
164
|
if isinstance(extractor, RdfFileExtractor) and not extractor.issue_list.has_errors:
|
|
165
|
-
self._parse_file(extractor.filepath, cast(str, extractor.
|
|
165
|
+
self._parse_file(extractor.filepath, cast(str, extractor.format), extractor.base_uri)
|
|
166
166
|
elif isinstance(extractor, RdfFileExtractor):
|
|
167
167
|
success = False
|
|
168
168
|
issue_text = "\n".join([issue.as_message() for issue in extractor.issue_list])
|
|
@@ -244,33 +244,36 @@ class NeatGraphStore:
|
|
|
244
244
|
def _parse_file(
|
|
245
245
|
self,
|
|
246
246
|
filepath: Path,
|
|
247
|
-
|
|
247
|
+
format: str = "turtle",
|
|
248
248
|
base_uri: URIRef | None = None,
|
|
249
249
|
) -> None:
|
|
250
250
|
"""Imports graph data from file.
|
|
251
251
|
|
|
252
252
|
Args:
|
|
253
253
|
filepath : Path to the file containing graph data
|
|
254
|
-
|
|
255
|
-
base_uri :
|
|
254
|
+
format : rdflib format of the file containing the RDF graph, by default "turtle"
|
|
255
|
+
base_uri : base URI to add to graph in case of relative URIs, by default None
|
|
256
|
+
|
|
257
|
+
!!! note "Oxigraph store"
|
|
258
|
+
By default we are using non-transactional mode for parsing RDF files.
|
|
259
|
+
This gives us a significant performance boost when importing large RDF files.
|
|
260
|
+
Under the hood, rdflib triggers the oxrdflib plugin, which in turn
|
|
261
|
+
calls `bulk_load` method from oxigraph store. See more at:
|
|
262
|
+
https://pyoxigraph.readthedocs.io/en/stable/store.html#pyoxigraph.Store.bulk_load
|
|
256
263
|
"""
|
|
257
264
|
|
|
258
265
|
# Oxigraph store, do not want to type hint this as it is an optional dependency
|
|
259
266
|
if type(self.graph.store).__name__ == "OxigraphStore":
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
)
|
|
271
|
-
cast(pyoxigraph.Store, self.graph.store._store).optimize()
|
|
272
|
-
|
|
273
|
-
parse_to_oxi_store()
|
|
267
|
+
local_import("pyoxigraph", "oxi")
|
|
268
|
+
|
|
269
|
+
# this is necessary to trigger rdflib oxigraph plugin
|
|
270
|
+
self.graph.parse(
|
|
271
|
+
filepath,
|
|
272
|
+
format=rdflib_to_oxi_type(format),
|
|
273
|
+
transactional=False,
|
|
274
|
+
publicID=base_uri,
|
|
275
|
+
)
|
|
276
|
+
self.graph.store._store.optimize() # type: ignore[attr-defined]
|
|
274
277
|
|
|
275
278
|
# All other stores
|
|
276
279
|
else:
|
cognite/neat/_utils/rdf_.py
CHANGED
|
@@ -173,7 +173,7 @@ def get_inheritance_path(child: Any, child_parent: dict[Any, list[Any]]) -> list
|
|
|
173
173
|
return path
|
|
174
174
|
|
|
175
175
|
|
|
176
|
-
def add_triples_in_batch(graph: Graph, triples: Iterable[Triple], batch_size: int = 10_000):
|
|
176
|
+
def add_triples_in_batch(graph: Graph, triples: Iterable[Triple], batch_size: int = 10_000) -> None:
|
|
177
177
|
"""Adds triples to the graph store in batches.
|
|
178
178
|
|
|
179
179
|
Args:
|
|
@@ -204,3 +204,30 @@ def add_triples_in_batch(graph: Graph, triples: Iterable[Triple], batch_size: in
|
|
|
204
204
|
check_commit()
|
|
205
205
|
|
|
206
206
|
check_commit(force_commit=True)
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
def remove_instance_ids_in_batch(graph: Graph, instance_ids: Iterable[URIRef], batch_size: int = 1_000) -> None:
|
|
210
|
+
"""Removes all triples related to the given instances in the graph store in batches.
|
|
211
|
+
|
|
212
|
+
Args:
|
|
213
|
+
graph: The graph store to remove triples from
|
|
214
|
+
instance_ids: list of instances to remove triples from
|
|
215
|
+
batch_size: Number of instances removed per commit, by default 1_000
|
|
216
|
+
|
|
217
|
+
"""
|
|
218
|
+
batch_count = 0
|
|
219
|
+
|
|
220
|
+
def check_commit(force_commit: bool = False):
|
|
221
|
+
"""Commit nodes to the graph if batch counter is reached or if force_commit is True"""
|
|
222
|
+
nonlocal batch_count
|
|
223
|
+
batch_count += 1
|
|
224
|
+
if force_commit or batch_count >= batch_size:
|
|
225
|
+
graph.commit()
|
|
226
|
+
batch_count = 0
|
|
227
|
+
return
|
|
228
|
+
|
|
229
|
+
for instance_id in instance_ids:
|
|
230
|
+
graph.remove((instance_id, None, None))
|
|
231
|
+
check_commit()
|
|
232
|
+
|
|
233
|
+
check_commit(force_commit=True)
|
cognite/neat/_version.py
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
__version__ = "0.
|
|
1
|
+
__version__ = "0.99.0"
|
|
2
2
|
__engine__ = "^1.0.3"
|
|
@@ -2,6 +2,7 @@ import time
|
|
|
2
2
|
from pathlib import Path
|
|
3
3
|
from typing import ClassVar, Literal, cast
|
|
4
4
|
|
|
5
|
+
from cognite.neat._client import NeatClient
|
|
5
6
|
from cognite.neat._issues.errors import WorkflowStepNotInitializedError
|
|
6
7
|
from cognite.neat._rules import exporters
|
|
7
8
|
from cognite.neat._rules._shared import DMSRules, InformationRules, VerifiedRules
|
|
@@ -100,7 +101,7 @@ class DeleteDataModelFromCDF(Step):
|
|
|
100
101
|
|
|
101
102
|
report_lines = ["# Data Model Deletion from CDF\n\n"]
|
|
102
103
|
errors = []
|
|
103
|
-
for result in dms_exporter.delete_from_cdf(rules=dms_rules, client=cdf_client, dry_run=dry_run):
|
|
104
|
+
for result in dms_exporter.delete_from_cdf(rules=dms_rules, client=NeatClient(cdf_client), dry_run=dry_run):
|
|
104
105
|
report_lines.append(str(result))
|
|
105
106
|
errors.extend(result.error_messages)
|
|
106
107
|
|
|
@@ -220,7 +221,9 @@ class RulesToDMS(Step):
|
|
|
220
221
|
|
|
221
222
|
report_lines = ["# DMS Schema Export to CDF\n\n"]
|
|
222
223
|
errors = []
|
|
223
|
-
for result in dms_exporter.export_to_cdf_iterable(
|
|
224
|
+
for result in dms_exporter.export_to_cdf_iterable(
|
|
225
|
+
rules=dms_rules, client=NeatClient(cdf_client), dry_run=dry_run
|
|
226
|
+
):
|
|
224
227
|
report_lines.append(str(result))
|
|
225
228
|
errors.extend(result.error_messages)
|
|
226
229
|
|
|
@@ -584,7 +587,9 @@ class RulesToCDFTransformations(Step):
|
|
|
584
587
|
|
|
585
588
|
report_lines = ["# DMS Schema Export to CDF\n\n"]
|
|
586
589
|
errors = []
|
|
587
|
-
for result in dms_exporter.export_to_cdf_iterable(
|
|
590
|
+
for result in dms_exporter.export_to_cdf_iterable(
|
|
591
|
+
rules=dms_rules, client=NeatClient(cdf_client), dry_run=dry_run
|
|
592
|
+
):
|
|
588
593
|
report_lines.append(str(result))
|
|
589
594
|
errors.extend(result.error_messages)
|
|
590
595
|
|
|
@@ -5,6 +5,7 @@ from typing import ClassVar
|
|
|
5
5
|
from cognite.client import CogniteClient
|
|
6
6
|
from cognite.client.data_classes.data_modeling import DataModelId
|
|
7
7
|
|
|
8
|
+
from cognite.neat._client import NeatClient
|
|
8
9
|
from cognite.neat._issues.errors import WorkflowStepNotInitializedError
|
|
9
10
|
from cognite.neat._issues.formatters import FORMATTER_BY_NAME
|
|
10
11
|
from cognite.neat._rules import importers
|
|
@@ -299,7 +300,9 @@ class DMSToRules(Step):
|
|
|
299
300
|
return FlowMessage(error_text=error_text, step_execution_status=StepExecutionStatus.ABORT_AND_FAIL)
|
|
300
301
|
ref_model_id = ref_model.as_id()
|
|
301
302
|
|
|
302
|
-
dms_importer = importers.DMSImporter.from_data_model_id(
|
|
303
|
+
dms_importer = importers.DMSImporter.from_data_model_id(
|
|
304
|
+
NeatClient(cdf_client), datamodel_entity.as_id(), ref_model_id
|
|
305
|
+
)
|
|
303
306
|
|
|
304
307
|
# if role is None, it will be inferred from the rules file
|
|
305
308
|
role = self.configs.get("Role")
|