cognite-neat 0.98.0__py3-none-any.whl → 0.99.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cognite-neat might be problematic. Click here for more details.

Files changed (103) hide show
  1. cognite/neat/_client/__init__.py +4 -0
  2. cognite/neat/_client/_api/data_modeling_loaders.py +585 -0
  3. cognite/neat/_client/_api/schema.py +111 -0
  4. cognite/neat/_client/_api_client.py +17 -0
  5. cognite/neat/_client/data_classes/__init__.py +0 -0
  6. cognite/neat/{_utils/cdf/data_classes.py → _client/data_classes/data_modeling.py} +8 -135
  7. cognite/neat/_client/data_classes/schema.py +495 -0
  8. cognite/neat/_constants.py +27 -4
  9. cognite/neat/_graph/_shared.py +14 -15
  10. cognite/neat/_graph/extractors/_classic_cdf/_assets.py +14 -154
  11. cognite/neat/_graph/extractors/_classic_cdf/_base.py +154 -7
  12. cognite/neat/_graph/extractors/_classic_cdf/_classic.py +25 -14
  13. cognite/neat/_graph/extractors/_classic_cdf/_data_sets.py +17 -92
  14. cognite/neat/_graph/extractors/_classic_cdf/_events.py +13 -162
  15. cognite/neat/_graph/extractors/_classic_cdf/_files.py +15 -179
  16. cognite/neat/_graph/extractors/_classic_cdf/_labels.py +32 -100
  17. cognite/neat/_graph/extractors/_classic_cdf/_relationships.py +27 -178
  18. cognite/neat/_graph/extractors/_classic_cdf/_sequences.py +14 -139
  19. cognite/neat/_graph/extractors/_classic_cdf/_timeseries.py +15 -173
  20. cognite/neat/_graph/extractors/_rdf_file.py +6 -7
  21. cognite/neat/_graph/loaders/_rdf2dms.py +2 -2
  22. cognite/neat/_graph/queries/_base.py +17 -1
  23. cognite/neat/_graph/transformers/_classic_cdf.py +74 -147
  24. cognite/neat/_graph/transformers/_prune_graph.py +1 -1
  25. cognite/neat/_graph/transformers/_rdfpath.py +1 -1
  26. cognite/neat/_issues/_base.py +26 -17
  27. cognite/neat/_issues/errors/__init__.py +4 -2
  28. cognite/neat/_issues/errors/_external.py +7 -0
  29. cognite/neat/_issues/errors/_properties.py +2 -7
  30. cognite/neat/_issues/errors/_resources.py +1 -1
  31. cognite/neat/_issues/warnings/__init__.py +8 -0
  32. cognite/neat/_issues/warnings/_external.py +16 -0
  33. cognite/neat/_issues/warnings/_properties.py +16 -0
  34. cognite/neat/_issues/warnings/_resources.py +26 -2
  35. cognite/neat/_issues/warnings/user_modeling.py +4 -4
  36. cognite/neat/_rules/_constants.py +8 -11
  37. cognite/neat/_rules/analysis/_base.py +8 -4
  38. cognite/neat/_rules/exporters/_base.py +3 -4
  39. cognite/neat/_rules/exporters/_rules2dms.py +33 -46
  40. cognite/neat/_rules/importers/__init__.py +1 -3
  41. cognite/neat/_rules/importers/_base.py +1 -1
  42. cognite/neat/_rules/importers/_dms2rules.py +6 -29
  43. cognite/neat/_rules/importers/_rdf/__init__.py +5 -0
  44. cognite/neat/_rules/importers/_rdf/_base.py +34 -11
  45. cognite/neat/_rules/importers/_rdf/_imf2rules.py +91 -0
  46. cognite/neat/_rules/importers/_rdf/_inference2rules.py +43 -35
  47. cognite/neat/_rules/importers/_rdf/_owl2rules.py +80 -0
  48. cognite/neat/_rules/importers/_rdf/_shared.py +138 -441
  49. cognite/neat/_rules/models/__init__.py +1 -1
  50. cognite/neat/_rules/models/_base_rules.py +22 -12
  51. cognite/neat/_rules/models/dms/__init__.py +4 -2
  52. cognite/neat/_rules/models/dms/_exporter.py +45 -48
  53. cognite/neat/_rules/models/dms/_rules.py +20 -17
  54. cognite/neat/_rules/models/dms/_rules_input.py +52 -8
  55. cognite/neat/_rules/models/dms/_validation.py +391 -119
  56. cognite/neat/_rules/models/entities/_single_value.py +32 -4
  57. cognite/neat/_rules/models/information/__init__.py +2 -0
  58. cognite/neat/_rules/models/information/_rules.py +0 -67
  59. cognite/neat/_rules/models/information/_validation.py +9 -9
  60. cognite/neat/_rules/models/mapping/__init__.py +2 -3
  61. cognite/neat/_rules/models/mapping/_classic2core.py +36 -146
  62. cognite/neat/_rules/models/mapping/_classic2core.yaml +343 -0
  63. cognite/neat/_rules/transformers/__init__.py +2 -2
  64. cognite/neat/_rules/transformers/_converters.py +110 -11
  65. cognite/neat/_rules/transformers/_mapping.py +105 -30
  66. cognite/neat/_rules/transformers/_pipelines.py +1 -1
  67. cognite/neat/_rules/transformers/_verification.py +31 -3
  68. cognite/neat/_session/_base.py +24 -8
  69. cognite/neat/_session/_drop.py +35 -0
  70. cognite/neat/_session/_inspect.py +17 -5
  71. cognite/neat/_session/_mapping.py +39 -0
  72. cognite/neat/_session/_prepare.py +219 -23
  73. cognite/neat/_session/_read.py +49 -12
  74. cognite/neat/_session/_to.py +8 -5
  75. cognite/neat/_session/exceptions.py +4 -0
  76. cognite/neat/_store/_base.py +27 -24
  77. cognite/neat/_utils/rdf_.py +34 -5
  78. cognite/neat/_version.py +1 -1
  79. cognite/neat/_workflows/steps/lib/current/rules_exporter.py +5 -88
  80. cognite/neat/_workflows/steps/lib/current/rules_importer.py +3 -14
  81. cognite/neat/_workflows/steps/lib/current/rules_validator.py +6 -7
  82. {cognite_neat-0.98.0.dist-info → cognite_neat-0.99.1.dist-info}/METADATA +3 -3
  83. {cognite_neat-0.98.0.dist-info → cognite_neat-0.99.1.dist-info}/RECORD +87 -92
  84. cognite/neat/_rules/importers/_rdf/_imf2rules/__init__.py +0 -3
  85. cognite/neat/_rules/importers/_rdf/_imf2rules/_imf2classes.py +0 -86
  86. cognite/neat/_rules/importers/_rdf/_imf2rules/_imf2metadata.py +0 -29
  87. cognite/neat/_rules/importers/_rdf/_imf2rules/_imf2properties.py +0 -130
  88. cognite/neat/_rules/importers/_rdf/_imf2rules/_imf2rules.py +0 -154
  89. cognite/neat/_rules/importers/_rdf/_owl2rules/__init__.py +0 -3
  90. cognite/neat/_rules/importers/_rdf/_owl2rules/_owl2classes.py +0 -58
  91. cognite/neat/_rules/importers/_rdf/_owl2rules/_owl2metadata.py +0 -65
  92. cognite/neat/_rules/importers/_rdf/_owl2rules/_owl2properties.py +0 -59
  93. cognite/neat/_rules/importers/_rdf/_owl2rules/_owl2rules.py +0 -39
  94. cognite/neat/_rules/models/dms/_schema.py +0 -1101
  95. cognite/neat/_rules/models/mapping/_base.py +0 -131
  96. cognite/neat/_utils/cdf/loaders/__init__.py +0 -25
  97. cognite/neat/_utils/cdf/loaders/_base.py +0 -54
  98. cognite/neat/_utils/cdf/loaders/_data_modeling.py +0 -339
  99. cognite/neat/_utils/cdf/loaders/_ingestion.py +0 -167
  100. /cognite/neat/{_utils/cdf → _client/_api}/__init__.py +0 -0
  101. {cognite_neat-0.98.0.dist-info → cognite_neat-0.99.1.dist-info}/LICENSE +0 -0
  102. {cognite_neat-0.98.0.dist-info → cognite_neat-0.99.1.dist-info}/WHEEL +0 -0
  103. {cognite_neat-0.98.0.dist-info → cognite_neat-0.99.1.dist-info}/entry_points.txt +0 -0
@@ -1,26 +1,45 @@
1
+ import copy
1
2
  from collections.abc import Collection
2
3
  from datetime import datetime, timezone
3
- from typing import Literal
4
+ from typing import Literal, cast
4
5
 
5
6
  from cognite.client.data_classes.data_modeling import DataModelIdentifier
6
7
  from rdflib import URIRef
7
8
 
9
+ from cognite.neat._client import NeatClient
10
+ from cognite.neat._constants import DEFAULT_NAMESPACE
11
+ from cognite.neat._graph.transformers import RelationshipToSchemaTransformer
8
12
  from cognite.neat._graph.transformers._rdfpath import MakeConnectionOnExactMatch
9
- from cognite.neat._rules._shared import ReadRules
13
+ from cognite.neat._rules._shared import InputRules, ReadRules
14
+ from cognite.neat._rules.importers import DMSImporter
15
+ from cognite.neat._rules.models import DMSRules
16
+ from cognite.neat._rules.models.dms import DMSValidation
10
17
  from cognite.neat._rules.models.information._rules_input import InformationInputRules
11
- from cognite.neat._rules.transformers import ReduceCogniteModel, ToCompliantEntities, ToExtension
18
+ from cognite.neat._rules.transformers import (
19
+ PrefixEntities,
20
+ ReduceCogniteModel,
21
+ ToCompliantEntities,
22
+ ToExtension,
23
+ VerifyDMSRules,
24
+ )
25
+ from cognite.neat._store._provenance import Agent as ProvenanceAgent
12
26
  from cognite.neat._store._provenance import Change
13
27
 
14
28
  from ._state import SessionState
15
29
  from .exceptions import NeatSessionError, session_class_wrapper
16
30
 
31
+ try:
32
+ from rich import print
33
+ except ImportError:
34
+ ...
35
+
17
36
 
18
37
  @session_class_wrapper
19
38
  class PrepareAPI:
20
- def __init__(self, state: SessionState, verbose: bool) -> None:
39
+ def __init__(self, client: NeatClient | None, state: SessionState, verbose: bool) -> None:
21
40
  self._state = state
22
41
  self._verbose = verbose
23
- self.data_model = DataModelPrepareAPI(state, verbose)
42
+ self.data_model = DataModelPrepareAPI(client, state, verbose)
24
43
  self.instances = InstancePrepareAPI(state, verbose)
25
44
 
26
45
 
@@ -94,34 +113,77 @@ class InstancePrepareAPI:
94
113
  raise NeatSessionError(f"Property {property_} is not defined for type {type_}. Cannot make connection")
95
114
  return type_uri[0], property_uri[0]
96
115
 
116
+ def relationships_as_connections(self, limit: int = 1) -> None:
117
+ """This assumes that you have read a classic CDF knowledge graph including relationships.
118
+
119
+ This transformer analyzes the relationships in the graph and modifies them to be part of the schema
120
+ for Assets, Events, Files, Sequences, and TimeSeries. Relationships without any properties
121
+ are replaced by a simple relationship between the source and target nodes. Relationships with
122
+ properties are replaced by a schema that contains the properties as attributes.
123
+
124
+ Args:
125
+ limit: The minimum number of relationships that need to be present for it
126
+ to be converted into a schema. Default is 1.
127
+
128
+ """
129
+ transformer = RelationshipToSchemaTransformer(limit=limit)
130
+ self._state.instances.store.transform(transformer)
131
+
97
132
 
98
133
  @session_class_wrapper
99
134
  class DataModelPrepareAPI:
100
- def __init__(self, state: SessionState, verbose: bool) -> None:
135
+ def __init__(self, client: NeatClient | None, state: SessionState, verbose: bool) -> None:
136
+ self._client = client
101
137
  self._state = state
102
138
  self._verbose = verbose
103
139
 
104
140
  def cdf_compliant_external_ids(self) -> None:
105
141
  """Convert data model component external ids to CDF compliant entities."""
106
- if input := self._state.data_model.last_info_unverified_rule:
107
- source_id, rules = input
142
+ source_id, rules = self._state.data_model.last_info_unverified_rule
143
+
144
+ start = datetime.now(timezone.utc)
145
+ transformer = ToCompliantEntities()
146
+ output: ReadRules[InformationInputRules] = transformer.transform(rules)
147
+ end = datetime.now(timezone.utc)
148
+
149
+ change = Change.from_rules_activity(
150
+ output,
151
+ transformer.agent,
152
+ start,
153
+ end,
154
+ "Converted external ids to CDF compliant entities",
155
+ self._state.data_model.provenance.source_entity(source_id)
156
+ or self._state.data_model.provenance.target_entity(source_id),
157
+ )
108
158
 
109
- start = datetime.now(timezone.utc)
110
- transformer = ToCompliantEntities()
111
- output: ReadRules[InformationInputRules] = transformer.transform(rules)
112
- end = datetime.now(timezone.utc)
159
+ self._state.data_model.write(output, change)
113
160
 
114
- change = Change.from_rules_activity(
115
- output,
116
- transformer.agent,
117
- start,
118
- end,
119
- "Converted external ids to CDF compliant entities",
120
- self._state.data_model.provenance.source_entity(source_id)
121
- or self._state.data_model.provenance.target_entity(source_id),
122
- )
161
+ def prefix(self, prefix: str) -> None:
162
+ """Prefix all views in the data model with the given prefix.
123
163
 
124
- self._state.data_model.write(output, change)
164
+ Args:
165
+ prefix: The prefix to add to the views in the data model.
166
+
167
+ """
168
+ source_id, rules = self._state.data_model.last_unverified_rule
169
+
170
+ start = datetime.now(timezone.utc)
171
+ transformer = PrefixEntities(prefix)
172
+ new_rules = cast(InputRules, copy.deepcopy(rules.get_rules()))
173
+ output = transformer.transform(new_rules)
174
+ end = datetime.now(timezone.utc)
175
+
176
+ change = Change.from_rules_activity(
177
+ output,
178
+ transformer.agent,
179
+ start,
180
+ end,
181
+ "Added prefix to the data model views",
182
+ self._state.data_model.provenance.source_entity(source_id)
183
+ or self._state.data_model.provenance.target_entity(source_id),
184
+ )
185
+
186
+ self._state.data_model.write(output, change)
125
187
 
126
188
  def to_enterprise(
127
189
  self,
@@ -185,7 +247,7 @@ class DataModelPrepareAPI:
185
247
  data_model_id: DataModelIdentifier,
186
248
  org_name: str = "My",
187
249
  mode: Literal["read", "write"] = "read",
188
- dummy_property: str = "dummy",
250
+ dummy_property: str = "GUID",
189
251
  ) -> None:
190
252
  """Uses the current data model as a basis to create solution data model
191
253
 
@@ -235,6 +297,81 @@ class DataModelPrepareAPI:
235
297
 
236
298
  self._state.data_model.write(output.rules, change)
237
299
 
300
+ def to_data_product(
301
+ self,
302
+ data_model_id: DataModelIdentifier,
303
+ org_name: str = "",
304
+ include: Literal["same-space", "all"] = "same-space",
305
+ ) -> None:
306
+ """Uses the current data model as a basis to create data product data model.
307
+
308
+ A data product model is a data model that ONLY maps to containers and does not use implements. This is
309
+ typically used for defining the data in a data product.
310
+
311
+ Args:
312
+ data_model_id: The data product data model id that is being created.
313
+ org_name: Organization name to use for the views in the new data model.
314
+ include: The views to include in the data product data model. Can be either "same-space" or "all".
315
+ If you set same-space, only the views in the same space as the data model will be included.
316
+ """
317
+ source_id, rules = self._state.data_model.last_verified_dms_rules
318
+
319
+ dms_ref: DMSRules | None = None
320
+ view_ids, container_ids = DMSValidation(rules, self._client).imported_views_and_containers_ids()
321
+ if view_ids or container_ids:
322
+ if self._client is None:
323
+ raise NeatSessionError(
324
+ "No client provided. You are referencing unknown views and containers in your data model, "
325
+ "NEAT needs a client to lookup the definitions. "
326
+ "Please set the client in the session, NeatSession(client=client)."
327
+ )
328
+ schema = self._client.schema.retrieve([v.as_id() for v in view_ids], [c.as_id() for c in container_ids])
329
+
330
+ importer = DMSImporter(schema)
331
+ reference_rules = importer.to_rules().rules
332
+ if reference_rules is not None:
333
+ imported = VerifyDMSRules("continue").transform(reference_rules)
334
+ if dms_ref := imported.rules:
335
+ rules = rules.model_copy(deep=True)
336
+ if rules.containers is None:
337
+ rules.containers = dms_ref.containers
338
+ else:
339
+ existing_containers = {c.container for c in rules.containers}
340
+ rules.containers.extend(
341
+ [c for c in dms_ref.containers or [] if c.container not in existing_containers]
342
+ )
343
+ existing_views = {v.view for v in rules.views}
344
+ rules.views.extend([v for v in dms_ref.views if v.view not in existing_views])
345
+ existing_properties = {(p.view, p.view_property) for p in rules.properties}
346
+ rules.properties.extend(
347
+ [p for p in dms_ref.properties if (p.view, p.view_property) not in existing_properties]
348
+ )
349
+
350
+ start = datetime.now(timezone.utc)
351
+ transformer = ToExtension(
352
+ new_model_id=data_model_id,
353
+ org_name=org_name,
354
+ type_="data_product",
355
+ include=include,
356
+ )
357
+ output = transformer.transform(rules)
358
+ end = datetime.now(timezone.utc)
359
+
360
+ change = Change.from_rules_activity(
361
+ output,
362
+ transformer.agent,
363
+ start,
364
+ end,
365
+ (
366
+ f"Prepared data model {data_model_id} to be data product model "
367
+ f"on top of {rules.metadata.as_data_model_id()}"
368
+ ),
369
+ self._state.data_model.provenance.source_entity(source_id)
370
+ or self._state.data_model.provenance.target_entity(source_id),
371
+ )
372
+
373
+ self._state.data_model.write(output.rules, change)
374
+
238
375
  def reduce(self, drop: Collection[Literal["3D", "Annotation", "BaseViews"] | str]) -> None:
239
376
  """This is a special method that allow you to drop parts of the data model.
240
377
  This only applies to Cognite Data Models.
@@ -267,3 +404,62 @@ class DataModelPrepareAPI:
267
404
  )
268
405
 
269
406
  self._state.data_model.write(output.rules, change)
407
+
408
+ def include_referenced(self) -> None:
409
+ """Include referenced views and containers in the data model."""
410
+ start = datetime.now(timezone.utc)
411
+
412
+ source_id, rules = self._state.data_model.last_verified_dms_rules
413
+ view_ids, container_ids = DMSValidation(rules, self._client).imported_views_and_containers_ids()
414
+ if not (view_ids or container_ids):
415
+ print(
416
+ f"Data model {rules.metadata.as_data_model_id()} does not have any referenced views or containers."
417
+ f"that is not already included in the data model."
418
+ )
419
+ return
420
+ if self._client is None:
421
+ raise NeatSessionError(
422
+ "No client provided. You are referencing unknown views and containers in your data model, "
423
+ "NEAT needs a client to lookup the definitions. "
424
+ "Please set the client in the session, NeatSession(client=client)."
425
+ )
426
+ schema = self._client.schema.retrieve([v.as_id() for v in view_ids], [c.as_id() for c in container_ids])
427
+ copy_ = rules.model_copy(deep=True)
428
+ copy_.metadata.version = f"{rules.metadata.version}_completed"
429
+ importer = DMSImporter(schema)
430
+ imported = importer.to_rules()
431
+ if imported.rules is None:
432
+ self._state.data_model.issue_lists.append(imported.issues)
433
+ raise NeatSessionError(
434
+ "Could not import the referenced views and containers. "
435
+ "See `neat.inspect.issues()` for more information."
436
+ )
437
+ verified = VerifyDMSRules("continue", validate=False).transform(imported.rules)
438
+ if verified.rules is None:
439
+ self._state.data_model.issue_lists.append(verified.issues)
440
+ raise NeatSessionError(
441
+ "Could not verify the referenced views and containers. "
442
+ "See `neat.inspect.issues()` for more information."
443
+ )
444
+ if copy_.containers is None:
445
+ copy_.containers = verified.rules.containers
446
+ else:
447
+ existing_containers = {c.container for c in copy_.containers}
448
+ copy_.containers.extend(
449
+ [c for c in verified.rules.containers or [] if c.container not in existing_containers]
450
+ )
451
+ existing_views = {v.view for v in copy_.views}
452
+ copy_.views.extend([v for v in verified.rules.views if v.view not in existing_views])
453
+ end = datetime.now(timezone.utc)
454
+
455
+ change = Change.from_rules_activity(
456
+ copy_,
457
+ ProvenanceAgent(id_=DEFAULT_NAMESPACE["agent/"]),
458
+ start,
459
+ end,
460
+ (f"Included referenced views and containers in the data model {rules.metadata.as_data_model_id()}"),
461
+ self._state.data_model.provenance.source_entity(source_id)
462
+ or self._state.data_model.provenance.target_entity(source_id),
463
+ )
464
+
465
+ self._state.data_model.write(copy_, change)
@@ -3,9 +3,9 @@ from datetime import datetime, timezone
3
3
  from pathlib import Path
4
4
  from typing import Any, Literal
5
5
 
6
- from cognite.client import CogniteClient
7
6
  from cognite.client.data_classes.data_modeling import DataModelId, DataModelIdentifier
8
7
 
8
+ from cognite.neat._client import NeatClient
9
9
  from cognite.neat._constants import COGNITE_SPACES
10
10
  from cognite.neat._graph import examples as instances_examples
11
11
  from cognite.neat._graph import extractors
@@ -27,7 +27,7 @@ from .exceptions import NeatSessionError, session_class_wrapper
27
27
 
28
28
  @session_class_wrapper
29
29
  class ReadAPI:
30
- def __init__(self, state: SessionState, client: CogniteClient | None, verbose: bool) -> None:
30
+ def __init__(self, state: SessionState, client: NeatClient | None, verbose: bool) -> None:
31
31
  self._state = state
32
32
  self._verbose = verbose
33
33
  self.cdf = CDFReadAPI(state, client, verbose)
@@ -39,7 +39,7 @@ class ReadAPI:
39
39
 
40
40
  @session_class_wrapper
41
41
  class BaseReadAPI:
42
- def __init__(self, state: SessionState, client: CogniteClient | None, verbose: bool) -> None:
42
+ def __init__(self, state: SessionState, client: NeatClient | None, verbose: bool) -> None:
43
43
  self._state = state
44
44
  self._verbose = verbose
45
45
  self._client = client
@@ -67,12 +67,12 @@ class BaseReadAPI:
67
67
 
68
68
  @session_class_wrapper
69
69
  class CDFReadAPI(BaseReadAPI):
70
- def __init__(self, state: SessionState, client: CogniteClient | None, verbose: bool) -> None:
70
+ def __init__(self, state: SessionState, client: NeatClient | None, verbose: bool) -> None:
71
71
  super().__init__(state, client, verbose)
72
72
  self.classic = CDFClassicAPI(state, client, verbose)
73
73
 
74
74
  @property
75
- def _get_client(self) -> CogniteClient:
75
+ def _get_client(self) -> NeatClient:
76
76
  if self._client is None:
77
77
  raise NeatValueError("No client provided. Please provide a client to read a data model.")
78
78
  return self._client
@@ -113,16 +113,53 @@ class CDFReadAPI(BaseReadAPI):
113
113
  @session_class_wrapper
114
114
  class CDFClassicAPI(BaseReadAPI):
115
115
  @property
116
- def _get_client(self) -> CogniteClient:
116
+ def _get_client(self) -> NeatClient:
117
117
  if self._client is None:
118
118
  raise ValueError("No client provided. Please provide a client to read a data model.")
119
119
  return self._client
120
120
 
121
- def assets(self, root_asset_external_id: str) -> None:
122
- extractor = extractors.AssetsExtractor.from_hierarchy(self._get_client, root_asset_external_id)
121
+ def graph(self, root_asset_external_id: str) -> None:
122
+ """Reads the classic knowledge graph from CDF.
123
+
124
+ The Classic Graph consists of the following core resource types.
125
+
126
+ Classic Node CDF Resources:
127
+ - Assets
128
+ - TimeSeries
129
+ - Sequences
130
+ - Events
131
+ - Files
132
+
133
+ All the classic node CDF resources can have one or more connections to one or more assets. This
134
+ will match a direct relationship in the data modeling of CDF.
135
+
136
+ In addition, you have relationships between the classic node CDF resources. This matches an edge
137
+ in the data modeling of CDF.
138
+
139
+ Finally, you have labels and data sets that are used to organize the graph. Data sets have a similar,
140
+ but different, role as a space in data modeling. While labels can be compared to node types in data modeling,
141
+ used to quickly filter and find nodes/edges.
142
+
143
+ This extractor will extract the classic CDF graph into Neat starting from either a data set or a root asset.
144
+
145
+ It works as follows:
146
+
147
+ 1. Extract all core nodes (assets, time series, sequences, events, files) filtered by the given data set or
148
+ root asset.
149
+ 2. Extract all relationships starting from any of the extracted core nodes.
150
+ 3. Extract all core nodes that are targets of the relationships that are not already extracted.
151
+ 4. Extract all labels that are connected to the extracted core nodes/relationships.
152
+ 5. Extract all data sets that are connected to the extracted core nodes/relationships.
153
+
154
+ Args:
155
+ root_asset_external_id: The external id of the root asset
156
+
157
+ """
158
+ extractor = extractors.ClassicGraphExtractor(self._get_client, root_asset_external_id=root_asset_external_id)
159
+
123
160
  self._state.instances.store.write(extractor)
124
161
  if self._verbose:
125
- print(f"Asset hierarchy {root_asset_external_id} read successfully")
162
+ print(f"Classic Graph {root_asset_external_id} read successfully")
126
163
 
127
164
 
128
165
  @session_class_wrapper
@@ -145,7 +182,7 @@ class ExcelReadAPI(BaseReadAPI):
145
182
  description=f"Excel file {reader!s} read as unverified data model",
146
183
  )
147
184
  self._store_rules(input_rules, change)
148
-
185
+ self._state.data_model.issue_lists.append(input_rules.issues)
149
186
  return input_rules.issues
150
187
 
151
188
 
@@ -176,7 +213,7 @@ class YamlReadAPI(BaseReadAPI):
176
213
  "NEAT needs a client to lookup the container definitions. "
177
214
  "Please set the client in the session, NeatSession(client=client)."
178
215
  )
179
- system_containers = self._client.data_modeling.containers.retrieve(system_container_ids)
216
+ system_containers = self._client.loaders.containers.retrieve(system_container_ids)
180
217
  dms_importer.update_referenced_containers(system_containers)
181
218
 
182
219
  importer = dms_importer
@@ -222,7 +259,7 @@ class CSVReadAPI(BaseReadAPI):
222
259
 
223
260
  @session_class_wrapper
224
261
  class RDFReadAPI(BaseReadAPI):
225
- def __init__(self, state: SessionState, client: CogniteClient | None, verbose: bool) -> None:
262
+ def __init__(self, state: SessionState, client: NeatClient | None, verbose: bool) -> None:
226
263
  super().__init__(state, client, verbose)
227
264
  self.examples = RDFExamples(state)
228
265
 
@@ -1,9 +1,9 @@
1
1
  from pathlib import Path
2
2
  from typing import Any, Literal, overload
3
3
 
4
- from cognite.client import CogniteClient
5
4
  from cognite.client.data_classes.data_modeling import SpaceApply
6
5
 
6
+ from cognite.neat._client import NeatClient
7
7
  from cognite.neat._graph import loaders
8
8
  from cognite.neat._issues import IssueList, catch_warnings
9
9
  from cognite.neat._rules import exporters
@@ -17,7 +17,7 @@ from .exceptions import NeatSessionError, session_class_wrapper
17
17
 
18
18
  @session_class_wrapper
19
19
  class ToAPI:
20
- def __init__(self, state: SessionState, client: CogniteClient | None, verbose: bool) -> None:
20
+ def __init__(self, state: SessionState, client: NeatClient | None, verbose: bool) -> None:
21
21
  self._state = state
22
22
  self._verbose = verbose
23
23
  self.cdf = CDFToAPI(state, client, verbose)
@@ -66,16 +66,19 @@ class ToAPI:
66
66
  "This is required for the 'toolkit' format."
67
67
  )
68
68
  dms_rule = self._state.data_model.last_verified_dms_rules[1]
69
- exporters.DMSExporter().export_to_file(dms_rule, Path(io))
69
+ user_path = Path(io)
70
+ if user_path.suffix == "" and not user_path.exists():
71
+ user_path.mkdir(parents=True)
72
+ exporters.DMSExporter().export_to_file(dms_rule, user_path)
70
73
  else:
71
- raise NeatSessionError("Please provide a valid format. {['neat', 'toolkit']}")
74
+ raise NeatSessionError("Please provide a valid format. 'neat' or 'toolkit'")
72
75
 
73
76
  return None
74
77
 
75
78
 
76
79
  @session_class_wrapper
77
80
  class CDFToAPI:
78
- def __init__(self, state: SessionState, client: CogniteClient | None, verbose: bool) -> None:
81
+ def __init__(self, state: SessionState, client: NeatClient | None, verbose: bool) -> None:
79
82
  self._client = client
80
83
  self._state = state
81
84
  self._verbose = verbose
@@ -2,6 +2,8 @@ import functools
2
2
  from collections.abc import Callable
3
3
  from typing import Any
4
4
 
5
+ from cognite.neat._issues.errors import CDFMissingClientError
6
+
5
7
  from ._collector import _COLLECTOR
6
8
 
7
9
  try:
@@ -27,6 +29,8 @@ def _session_method_wrapper(func: Callable, cls_name: str):
27
29
  except NeatSessionError as e:
28
30
  action = _get_action()
29
31
  print(f"{_PREFIX} Cannot {action}: {e}")
32
+ except CDFMissingClientError as e:
33
+ print(f"{_PREFIX} {e.as_message()}")
30
34
  except ModuleNotFoundError as e:
31
35
  if e.name == "neatengine":
32
36
  action = _get_action()
@@ -7,10 +7,11 @@ from typing import cast
7
7
 
8
8
  import pandas as pd
9
9
  from pandas import Index
10
- from rdflib import Graph, Namespace, URIRef
10
+ from rdflib import Dataset, Namespace, URIRef
11
11
  from rdflib.plugins.stores.sparqlstore import SPARQLUpdateStore
12
12
 
13
13
  from cognite.neat._constants import DEFAULT_NAMESPACE
14
+ from cognite.neat._graph._shared import rdflib_to_oxi_type
14
15
  from cognite.neat._graph.extractors import RdfFileExtractor, TripleExtractors
15
16
  from cognite.neat._graph.queries import Queries
16
17
  from cognite.neat._graph.transformers import Transformers
@@ -42,7 +43,7 @@ class NeatGraphStore:
42
43
 
43
44
  def __init__(
44
45
  self,
45
- graph: Graph,
46
+ graph: Dataset,
46
47
  rules: InformationRules | None = None,
47
48
  ):
48
49
  self.rules: InformationRules | None = None
@@ -109,7 +110,7 @@ class NeatGraphStore:
109
110
 
110
111
  @classmethod
111
112
  def from_memory_store(cls, rules: InformationRules | None = None) -> "Self":
112
- return cls(Graph(identifier=DEFAULT_NAMESPACE), rules)
113
+ return cls(Dataset(), rules)
113
114
 
114
115
  @classmethod
115
116
  def from_sparql_store(
@@ -127,7 +128,7 @@ class NeatGraphStore:
127
128
  postAsEncoded=False,
128
129
  autocommit=False,
129
130
  )
130
- graph = Graph(store=store, identifier=DEFAULT_NAMESPACE)
131
+ graph = Dataset(store=store)
131
132
  return cls(graph, rules)
132
133
 
133
134
  @classmethod
@@ -150,9 +151,8 @@ class NeatGraphStore:
150
151
  else:
151
152
  raise Exception("Error initializing Oxigraph store")
152
153
 
153
- graph = Graph(
154
+ graph = Dataset(
154
155
  store=oxrdflib.OxigraphStore(store=oxi_store),
155
- identifier=DEFAULT_NAMESPACE,
156
156
  )
157
157
 
158
158
  return cls(graph, rules)
@@ -162,7 +162,7 @@ class NeatGraphStore:
162
162
  success = True
163
163
 
164
164
  if isinstance(extractor, RdfFileExtractor) and not extractor.issue_list.has_errors:
165
- self._parse_file(extractor.filepath, cast(str, extractor.mime_type), extractor.base_uri)
165
+ self._parse_file(extractor.filepath, cast(str, extractor.format), extractor.base_uri)
166
166
  elif isinstance(extractor, RdfFileExtractor):
167
167
  success = False
168
168
  issue_text = "\n".join([issue.as_message() for issue in extractor.issue_list])
@@ -244,33 +244,36 @@ class NeatGraphStore:
244
244
  def _parse_file(
245
245
  self,
246
246
  filepath: Path,
247
- mime_type: str = "application/rdf+xml",
247
+ format: str = "turtle",
248
248
  base_uri: URIRef | None = None,
249
249
  ) -> None:
250
250
  """Imports graph data from file.
251
251
 
252
252
  Args:
253
253
  filepath : File path to file containing graph data, by default None
254
- mime_type : MIME type of graph data, by default "application/rdf+xml"
255
- base_uri : Add base IRI to graph, by default True
254
+ format : rdflib serialization format of the file containing the RDF graph, by default "turtle"
255
+ base_uri : base URI to add to graph in case of relative URIs, by default None
256
+
257
+ !!! note "Oxigraph store"
258
+ By default we are using non-transactional mode for parsing RDF files.
259
+ This gives us a significant performance boost when importing large RDF files.
260
+ Under the hood, rdflib triggers the oxrdflib plugin, which in turn
261
+ calls the `bulk_load` method of the Oxigraph store. See more at:
262
+ https://pyoxigraph.readthedocs.io/en/stable/store.html#pyoxigraph.Store.bulk_load
256
263
  """
257
264
 
258
265
  # Oxigraph store, do not want to type hint this as it is an optional dependency
259
266
  if type(self.graph.store).__name__ == "OxigraphStore":
260
-
261
- def parse_to_oxi_store():
262
- local_import("pyoxigraph", "oxi")
263
- import pyoxigraph
264
-
265
- cast(pyoxigraph.Store, self.graph.store._store).bulk_load(
266
- str(filepath),
267
- mime_type,
268
- base_iri=base_uri,
269
- to_graph=pyoxigraph.NamedNode(self.graph.identifier),
270
- )
271
- cast(pyoxigraph.Store, self.graph.store._store).optimize()
272
-
273
- parse_to_oxi_store()
267
+ local_import("pyoxigraph", "oxi")
268
+
269
+ # this is necessary to trigger rdflib oxigraph plugin
270
+ self.graph.parse(
271
+ filepath,
272
+ format=rdflib_to_oxi_type(format),
273
+ transactional=False,
274
+ publicID=base_uri,
275
+ )
276
+ self.graph.store._store.optimize() # type: ignore[attr-defined]
274
277
 
275
278
  # All other stores
276
279
  else:
@@ -115,13 +115,15 @@ def as_neat_compliant_uri(uri: URIRef) -> URIRef:
115
115
  return URIRef(f"{namespace}{compliant_uri}")
116
116
 
117
117
 
118
- def convert_rdflib_content(content: RdfLiteral | URIRef | dict | list) -> Any:
119
- if isinstance(content, RdfLiteral) or isinstance(content, URIRef):
118
+ def convert_rdflib_content(content: RdfLiteral | URIRef | dict | list, remove_namespace: bool = False) -> Any:
119
+ if isinstance(content, RdfLiteral):
120
120
  return content.toPython()
121
+ elif isinstance(content, URIRef):
122
+ return remove_namespace_from_uri(content) if remove_namespace else content.toPython()
121
123
  elif isinstance(content, dict):
122
- return {key: convert_rdflib_content(value) for key, value in content.items()}
124
+ return {key: convert_rdflib_content(value, remove_namespace) for key, value in content.items()}
123
125
  elif isinstance(content, list):
124
- return [convert_rdflib_content(item) for item in content]
126
+ return [convert_rdflib_content(item, remove_namespace) for item in content]
125
127
  else:
126
128
  return content
127
129
 
@@ -173,7 +175,7 @@ def get_inheritance_path(child: Any, child_parent: dict[Any, list[Any]]) -> list
173
175
  return path
174
176
 
175
177
 
176
- def add_triples_in_batch(graph: Graph, triples: Iterable[Triple], batch_size: int = 10_000):
178
+ def add_triples_in_batch(graph: Graph, triples: Iterable[Triple], batch_size: int = 10_000) -> None:
177
179
  """Adds triples to the graph store in batches.
178
180
 
179
181
  Args:
@@ -204,3 +206,30 @@ def add_triples_in_batch(graph: Graph, triples: Iterable[Triple], batch_size: in
204
206
  check_commit()
205
207
 
206
208
  check_commit(force_commit=True)
209
+
210
+
211
+ def remove_instance_ids_in_batch(graph: Graph, instance_ids: Iterable[URIRef], batch_size: int = 1_000) -> None:
212
+ """Removes all triples related to the given instances in the graph store in batches.
213
+
214
+ Args:
215
+ graph: The graph store to remove triples from
216
+ instance_ids: list of instances to remove triples from
217
+ batch_size: Number of instances removed per commit, by default 1_000
218
+
219
+ """
220
+ batch_count = 0
221
+
222
+ def check_commit(force_commit: bool = False):
223
+ """Commit nodes to the graph if batch counter is reached or if force_commit is True"""
224
+ nonlocal batch_count
225
+ batch_count += 1
226
+ if force_commit or batch_count >= batch_size:
227
+ graph.commit()
228
+ batch_count = 0
229
+ return
230
+
231
+ for instance_id in instance_ids:
232
+ graph.remove((instance_id, None, None))
233
+ check_commit()
234
+
235
+ check_commit(force_commit=True)
cognite/neat/_version.py CHANGED
@@ -1,2 +1,2 @@
1
- __version__ = "0.98.0"
1
+ __version__ = "0.99.1"
2
2
  __engine__ = "^1.0.3"