cognite-neat 0.98.0__py3-none-any.whl → 0.99.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cognite-neat might be problematic. Click here for more details.

Files changed (72) hide show
  1. cognite/neat/_client/__init__.py +4 -0
  2. cognite/neat/_client/_api/data_modeling_loaders.py +512 -0
  3. cognite/neat/_client/_api/schema.py +50 -0
  4. cognite/neat/_client/_api_client.py +17 -0
  5. cognite/neat/_client/data_classes/__init__.py +0 -0
  6. cognite/neat/{_utils/cdf/data_classes.py → _client/data_classes/data_modeling.py} +8 -135
  7. cognite/neat/{_rules/models/dms/_schema.py → _client/data_classes/schema.py} +21 -281
  8. cognite/neat/_graph/_shared.py +14 -15
  9. cognite/neat/_graph/extractors/_classic_cdf/_assets.py +14 -154
  10. cognite/neat/_graph/extractors/_classic_cdf/_base.py +154 -7
  11. cognite/neat/_graph/extractors/_classic_cdf/_classic.py +23 -12
  12. cognite/neat/_graph/extractors/_classic_cdf/_data_sets.py +17 -92
  13. cognite/neat/_graph/extractors/_classic_cdf/_events.py +13 -162
  14. cognite/neat/_graph/extractors/_classic_cdf/_files.py +15 -179
  15. cognite/neat/_graph/extractors/_classic_cdf/_labels.py +32 -100
  16. cognite/neat/_graph/extractors/_classic_cdf/_relationships.py +27 -178
  17. cognite/neat/_graph/extractors/_classic_cdf/_sequences.py +14 -139
  18. cognite/neat/_graph/extractors/_classic_cdf/_timeseries.py +15 -173
  19. cognite/neat/_graph/extractors/_rdf_file.py +6 -7
  20. cognite/neat/_graph/queries/_base.py +17 -1
  21. cognite/neat/_graph/transformers/_classic_cdf.py +50 -134
  22. cognite/neat/_graph/transformers/_prune_graph.py +1 -1
  23. cognite/neat/_graph/transformers/_rdfpath.py +1 -1
  24. cognite/neat/_issues/warnings/__init__.py +6 -0
  25. cognite/neat/_issues/warnings/_external.py +8 -0
  26. cognite/neat/_issues/warnings/_properties.py +16 -0
  27. cognite/neat/_rules/_constants.py +7 -6
  28. cognite/neat/_rules/analysis/_base.py +8 -4
  29. cognite/neat/_rules/exporters/_base.py +3 -4
  30. cognite/neat/_rules/exporters/_rules2dms.py +29 -40
  31. cognite/neat/_rules/importers/_dms2rules.py +4 -5
  32. cognite/neat/_rules/importers/_rdf/_inference2rules.py +25 -33
  33. cognite/neat/_rules/models/__init__.py +1 -1
  34. cognite/neat/_rules/models/_base_rules.py +22 -12
  35. cognite/neat/_rules/models/dms/__init__.py +2 -2
  36. cognite/neat/_rules/models/dms/_exporter.py +15 -20
  37. cognite/neat/_rules/models/dms/_rules.py +48 -3
  38. cognite/neat/_rules/models/dms/_rules_input.py +52 -8
  39. cognite/neat/_rules/models/dms/_validation.py +10 -5
  40. cognite/neat/_rules/models/entities/_single_value.py +32 -4
  41. cognite/neat/_rules/models/information/_rules.py +0 -8
  42. cognite/neat/_rules/models/mapping/__init__.py +2 -3
  43. cognite/neat/_rules/models/mapping/_classic2core.py +36 -146
  44. cognite/neat/_rules/models/mapping/_classic2core.yaml +339 -0
  45. cognite/neat/_rules/transformers/__init__.py +2 -2
  46. cognite/neat/_rules/transformers/_converters.py +110 -11
  47. cognite/neat/_rules/transformers/_mapping.py +105 -30
  48. cognite/neat/_rules/transformers/_verification.py +5 -2
  49. cognite/neat/_session/_base.py +49 -8
  50. cognite/neat/_session/_drop.py +35 -0
  51. cognite/neat/_session/_inspect.py +17 -5
  52. cognite/neat/_session/_mapping.py +39 -0
  53. cognite/neat/_session/_prepare.py +218 -23
  54. cognite/neat/_session/_read.py +49 -12
  55. cognite/neat/_session/_to.py +3 -3
  56. cognite/neat/_store/_base.py +27 -24
  57. cognite/neat/_utils/rdf_.py +28 -1
  58. cognite/neat/_version.py +1 -1
  59. cognite/neat/_workflows/steps/lib/current/rules_exporter.py +8 -3
  60. cognite/neat/_workflows/steps/lib/current/rules_importer.py +4 -1
  61. cognite/neat/_workflows/steps/lib/current/rules_validator.py +3 -2
  62. {cognite_neat-0.98.0.dist-info → cognite_neat-0.99.0.dist-info}/METADATA +3 -3
  63. {cognite_neat-0.98.0.dist-info → cognite_neat-0.99.0.dist-info}/RECORD +67 -64
  64. cognite/neat/_rules/models/mapping/_base.py +0 -131
  65. cognite/neat/_utils/cdf/loaders/__init__.py +0 -25
  66. cognite/neat/_utils/cdf/loaders/_base.py +0 -54
  67. cognite/neat/_utils/cdf/loaders/_data_modeling.py +0 -339
  68. cognite/neat/_utils/cdf/loaders/_ingestion.py +0 -167
  69. /cognite/neat/{_utils/cdf → _client/_api}/__init__.py +0 -0
  70. {cognite_neat-0.98.0.dist-info → cognite_neat-0.99.0.dist-info}/LICENSE +0 -0
  71. {cognite_neat-0.98.0.dist-info → cognite_neat-0.99.0.dist-info}/WHEEL +0 -0
  72. {cognite_neat-0.98.0.dist-info → cognite_neat-0.99.0.dist-info}/entry_points.txt +0 -0
@@ -1,26 +1,44 @@
1
+ import copy
1
2
  from collections.abc import Collection
2
3
  from datetime import datetime, timezone
3
- from typing import Literal
4
+ from typing import Literal, cast
4
5
 
5
6
  from cognite.client.data_classes.data_modeling import DataModelIdentifier
6
7
  from rdflib import URIRef
7
8
 
9
+ from cognite.neat._client import NeatClient
10
+ from cognite.neat._constants import DEFAULT_NAMESPACE
11
+ from cognite.neat._graph.transformers import RelationshipToSchemaTransformer
8
12
  from cognite.neat._graph.transformers._rdfpath import MakeConnectionOnExactMatch
9
- from cognite.neat._rules._shared import ReadRules
13
+ from cognite.neat._rules._shared import InputRules, ReadRules
14
+ from cognite.neat._rules.importers import DMSImporter
15
+ from cognite.neat._rules.models import DMSRules
10
16
  from cognite.neat._rules.models.information._rules_input import InformationInputRules
11
- from cognite.neat._rules.transformers import ReduceCogniteModel, ToCompliantEntities, ToExtension
17
+ from cognite.neat._rules.transformers import (
18
+ PrefixEntities,
19
+ ReduceCogniteModel,
20
+ ToCompliantEntities,
21
+ ToExtension,
22
+ VerifyDMSRules,
23
+ )
24
+ from cognite.neat._store._provenance import Agent as ProvenanceAgent
12
25
  from cognite.neat._store._provenance import Change
13
26
 
14
27
  from ._state import SessionState
15
28
  from .exceptions import NeatSessionError, session_class_wrapper
16
29
 
30
+ try:
31
+ from rich import print
32
+ except ImportError:
33
+ ...
34
+
17
35
 
18
36
  @session_class_wrapper
19
37
  class PrepareAPI:
20
- def __init__(self, state: SessionState, verbose: bool) -> None:
38
+ def __init__(self, client: NeatClient | None, state: SessionState, verbose: bool) -> None:
21
39
  self._state = state
22
40
  self._verbose = verbose
23
- self.data_model = DataModelPrepareAPI(state, verbose)
41
+ self.data_model = DataModelPrepareAPI(client, state, verbose)
24
42
  self.instances = InstancePrepareAPI(state, verbose)
25
43
 
26
44
 
@@ -94,34 +112,77 @@ class InstancePrepareAPI:
94
112
  raise NeatSessionError(f"Property {property_} is not defined for type {type_}. Cannot make connection")
95
113
  return type_uri[0], property_uri[0]
96
114
 
115
+ def relationships_as_connections(self, limit: int = 1) -> None:
116
+ """This assumes that you have read a classic CDF knowledge graph including relationships.
117
+
118
+ This transformer analyzes the relationships in the graph and modifies them to be part of the schema
119
+ for Assets, Events, Files, Sequences, and TimeSeries. Relationships without any properties
120
+ are replaced by a simple relationship between the source and target nodes. Relationships with
121
+ properties are replaced by a schema that contains the properties as attributes.
122
+
123
+ Args:
124
+ limit: The minimum number of relationships that need to be present for it
125
+ to be converted into a schema. Default is 1.
126
+
127
+ """
128
+ transformer = RelationshipToSchemaTransformer(limit=limit)
129
+ self._state.instances.store.transform(transformer)
130
+
97
131
 
98
132
  @session_class_wrapper
99
133
  class DataModelPrepareAPI:
100
- def __init__(self, state: SessionState, verbose: bool) -> None:
134
+ def __init__(self, client: NeatClient | None, state: SessionState, verbose: bool) -> None:
135
+ self._client = client
101
136
  self._state = state
102
137
  self._verbose = verbose
103
138
 
104
139
  def cdf_compliant_external_ids(self) -> None:
105
140
  """Convert data model component external ids to CDF compliant entities."""
106
- if input := self._state.data_model.last_info_unverified_rule:
107
- source_id, rules = input
141
+ source_id, rules = self._state.data_model.last_info_unverified_rule
142
+
143
+ start = datetime.now(timezone.utc)
144
+ transformer = ToCompliantEntities()
145
+ output: ReadRules[InformationInputRules] = transformer.transform(rules)
146
+ end = datetime.now(timezone.utc)
147
+
148
+ change = Change.from_rules_activity(
149
+ output,
150
+ transformer.agent,
151
+ start,
152
+ end,
153
+ "Converted external ids to CDF compliant entities",
154
+ self._state.data_model.provenance.source_entity(source_id)
155
+ or self._state.data_model.provenance.target_entity(source_id),
156
+ )
108
157
 
109
- start = datetime.now(timezone.utc)
110
- transformer = ToCompliantEntities()
111
- output: ReadRules[InformationInputRules] = transformer.transform(rules)
112
- end = datetime.now(timezone.utc)
158
+ self._state.data_model.write(output, change)
113
159
 
114
- change = Change.from_rules_activity(
115
- output,
116
- transformer.agent,
117
- start,
118
- end,
119
- "Converted external ids to CDF compliant entities",
120
- self._state.data_model.provenance.source_entity(source_id)
121
- or self._state.data_model.provenance.target_entity(source_id),
122
- )
160
+ def prefix(self, prefix: str) -> None:
161
+ """Prefix all views in the data model with the given prefix.
123
162
 
124
- self._state.data_model.write(output, change)
163
+ Args:
164
+ prefix: The prefix to add to the views in the data model.
165
+
166
+ """
167
+ source_id, rules = self._state.data_model.last_unverified_rule
168
+
169
+ start = datetime.now(timezone.utc)
170
+ transformer = PrefixEntities(prefix)
171
+ new_rules = cast(InputRules, copy.deepcopy(rules.get_rules()))
172
+ output = transformer.transform(new_rules)
173
+ end = datetime.now(timezone.utc)
174
+
175
+ change = Change.from_rules_activity(
176
+ output,
177
+ transformer.agent,
178
+ start,
179
+ end,
180
+ "Added prefix to the data model views",
181
+ self._state.data_model.provenance.source_entity(source_id)
182
+ or self._state.data_model.provenance.target_entity(source_id),
183
+ )
184
+
185
+ self._state.data_model.write(output, change)
125
186
 
126
187
  def to_enterprise(
127
188
  self,
@@ -185,7 +246,7 @@ class DataModelPrepareAPI:
185
246
  data_model_id: DataModelIdentifier,
186
247
  org_name: str = "My",
187
248
  mode: Literal["read", "write"] = "read",
188
- dummy_property: str = "dummy",
249
+ dummy_property: str = "GUID",
189
250
  ) -> None:
190
251
  """Uses the current data model as a basis to create solution data model
191
252
 
@@ -235,6 +296,81 @@ class DataModelPrepareAPI:
235
296
 
236
297
  self._state.data_model.write(output.rules, change)
237
298
 
299
+ def to_data_product(
300
+ self,
301
+ data_model_id: DataModelIdentifier,
302
+ org_name: str = "",
303
+ include: Literal["same-space", "all"] = "same-space",
304
+ ) -> None:
305
+ """Uses the current data model as a basis to create data product data model.
306
+
307
+ A data product model is a data model that ONLY maps to containers and does not use implements. This is
308
+ typically used for defining the data in a data product.
309
+
310
+ Args:
311
+ data_model_id: The data product data model id that is being created.
312
+ org_name: Organization name to use for the views in the new data model.
313
+ include: The views to include in the data product data model. Can be either "same-space" or "all".
314
+ If you set same-space, only the views in the same space as the data model will be included.
315
+ """
316
+ source_id, rules = self._state.data_model.last_verified_dms_rules
317
+
318
+ dms_ref: DMSRules | None = None
319
+ view_ids, container_ids = rules.imported_views_and_containers_ids(include_model_views_with_no_properties=True)
320
+ if view_ids or container_ids:
321
+ if self._client is None:
322
+ raise NeatSessionError(
323
+ "No client provided. You are referencing unknown views and containers in your data model, "
324
+ "NEAT needs a client to lookup the definitions. "
325
+ "Please set the client in the session, NeatSession(client=client)."
326
+ )
327
+ schema = self._client.schema.retrieve(list(view_ids), list(container_ids))
328
+
329
+ importer = DMSImporter(schema)
330
+ reference_rules = importer.to_rules().rules
331
+ if reference_rules is not None:
332
+ imported = VerifyDMSRules("continue").transform(reference_rules)
333
+ if dms_ref := imported.rules:
334
+ rules = rules.model_copy(deep=True)
335
+ if rules.containers is None:
336
+ rules.containers = dms_ref.containers
337
+ else:
338
+ existing_containers = {c.container for c in rules.containers}
339
+ rules.containers.extend(
340
+ [c for c in dms_ref.containers or [] if c.container not in existing_containers]
341
+ )
342
+ existing_views = {v.view for v in rules.views}
343
+ rules.views.extend([v for v in dms_ref.views if v.view not in existing_views])
344
+ existing_properties = {(p.view, p.view_property) for p in rules.properties}
345
+ rules.properties.extend(
346
+ [p for p in dms_ref.properties if (p.view, p.view_property) not in existing_properties]
347
+ )
348
+
349
+ start = datetime.now(timezone.utc)
350
+ transformer = ToExtension(
351
+ new_model_id=data_model_id,
352
+ org_name=org_name,
353
+ type_="data_product",
354
+ include=include,
355
+ )
356
+ output = transformer.transform(rules)
357
+ end = datetime.now(timezone.utc)
358
+
359
+ change = Change.from_rules_activity(
360
+ output,
361
+ transformer.agent,
362
+ start,
363
+ end,
364
+ (
365
+ f"Prepared data model {data_model_id} to be data product model "
366
+ f"on top of {rules.metadata.as_data_model_id()}"
367
+ ),
368
+ self._state.data_model.provenance.source_entity(source_id)
369
+ or self._state.data_model.provenance.target_entity(source_id),
370
+ )
371
+
372
+ self._state.data_model.write(output.rules, change)
373
+
238
374
  def reduce(self, drop: Collection[Literal["3D", "Annotation", "BaseViews"] | str]) -> None:
239
375
  """This is a special method that allow you to drop parts of the data model.
240
376
  This only applies to Cognite Data Models.
@@ -267,3 +403,62 @@ class DataModelPrepareAPI:
267
403
  )
268
404
 
269
405
  self._state.data_model.write(output.rules, change)
406
+
407
+ def include_referenced(self) -> None:
408
+ """Include referenced views and containers in the data model."""
409
+ start = datetime.now(timezone.utc)
410
+
411
+ source_id, rules = self._state.data_model.last_verified_dms_rules
412
+ view_ids, container_ids = rules.imported_views_and_containers_ids(include_model_views_with_no_properties=True)
413
+ if not (view_ids or container_ids):
414
+ print(
415
+ f"Data model {rules.metadata.as_data_model_id()} does not have any referenced views or containers."
416
+ f"that is not already included in the data model."
417
+ )
418
+ return
419
+ if self._client is None:
420
+ raise NeatSessionError(
421
+ "No client provided. You are referencing unknown views and containers in your data model, "
422
+ "NEAT needs a client to lookup the definitions. "
423
+ "Please set the client in the session, NeatSession(client=client)."
424
+ )
425
+ schema = self._client.schema.retrieve(list(view_ids), list(container_ids))
426
+ copy_ = rules.model_copy(deep=True)
427
+ copy_.metadata.version = f"{rules.metadata.version}_completed"
428
+ importer = DMSImporter(schema)
429
+ imported = importer.to_rules()
430
+ if imported.rules is None:
431
+ self._state.data_model.issue_lists.append(imported.issues)
432
+ raise NeatSessionError(
433
+ "Could not import the referenced views and containers. "
434
+ "See `neat.inspect.issues()` for more information."
435
+ )
436
+ verified = VerifyDMSRules("continue", post_validate=False).transform(imported.rules)
437
+ if verified.rules is None:
438
+ self._state.data_model.issue_lists.append(verified.issues)
439
+ raise NeatSessionError(
440
+ "Could not verify the referenced views and containers. "
441
+ "See `neat.inspect.issues()` for more information."
442
+ )
443
+ if copy_.containers is None:
444
+ copy_.containers = verified.rules.containers
445
+ else:
446
+ existing_containers = {c.container for c in copy_.containers}
447
+ copy_.containers.extend(
448
+ [c for c in verified.rules.containers or [] if c.container not in existing_containers]
449
+ )
450
+ existing_views = {v.view for v in copy_.views}
451
+ copy_.views.extend([v for v in verified.rules.views if v.view not in existing_views])
452
+ end = datetime.now(timezone.utc)
453
+
454
+ change = Change.from_rules_activity(
455
+ copy_,
456
+ ProvenanceAgent(id_=DEFAULT_NAMESPACE["agent/"]),
457
+ start,
458
+ end,
459
+ (f"Included referenced views and containers in the data model {rules.metadata.as_data_model_id()}"),
460
+ self._state.data_model.provenance.source_entity(source_id)
461
+ or self._state.data_model.provenance.target_entity(source_id),
462
+ )
463
+
464
+ self._state.data_model.write(copy_, change)
@@ -3,9 +3,9 @@ from datetime import datetime, timezone
3
3
  from pathlib import Path
4
4
  from typing import Any, Literal
5
5
 
6
- from cognite.client import CogniteClient
7
6
  from cognite.client.data_classes.data_modeling import DataModelId, DataModelIdentifier
8
7
 
8
+ from cognite.neat._client import NeatClient
9
9
  from cognite.neat._constants import COGNITE_SPACES
10
10
  from cognite.neat._graph import examples as instances_examples
11
11
  from cognite.neat._graph import extractors
@@ -27,7 +27,7 @@ from .exceptions import NeatSessionError, session_class_wrapper
27
27
 
28
28
  @session_class_wrapper
29
29
  class ReadAPI:
30
- def __init__(self, state: SessionState, client: CogniteClient | None, verbose: bool) -> None:
30
+ def __init__(self, state: SessionState, client: NeatClient | None, verbose: bool) -> None:
31
31
  self._state = state
32
32
  self._verbose = verbose
33
33
  self.cdf = CDFReadAPI(state, client, verbose)
@@ -39,7 +39,7 @@ class ReadAPI:
39
39
 
40
40
  @session_class_wrapper
41
41
  class BaseReadAPI:
42
- def __init__(self, state: SessionState, client: CogniteClient | None, verbose: bool) -> None:
42
+ def __init__(self, state: SessionState, client: NeatClient | None, verbose: bool) -> None:
43
43
  self._state = state
44
44
  self._verbose = verbose
45
45
  self._client = client
@@ -67,12 +67,12 @@ class BaseReadAPI:
67
67
 
68
68
  @session_class_wrapper
69
69
  class CDFReadAPI(BaseReadAPI):
70
- def __init__(self, state: SessionState, client: CogniteClient | None, verbose: bool) -> None:
70
+ def __init__(self, state: SessionState, client: NeatClient | None, verbose: bool) -> None:
71
71
  super().__init__(state, client, verbose)
72
72
  self.classic = CDFClassicAPI(state, client, verbose)
73
73
 
74
74
  @property
75
- def _get_client(self) -> CogniteClient:
75
+ def _get_client(self) -> NeatClient:
76
76
  if self._client is None:
77
77
  raise NeatValueError("No client provided. Please provide a client to read a data model.")
78
78
  return self._client
@@ -113,16 +113,53 @@ class CDFReadAPI(BaseReadAPI):
113
113
  @session_class_wrapper
114
114
  class CDFClassicAPI(BaseReadAPI):
115
115
  @property
116
- def _get_client(self) -> CogniteClient:
116
+ def _get_client(self) -> NeatClient:
117
117
  if self._client is None:
118
118
  raise ValueError("No client provided. Please provide a client to read a data model.")
119
119
  return self._client
120
120
 
121
- def assets(self, root_asset_external_id: str) -> None:
122
- extractor = extractors.AssetsExtractor.from_hierarchy(self._get_client, root_asset_external_id)
121
+ def graph(self, root_asset_external_id: str) -> None:
122
+ """Reads the classic knowledge graph from CDF.
123
+
124
+ The Classic Graph consists of the following core resource type.
125
+
126
+ Classic Node CDF Resources:
127
+ - Assets
128
+ - TimeSeries
129
+ - Sequences
130
+ - Events
131
+ - Files
132
+
133
+ All the classic node CDF resources can have one or more connections to one or more assets. This
134
+ will match a direct relationship in the data modeling of CDF.
135
+
136
+ In addition, you have relationships between the classic node CDF resources. This matches an edge
137
+ in the data modeling of CDF.
138
+
139
+ Finally, you have labels and data sets that are used to organize the graph. Here, data sets have a similar,
140
+ but different, role as a space in data modeling. While labels can be compared to node types in data modeling,
141
+ used to quickly filter and find nodes/edges.
142
+
143
+ This extractor will extract the classic CDF graph into Neat starting from either a data set or a root asset.
144
+
145
+ It works as follows:
146
+
147
+ 1. Extract all core nodes (assets, time series, sequences, events, files) filtered by the given data set or
148
+ root asset.
149
+ 2. Extract all relationships starting from any of the extracted core nodes.
150
+ 3. Extract all core nodes that are targets of the relationships that are not already extracted.
151
+ 4. Extract all labels that are connected to the extracted core nodes/relationships.
152
+ 5. Extract all data sets that are connected to the extracted core nodes/relationships.
153
+
154
+ Args:
155
+ root_asset_external_id: The external id of the root asset
156
+
157
+ """
158
+ extractor = extractors.ClassicGraphExtractor(self._get_client, root_asset_external_id=root_asset_external_id)
159
+
123
160
  self._state.instances.store.write(extractor)
124
161
  if self._verbose:
125
- print(f"Asset hierarchy {root_asset_external_id} read successfully")
162
+ print(f"Classic Graph {root_asset_external_id} read successfully")
126
163
 
127
164
 
128
165
  @session_class_wrapper
@@ -145,7 +182,7 @@ class ExcelReadAPI(BaseReadAPI):
145
182
  description=f"Excel file {reader!s} read as unverified data model",
146
183
  )
147
184
  self._store_rules(input_rules, change)
148
-
185
+ self._state.data_model.issue_lists.append(input_rules.issues)
149
186
  return input_rules.issues
150
187
 
151
188
 
@@ -176,7 +213,7 @@ class YamlReadAPI(BaseReadAPI):
176
213
  "NEAT needs a client to lookup the container definitions. "
177
214
  "Please set the client in the session, NeatSession(client=client)."
178
215
  )
179
- system_containers = self._client.data_modeling.containers.retrieve(system_container_ids)
216
+ system_containers = self._client.loaders.containers.retrieve(system_container_ids)
180
217
  dms_importer.update_referenced_containers(system_containers)
181
218
 
182
219
  importer = dms_importer
@@ -222,7 +259,7 @@ class CSVReadAPI(BaseReadAPI):
222
259
 
223
260
  @session_class_wrapper
224
261
  class RDFReadAPI(BaseReadAPI):
225
- def __init__(self, state: SessionState, client: CogniteClient | None, verbose: bool) -> None:
262
+ def __init__(self, state: SessionState, client: NeatClient | None, verbose: bool) -> None:
226
263
  super().__init__(state, client, verbose)
227
264
  self.examples = RDFExamples(state)
228
265
 
@@ -1,9 +1,9 @@
1
1
  from pathlib import Path
2
2
  from typing import Any, Literal, overload
3
3
 
4
- from cognite.client import CogniteClient
5
4
  from cognite.client.data_classes.data_modeling import SpaceApply
6
5
 
6
+ from cognite.neat._client import NeatClient
7
7
  from cognite.neat._graph import loaders
8
8
  from cognite.neat._issues import IssueList, catch_warnings
9
9
  from cognite.neat._rules import exporters
@@ -17,7 +17,7 @@ from .exceptions import NeatSessionError, session_class_wrapper
17
17
 
18
18
  @session_class_wrapper
19
19
  class ToAPI:
20
- def __init__(self, state: SessionState, client: CogniteClient | None, verbose: bool) -> None:
20
+ def __init__(self, state: SessionState, client: NeatClient | None, verbose: bool) -> None:
21
21
  self._state = state
22
22
  self._verbose = verbose
23
23
  self.cdf = CDFToAPI(state, client, verbose)
@@ -75,7 +75,7 @@ class ToAPI:
75
75
 
76
76
  @session_class_wrapper
77
77
  class CDFToAPI:
78
- def __init__(self, state: SessionState, client: CogniteClient | None, verbose: bool) -> None:
78
+ def __init__(self, state: SessionState, client: NeatClient | None, verbose: bool) -> None:
79
79
  self._client = client
80
80
  self._state = state
81
81
  self._verbose = verbose
@@ -7,10 +7,11 @@ from typing import cast
7
7
 
8
8
  import pandas as pd
9
9
  from pandas import Index
10
- from rdflib import Graph, Namespace, URIRef
10
+ from rdflib import Dataset, Namespace, URIRef
11
11
  from rdflib.plugins.stores.sparqlstore import SPARQLUpdateStore
12
12
 
13
13
  from cognite.neat._constants import DEFAULT_NAMESPACE
14
+ from cognite.neat._graph._shared import rdflib_to_oxi_type
14
15
  from cognite.neat._graph.extractors import RdfFileExtractor, TripleExtractors
15
16
  from cognite.neat._graph.queries import Queries
16
17
  from cognite.neat._graph.transformers import Transformers
@@ -42,7 +43,7 @@ class NeatGraphStore:
42
43
 
43
44
  def __init__(
44
45
  self,
45
- graph: Graph,
46
+ graph: Dataset,
46
47
  rules: InformationRules | None = None,
47
48
  ):
48
49
  self.rules: InformationRules | None = None
@@ -109,7 +110,7 @@ class NeatGraphStore:
109
110
 
110
111
  @classmethod
111
112
  def from_memory_store(cls, rules: InformationRules | None = None) -> "Self":
112
- return cls(Graph(identifier=DEFAULT_NAMESPACE), rules)
113
+ return cls(Dataset(), rules)
113
114
 
114
115
  @classmethod
115
116
  def from_sparql_store(
@@ -127,7 +128,7 @@ class NeatGraphStore:
127
128
  postAsEncoded=False,
128
129
  autocommit=False,
129
130
  )
130
- graph = Graph(store=store, identifier=DEFAULT_NAMESPACE)
131
+ graph = Dataset(store=store)
131
132
  return cls(graph, rules)
132
133
 
133
134
  @classmethod
@@ -150,9 +151,8 @@ class NeatGraphStore:
150
151
  else:
151
152
  raise Exception("Error initializing Oxigraph store")
152
153
 
153
- graph = Graph(
154
+ graph = Dataset(
154
155
  store=oxrdflib.OxigraphStore(store=oxi_store),
155
- identifier=DEFAULT_NAMESPACE,
156
156
  )
157
157
 
158
158
  return cls(graph, rules)
@@ -162,7 +162,7 @@ class NeatGraphStore:
162
162
  success = True
163
163
 
164
164
  if isinstance(extractor, RdfFileExtractor) and not extractor.issue_list.has_errors:
165
- self._parse_file(extractor.filepath, cast(str, extractor.mime_type), extractor.base_uri)
165
+ self._parse_file(extractor.filepath, cast(str, extractor.format), extractor.base_uri)
166
166
  elif isinstance(extractor, RdfFileExtractor):
167
167
  success = False
168
168
  issue_text = "\n".join([issue.as_message() for issue in extractor.issue_list])
@@ -244,33 +244,36 @@ class NeatGraphStore:
244
244
  def _parse_file(
245
245
  self,
246
246
  filepath: Path,
247
- mime_type: str = "application/rdf+xml",
247
+ format: str = "turtle",
248
248
  base_uri: URIRef | None = None,
249
249
  ) -> None:
250
250
  """Imports graph data from file.
251
251
 
252
252
  Args:
253
253
  filepath : File path to file containing graph data, by default None
254
- mime_type : MIME type of graph data, by default "application/rdf+xml"
255
- base_uri : Add base IRI to graph, by default True
254
+ format : rdflib format file containing RDF graph, by default "turtle"
255
+ base_uri : base URI to add to graph in case of relative URIs, by default None
256
+
257
+ !!! note "Oxigraph store"
258
+ By default we are using non-transactional mode for parsing RDF files.
259
+ This gives us a significant performance boost when importing large RDF files.
260
+ Under the hood, rdflib triggers the oxrdflib plugin, which in turn
261
+ calls `bulk_load` method from oxigraph store. See more at:
262
+ https://pyoxigraph.readthedocs.io/en/stable/store.html#pyoxigraph.Store.bulk_load
256
263
  """
257
264
 
258
265
  # Oxigraph store, do not want to type hint this as it is an optional dependency
259
266
  if type(self.graph.store).__name__ == "OxigraphStore":
260
-
261
- def parse_to_oxi_store():
262
- local_import("pyoxigraph", "oxi")
263
- import pyoxigraph
264
-
265
- cast(pyoxigraph.Store, self.graph.store._store).bulk_load(
266
- str(filepath),
267
- mime_type,
268
- base_iri=base_uri,
269
- to_graph=pyoxigraph.NamedNode(self.graph.identifier),
270
- )
271
- cast(pyoxigraph.Store, self.graph.store._store).optimize()
272
-
273
- parse_to_oxi_store()
267
+ local_import("pyoxigraph", "oxi")
268
+
269
+ # this is necessary to trigger rdflib oxigraph plugin
270
+ self.graph.parse(
271
+ filepath,
272
+ format=rdflib_to_oxi_type(format),
273
+ transactional=False,
274
+ publicID=base_uri,
275
+ )
276
+ self.graph.store._store.optimize() # type: ignore[attr-defined]
274
277
 
275
278
  # All other stores
276
279
  else:
@@ -173,7 +173,7 @@ def get_inheritance_path(child: Any, child_parent: dict[Any, list[Any]]) -> list
173
173
  return path
174
174
 
175
175
 
176
- def add_triples_in_batch(graph: Graph, triples: Iterable[Triple], batch_size: int = 10_000):
176
+ def add_triples_in_batch(graph: Graph, triples: Iterable[Triple], batch_size: int = 10_000) -> None:
177
177
  """Adds triples to the graph store in batches.
178
178
 
179
179
  Args:
@@ -204,3 +204,30 @@ def add_triples_in_batch(graph: Graph, triples: Iterable[Triple], batch_size: in
204
204
  check_commit()
205
205
 
206
206
  check_commit(force_commit=True)
207
+
208
+
209
+ def remove_instance_ids_in_batch(graph: Graph, instance_ids: Iterable[URIRef], batch_size: int = 1_000) -> None:
210
+ """Removes all triples related to the given instances in the graph store in batches.
211
+
212
+ Args:
213
+ graph: The graph store to remove triples from
214
+ instance_ids: list of instances to remove triples from
215
+ batch_size: Number of instances removed per commit, by default 1_000
216
+
217
+ """
218
+ batch_count = 0
219
+
220
+ def check_commit(force_commit: bool = False):
221
+ """Commit nodes to the graph if batch counter is reached or if force_commit is True"""
222
+ nonlocal batch_count
223
+ batch_count += 1
224
+ if force_commit or batch_count >= batch_size:
225
+ graph.commit()
226
+ batch_count = 0
227
+ return
228
+
229
+ for instance_id in instance_ids:
230
+ graph.remove((instance_id, None, None))
231
+ check_commit()
232
+
233
+ check_commit(force_commit=True)
cognite/neat/_version.py CHANGED
@@ -1,2 +1,2 @@
1
- __version__ = "0.98.0"
1
+ __version__ = "0.99.0"
2
2
  __engine__ = "^1.0.3"
@@ -2,6 +2,7 @@ import time
2
2
  from pathlib import Path
3
3
  from typing import ClassVar, Literal, cast
4
4
 
5
+ from cognite.neat._client import NeatClient
5
6
  from cognite.neat._issues.errors import WorkflowStepNotInitializedError
6
7
  from cognite.neat._rules import exporters
7
8
  from cognite.neat._rules._shared import DMSRules, InformationRules, VerifiedRules
@@ -100,7 +101,7 @@ class DeleteDataModelFromCDF(Step):
100
101
 
101
102
  report_lines = ["# Data Model Deletion from CDF\n\n"]
102
103
  errors = []
103
- for result in dms_exporter.delete_from_cdf(rules=dms_rules, client=cdf_client, dry_run=dry_run):
104
+ for result in dms_exporter.delete_from_cdf(rules=dms_rules, client=NeatClient(cdf_client), dry_run=dry_run):
104
105
  report_lines.append(str(result))
105
106
  errors.extend(result.error_messages)
106
107
 
@@ -220,7 +221,9 @@ class RulesToDMS(Step):
220
221
 
221
222
  report_lines = ["# DMS Schema Export to CDF\n\n"]
222
223
  errors = []
223
- for result in dms_exporter.export_to_cdf_iterable(rules=dms_rules, client=cdf_client, dry_run=dry_run):
224
+ for result in dms_exporter.export_to_cdf_iterable(
225
+ rules=dms_rules, client=NeatClient(cdf_client), dry_run=dry_run
226
+ ):
224
227
  report_lines.append(str(result))
225
228
  errors.extend(result.error_messages)
226
229
 
@@ -584,7 +587,9 @@ class RulesToCDFTransformations(Step):
584
587
 
585
588
  report_lines = ["# DMS Schema Export to CDF\n\n"]
586
589
  errors = []
587
- for result in dms_exporter.export_to_cdf_iterable(rules=dms_rules, client=cdf_client, dry_run=dry_run):
590
+ for result in dms_exporter.export_to_cdf_iterable(
591
+ rules=dms_rules, client=NeatClient(cdf_client), dry_run=dry_run
592
+ ):
588
593
  report_lines.append(str(result))
589
594
  errors.extend(result.error_messages)
590
595
 
@@ -5,6 +5,7 @@ from typing import ClassVar
5
5
  from cognite.client import CogniteClient
6
6
  from cognite.client.data_classes.data_modeling import DataModelId
7
7
 
8
+ from cognite.neat._client import NeatClient
8
9
  from cognite.neat._issues.errors import WorkflowStepNotInitializedError
9
10
  from cognite.neat._issues.formatters import FORMATTER_BY_NAME
10
11
  from cognite.neat._rules import importers
@@ -299,7 +300,9 @@ class DMSToRules(Step):
299
300
  return FlowMessage(error_text=error_text, step_execution_status=StepExecutionStatus.ABORT_AND_FAIL)
300
301
  ref_model_id = ref_model.as_id()
301
302
 
302
- dms_importer = importers.DMSImporter.from_data_model_id(cdf_client, datamodel_entity.as_id(), ref_model_id)
303
+ dms_importer = importers.DMSImporter.from_data_model_id(
304
+ NeatClient(cdf_client), datamodel_entity.as_id(), ref_model_id
305
+ )
303
306
 
304
307
  # if role is None, it will be inferred from the rules file
305
308
  role = self.configs.get("Role")