cognite-neat 0.109.4__py3-none-any.whl → 0.111.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cognite-neat might be problematic. Click here for more details.

Files changed (88)
  1. cognite/neat/_alpha.py +8 -0
  2. cognite/neat/_client/_api/schema.py +43 -1
  3. cognite/neat/_client/data_classes/schema.py +4 -4
  4. cognite/neat/_constants.py +15 -1
  5. cognite/neat/_graph/extractors/__init__.py +4 -0
  6. cognite/neat/_graph/extractors/_classic_cdf/_base.py +8 -16
  7. cognite/neat/_graph/extractors/_classic_cdf/_classic.py +48 -19
  8. cognite/neat/_graph/extractors/_classic_cdf/_relationships.py +23 -17
  9. cognite/neat/_graph/extractors/_classic_cdf/_sequences.py +15 -17
  10. cognite/neat/_graph/extractors/_dict.py +102 -0
  11. cognite/neat/_graph/extractors/_dms.py +27 -40
  12. cognite/neat/_graph/extractors/_dms_graph.py +30 -3
  13. cognite/neat/_graph/extractors/_iodd.py +3 -3
  14. cognite/neat/_graph/extractors/_mock_graph_generator.py +9 -7
  15. cognite/neat/_graph/extractors/_raw.py +67 -0
  16. cognite/neat/_graph/loaders/_base.py +20 -4
  17. cognite/neat/_graph/loaders/_rdf2dms.py +476 -383
  18. cognite/neat/_graph/queries/_base.py +163 -133
  19. cognite/neat/_graph/transformers/__init__.py +1 -3
  20. cognite/neat/_graph/transformers/_classic_cdf.py +6 -22
  21. cognite/neat/_graph/transformers/_rdfpath.py +2 -49
  22. cognite/neat/_issues/__init__.py +1 -6
  23. cognite/neat/_issues/_base.py +21 -252
  24. cognite/neat/_issues/_contextmanagers.py +46 -0
  25. cognite/neat/_issues/_factory.py +69 -0
  26. cognite/neat/_issues/errors/__init__.py +20 -4
  27. cognite/neat/_issues/errors/_external.py +7 -0
  28. cognite/neat/_issues/errors/_wrapper.py +81 -3
  29. cognite/neat/_issues/formatters.py +4 -4
  30. cognite/neat/_issues/warnings/__init__.py +3 -2
  31. cognite/neat/_issues/warnings/_properties.py +8 -0
  32. cognite/neat/_issues/warnings/user_modeling.py +12 -0
  33. cognite/neat/_rules/_constants.py +12 -0
  34. cognite/neat/_rules/_shared.py +3 -2
  35. cognite/neat/_rules/analysis/__init__.py +2 -3
  36. cognite/neat/_rules/analysis/_base.py +430 -259
  37. cognite/neat/_rules/catalog/info-rules-imf.xlsx +0 -0
  38. cognite/neat/_rules/exporters/_rules2excel.py +3 -9
  39. cognite/neat/_rules/exporters/_rules2instance_template.py +2 -2
  40. cognite/neat/_rules/exporters/_rules2ontology.py +5 -4
  41. cognite/neat/_rules/importers/_base.py +2 -47
  42. cognite/neat/_rules/importers/_dms2rules.py +7 -10
  43. cognite/neat/_rules/importers/_dtdl2rules/dtdl_importer.py +2 -2
  44. cognite/neat/_rules/importers/_rdf/_inference2rules.py +66 -26
  45. cognite/neat/_rules/importers/_rdf/_shared.py +1 -1
  46. cognite/neat/_rules/importers/_spreadsheet2rules.py +12 -9
  47. cognite/neat/_rules/models/_base_rules.py +0 -2
  48. cognite/neat/_rules/models/data_types.py +7 -0
  49. cognite/neat/_rules/models/dms/_exporter.py +9 -8
  50. cognite/neat/_rules/models/dms/_rules.py +29 -2
  51. cognite/neat/_rules/models/dms/_rules_input.py +9 -1
  52. cognite/neat/_rules/models/dms/_validation.py +115 -5
  53. cognite/neat/_rules/models/entities/_loaders.py +1 -1
  54. cognite/neat/_rules/models/entities/_multi_value.py +2 -2
  55. cognite/neat/_rules/models/entities/_single_value.py +8 -3
  56. cognite/neat/_rules/models/entities/_wrapped.py +2 -2
  57. cognite/neat/_rules/models/information/_rules.py +18 -17
  58. cognite/neat/_rules/models/information/_rules_input.py +3 -1
  59. cognite/neat/_rules/models/information/_validation.py +66 -17
  60. cognite/neat/_rules/transformers/__init__.py +8 -2
  61. cognite/neat/_rules/transformers/_converters.py +234 -44
  62. cognite/neat/_rules/transformers/_verification.py +5 -10
  63. cognite/neat/_session/_base.py +6 -4
  64. cognite/neat/_session/_explore.py +39 -0
  65. cognite/neat/_session/_inspect.py +25 -6
  66. cognite/neat/_session/_prepare.py +12 -0
  67. cognite/neat/_session/_read.py +88 -20
  68. cognite/neat/_session/_set.py +7 -1
  69. cognite/neat/_session/_show.py +11 -123
  70. cognite/neat/_session/_state.py +6 -2
  71. cognite/neat/_session/_subset.py +64 -0
  72. cognite/neat/_session/_to.py +177 -19
  73. cognite/neat/_store/_graph_store.py +9 -246
  74. cognite/neat/_utils/rdf_.py +36 -5
  75. cognite/neat/_utils/spreadsheet.py +44 -1
  76. cognite/neat/_utils/text.py +124 -37
  77. cognite/neat/_utils/upload.py +2 -0
  78. cognite/neat/_version.py +2 -2
  79. {cognite_neat-0.109.4.dist-info → cognite_neat-0.111.0.dist-info}/METADATA +1 -1
  80. {cognite_neat-0.109.4.dist-info → cognite_neat-0.111.0.dist-info}/RECORD +83 -82
  81. {cognite_neat-0.109.4.dist-info → cognite_neat-0.111.0.dist-info}/WHEEL +1 -1
  82. cognite/neat/_graph/queries/_construct.py +0 -187
  83. cognite/neat/_graph/queries/_shared.py +0 -173
  84. cognite/neat/_rules/analysis/_dms.py +0 -57
  85. cognite/neat/_rules/analysis/_information.py +0 -249
  86. cognite/neat/_rules/models/_rdfpath.py +0 -372
  87. {cognite_neat-0.109.4.dist-info → cognite_neat-0.111.0.dist-info}/LICENSE +0 -0
  88. {cognite_neat-0.109.4.dist-info → cognite_neat-0.111.0.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,102 @@
1
+ import json
2
+ import urllib.parse
3
+ from collections.abc import Callable, Iterable, Mapping, Set
4
+ from typing import Any
5
+
6
+ from cognite.client import data_modeling as dm
7
+ from cognite.client.data_classes.data_modeling.instances import Instance
8
+ from rdflib import XSD, Literal, Namespace, URIRef
9
+
10
+ from cognite.neat._shared import Triple
11
+ from cognite.neat._utils.auxiliary import string_to_ideal_type
12
+
13
+ from ._base import BaseExtractor
14
+
15
+ DEFAULT_EMPTY_VALUES = frozenset({"nan", "null", "none", "", " ", "nil", "n/a", "na", "unknown", "undefined"})
16
+
17
+
18
class DictExtractor(BaseExtractor):
    """Extract the triples describing one subject from a dictionary.

    Every key of ``data`` is turned into a predicate in ``namespace`` (the key is
    URL-quoted) and every value into one or more RDF objects:

    * ``str``/``int``/``float``/``bool`` values become a ``Literal``.
    * ``list`` values are expanded item by item under the same key.
    * ``dict`` values are either flattened into separate properties
      (``unpack_json=True``) or stored as a single JSON-typed ``Literal``.
    * ``None`` values produce no triple.

    Args:
        id_: Subject used for every emitted triple.
        data: The key/value pairs to convert.
        namespace: Namespace used for predicates and for the URIs built from
            the values of ``uri_ref_keys``.
        uri_ref_keys: Keys whose scalar values are emitted as URI references
            inside ``namespace``. Defaults to no keys.
        empty_values: Case-folded strings that are skipped while unpacking
            JSON objects.
        str_to_ideal_type: If True, string values found while unpacking JSON
            objects are passed through ``string_to_ideal_type`` before being
            wrapped in a ``Literal``.
        unpack_json: If True, top-level ``dict`` values are flattened instead
            of being serialized as one JSON literal.
    """

    def __init__(
        self,
        id_: URIRef,
        data: Mapping[str, Any],
        namespace: Namespace,
        uri_ref_keys: set[str] | None = None,
        empty_values: Set[str] = DEFAULT_EMPTY_VALUES,
        str_to_ideal_type: bool = False,
        unpack_json: bool = False,
    ) -> None:
        self.id_ = id_
        self.namespace = namespace
        self.data = data
        self.uri_ref_keys = uri_ref_keys or set()
        self.empty_values = empty_values
        self.str_to_ideal_type = str_to_ideal_type
        self.unpack_json = unpack_json

    def extract(self) -> Iterable[Triple]:
        """Yield ``(subject, predicate, object)`` for every entry in ``data``."""
        for key, value in self.data.items():
            for predicate_str, object_ in self._get_predicate_objects_pair(key, value, self.unpack_json):
                yield self.id_, self.namespace[urllib.parse.quote(predicate_str)], object_

    def _get_predicate_objects_pair(
        self, key: str, value: Any, unpack_json: bool
    ) -> Iterable[tuple[str, Literal | URIRef]]:
        """Yield ``(predicate name, object)`` pairs for a single key/value.

        Args:
            key: The predicate name (not yet URL-quoted).
            value: The raw value stored under ``key``.
            unpack_json: Whether a ``dict`` value should be flattened.
        """
        if key in self.uri_ref_keys and value is not None and not isinstance(value, dict | list):
            # urllib.parse.quote only accepts str/bytes, so numeric or boolean
            # identifiers must be converted first. None is skipped, matching how
            # None is dropped for every other key below.
            raw = value if isinstance(value, str | bytes) else str(value)
            yield key, URIRef(self.namespace[urllib.parse.quote(raw)])
        # NOTE(review): this is an `if`, not an `elif`, so a scalar stored under a
        # uri_ref key is emitted both as a URIRef (above) and as a Literal (below).
        # Kept as-is to preserve existing output - confirm whether this is intended.
        if isinstance(value, str | float | bool | int):
            yield key, Literal(value)
        elif isinstance(value, dict) and unpack_json:
            yield from self._unpack_json(value)
        elif isinstance(value, dict):
            # This object is a json object.
            yield key, Literal(json.dumps(value), datatype=XSD._NS["json"])
        elif isinstance(value, list):
            # Nested objects inside a list are never unpacked, hence False.
            for item in value:
                yield from self._get_predicate_objects_pair(key, item, False)

    def _unpack_json(self, value: dict, parent: str | None = None) -> Iterable[tuple[str, Literal | URIRef]]:
        """Flatten a JSON object into ``(predicate name, object)`` pairs.

        Nested objects are flattened recursively; their keys are joined to the
        enclosing key with ``_``. Objects inside a list additionally get the
        1-based position of the item appended to the key.

        Args:
            value: The JSON object to flatten.
            parent: Key prefix for nested objects; ``None`` at the top level,
                in which case the sub keys are used unprefixed.
        """
        for sub_key, sub_value in value.items():
            key = f"{parent}_{sub_key}" if parent else sub_key
            if isinstance(sub_value, str):
                if sub_value.casefold() in self.empty_values:
                    continue
                if self.str_to_ideal_type:
                    yield key, Literal(string_to_ideal_type(sub_value))
                else:
                    yield key, Literal(sub_value)
            elif isinstance(sub_value, int | float | bool):
                yield key, Literal(sub_value)
            elif isinstance(sub_value, dict):
                yield from self._unpack_json(sub_value, key)
            elif isinstance(sub_value, list):
                for no, item in enumerate(sub_value, 1):
                    if isinstance(item, dict):
                        yield from self._unpack_json(item, f"{key}_{no}")
                    else:
                        yield from self._get_predicate_objects_pair(key, item, self.unpack_json)
            else:
                # Fallback for any other type: store its string form. The
                # empty-value filter is applied here too, so that a JSON null is
                # not turned into the literal string "None".
                text = str(sub_value)
                if text.casefold() in self.empty_values:
                    continue
                yield key, Literal(text)
80
+
81
+
82
class DMSPropertyExtractor(DictExtractor):
    """Dictionary extractor that also recognizes direct relation values.

    A ``dict`` value containing both a ``"space"`` and an ``"externalId"`` key is
    loaded as a ``DirectRelationReference`` and converted to a URI with
    ``as_uri_ref``. Every other value is handled by ``DictExtractor``.

    Args:
        id_: Subject used for every emitted triple.
        data: The property values to convert.
        namespace: Namespace used for the predicates.
        as_uri_ref: Callable that turns an instance or a direct relation
            reference into a ``URIRef``.
        empty_values: Forwarded to ``DictExtractor``.
        str_to_ideal_type: Forwarded to ``DictExtractor``.
        unpack_json: Forwarded to ``DictExtractor``.
    """

    def __init__(
        self,
        id_: URIRef,
        data: Mapping[str, Any],
        namespace: Namespace,
        as_uri_ref: Callable[[Instance | dm.DirectRelationReference], URIRef],
        empty_values: Set[str] = DEFAULT_EMPTY_VALUES,
        str_to_ideal_type: bool = False,
        unpack_json: bool = False,
    ) -> None:
        # No uri_ref_keys are passed on: references are detected from the shape of the value instead.
        super().__init__(
            id_,
            data,
            namespace,
            uri_ref_keys=None,
            empty_values=empty_values,
            str_to_ideal_type=str_to_ideal_type,
            unpack_json=unpack_json,
        )
        self.as_uri_ref = as_uri_ref

    def _get_predicate_objects_pair(
        self, key: str, value: Any, unpack_json: bool
    ) -> Iterable[tuple[str, Literal | URIRef]]:
        """Yield ``(predicate name, object)`` pairs, mapping direct relations to URIs."""
        looks_like_direct_relation = isinstance(value, dict) and "space" in value and "externalId" in value
        if not looks_like_direct_relation:
            yield from super()._get_predicate_objects_pair(key, value, unpack_json)
            return

        reference = dm.DirectRelationReference.load(value)
        yield key, self.as_uri_ref(reference)
@@ -6,20 +6,18 @@ from typing import cast
6
6
  from cognite.client import CogniteClient
7
7
  from cognite.client import data_modeling as dm
8
8
  from cognite.client.data_classes.data_modeling import DataModelIdentifier
9
- from cognite.client.data_classes.data_modeling.instances import Instance, PropertyValue
9
+ from cognite.client.data_classes.data_modeling.instances import Instance, InstanceSort
10
10
  from cognite.client.utils.useful_types import SequenceNotStr
11
- from rdflib import RDF, XSD, Literal, Namespace, URIRef
11
+ from rdflib import RDF, Literal, Namespace, URIRef
12
12
 
13
13
  from cognite.neat._config import GLOBAL_CONFIG
14
14
  from cognite.neat._constants import DEFAULT_SPACE_URI, is_readonly_property
15
15
  from cognite.neat._issues.errors import ResourceRetrievalError
16
16
  from cognite.neat._shared import Triple
17
- from cognite.neat._utils.auxiliary import string_to_ideal_type
18
17
  from cognite.neat._utils.collection_ import iterate_progress_bar
19
18
 
20
19
  from ._base import BaseExtractor
21
-
22
- DEFAULT_EMPTY_VALUES = frozenset({"nan", "null", "none", "", " ", "nil", "n/a", "na", "unknown", "undefined"})
20
+ from ._dict import DEFAULT_EMPTY_VALUES, DMSPropertyExtractor
23
21
 
24
22
 
25
23
  class DMSExtractor(BaseExtractor):
@@ -188,39 +186,15 @@ class DMSExtractor(BaseExtractor):
188
186
 
189
187
  for view_id, properties in instance.properties.items():
190
188
  namespace = self._get_namespace(view_id.space)
191
- for key, value in properties.items():
192
- for predicate_str, object_ in self._get_predicate_objects_pair(key, value):
193
- yield id_, namespace[urllib.parse.quote(predicate_str)], object_
194
-
195
- def _get_predicate_objects_pair(self, key: str, value: PropertyValue) -> Iterable[tuple[str, Literal | URIRef]]:
196
- if isinstance(value, str | float | bool | int):
197
- yield key, Literal(value)
198
- elif isinstance(value, dict) and "space" in value and "externalId" in value:
199
- yield key, self._as_uri_ref(dm.DirectRelationReference.load(value))
200
- elif isinstance(value, dict) and self.unpack_json:
201
- for sub_key, sub_value in value.items():
202
- if isinstance(sub_value, str):
203
- if sub_value.casefold() in self.empty_values:
204
- continue
205
- if self.str_to_ideal_type:
206
- yield sub_key, Literal(string_to_ideal_type(sub_value))
207
- else:
208
- yield sub_key, Literal(sub_value)
209
- elif isinstance(sub_value, int | float | bool):
210
- yield sub_key, Literal(sub_value)
211
- elif isinstance(sub_value, dict):
212
- yield from self._get_predicate_objects_pair(f"{key}_{sub_key}", sub_value)
213
- elif isinstance(sub_value, list):
214
- for item in sub_value:
215
- yield from self._get_predicate_objects_pair(f"{key}_{sub_key}", item)
216
- else:
217
- yield sub_key, Literal(str(sub_value))
218
- elif isinstance(value, dict):
219
- # This object is a json object.
220
- yield key, Literal(str(value), datatype=XSD._NS["json"])
221
- elif isinstance(value, list):
222
- for item in value:
223
- yield from self._get_predicate_objects_pair(key, item)
189
+ yield from DMSPropertyExtractor(
190
+ id_,
191
+ properties,
192
+ namespace,
193
+ self._as_uri_ref,
194
+ self.empty_values,
195
+ self.str_to_ideal_type,
196
+ self.unpack_json,
197
+ ).extract()
224
198
 
225
199
  def _as_uri_ref(self, instance: Instance | dm.DirectRelationReference) -> URIRef:
226
200
  return self._get_namespace(instance.space)[urllib.parse.quote(instance.external_id)]
@@ -270,8 +244,16 @@ class _ViewInstanceIterator(Iterable[Instance]):
270
244
  }
271
245
  # All nodes and edges with properties
272
246
  if self.view.used_for in ("node", "all"):
247
+ # Without a sort, the sort is implicitly by the internal id, as cursoring needs a stable sort.
248
+ # By making the sort be on external_id, Postgres should pick the index
249
+ # that's on (project_id, space, external_id)
250
+ # WHERE deleted_at IS NULL. In other words, avoiding soft deleted instances.
273
251
  node_iterable: Iterable[Instance] = self.client.data_modeling.instances(
274
- chunk_size=None, instance_type="node", sources=[view_id], space=self.instance_space
252
+ chunk_size=None,
253
+ instance_type="node",
254
+ sources=[view_id],
255
+ space=self.instance_space,
256
+ sort=InstanceSort(["node", "externalId"]),
275
257
  )
276
258
  if read_only_properties:
277
259
  node_iterable = self._remove_read_only_properties(node_iterable, read_only_properties, view_id)
@@ -279,7 +261,11 @@ class _ViewInstanceIterator(Iterable[Instance]):
279
261
 
280
262
  if self.view.used_for in ("edge", "all"):
281
263
  yield from self.client.data_modeling.instances(
282
- chunk_size=None, instance_type="edge", sources=[view_id], space=self.instance_space
264
+ chunk_size=None,
265
+ instance_type="edge",
266
+ sources=[view_id],
267
+ space=self.instance_space,
268
+ sort=InstanceSort(["edge", "externalId"]),
283
269
  )
284
270
 
285
271
  for prop in self.view.properties.values():
@@ -294,6 +280,7 @@ class _ViewInstanceIterator(Iterable[Instance]):
294
280
  ["edge", "type"], {"space": prop.type.space, "externalId": prop.type.external_id}
295
281
  ),
296
282
  space=self.instance_space,
283
+ sort=InstanceSort(["edge", "externalId"]),
297
284
  )
298
285
 
299
286
  @staticmethod
@@ -12,6 +12,8 @@ from cognite.neat._issues.warnings import CDFAuthWarning, ResourceNotFoundWarnin
12
12
  from cognite.neat._rules.importers import DMSImporter
13
13
  from cognite.neat._rules.models import DMSRules, InformationRules
14
14
  from cognite.neat._rules.models.data_types import Json
15
+ from cognite.neat._rules.models.entities import UnknownEntity
16
+ from cognite.neat._rules.models.information import InformationProperty
15
17
  from cognite.neat._rules.transformers import DMSToInformation, VerifyDMSRules
16
18
  from cognite.neat._shared import Triple
17
19
 
@@ -131,7 +133,6 @@ class DMSGraphExtractor(KnowledgeGraphExtractor):
131
133
  yield from DMSExtractor.from_views(
132
134
  self._client,
133
135
  views,
134
- overwrite_namespace=self._namespace,
135
136
  instance_space=self._instance_space,
136
137
  unpack_json=self._unpack_json,
137
138
  str_to_ideal_type=self._str_to_ideal_type,
@@ -186,8 +187,12 @@ class DMSGraphExtractor(KnowledgeGraphExtractor):
186
187
  prop
187
188
  for prop in dms_rules.properties
188
189
  if not (
189
- isinstance(prop.value_type, Json)
190
- or (isinstance(prop.value_type, str) and prop.value_type == json_name)
190
+ (
191
+ isinstance(prop.value_type, Json)
192
+ or (isinstance(prop.value_type, str) and prop.value_type == json_name)
193
+ )
194
+ # We are not unpacking list of JSONs.
195
+ and prop.is_list is not True
191
196
  )
192
197
  ]
193
198
 
@@ -195,5 +200,27 @@ class DMSGraphExtractor(KnowledgeGraphExtractor):
195
200
  # Any errors occur will be raised and caught outside the extractor.
196
201
  verified_dms = VerifyDMSRules(client=self._client).transform(unverified_dms)
197
202
  information_rules = DMSToInformation(self._namespace).transform(verified_dms)
203
+
204
+ # We need to sync the metadata between the two rules, such that the `.sync_with_info_rules` method works.
205
+ information_rules.metadata.physical = verified_dms.metadata.identifier
206
+ verified_dms.metadata.logical = information_rules.metadata.identifier
207
+ verified_dms.sync_with_info_rules(information_rules)
208
+
209
+ # Adding startNode and endNode to the information rules for views that are used for edges.
210
+ classes_by_prefix = {cls_.class_.prefix: cls_ for cls_ in information_rules.classes}
211
+ for view in self._model_views:
212
+ if view.used_for == "edge" and view.external_id in classes_by_prefix:
213
+ cls_ = classes_by_prefix[view.external_id]
214
+ for property_ in ("startNode", "endNode"):
215
+ information_rules.properties.append(
216
+ InformationProperty(
217
+ class_=cls_.class_,
218
+ property_=property_,
219
+ value_type=UnknownEntity(),
220
+ min_count=0,
221
+ max_count=1,
222
+ )
223
+ )
224
+
198
225
  self._issues.extend(issues)
199
226
  return information_rules, verified_dms
@@ -14,7 +14,7 @@ from cognite.neat._graph.extractors._base import BaseExtractor
14
14
  from cognite.neat._issues.errors import FileReadError, NeatValueError
15
15
  from cognite.neat._shared import Triple
16
16
  from cognite.neat._utils.rdf_ import remove_namespace_from_uri
17
- from cognite.neat._utils.text import to_camel
17
+ from cognite.neat._utils.text import to_camel_case
18
18
  from cognite.neat._utils.xml_ import get_children
19
19
 
20
20
  IODD = Namespace("http://www.io-link.com/IODD/2010/10/")
@@ -170,7 +170,7 @@ class IODDExtractor(BaseExtractor):
170
170
  ):
171
171
  if text_id := child[0].attrib.get("textId"):
172
172
  # Create connection from device to textId node
173
- element_tag = to_camel(element_tag)
173
+ element_tag = to_camel_case(element_tag)
174
174
  triples.append((id, IODD[element_tag], namespace[text_id]))
175
175
 
176
176
  return triples
@@ -216,7 +216,7 @@ class IODDExtractor(BaseExtractor):
216
216
  if id := element.attrib.get("id"):
217
217
  if id in cls.std_variable_elements_to_extract:
218
218
  if object := element.attrib.get("defaultValue"):
219
- predicate = to_camel(id.replace("V_", ""))
219
+ predicate = to_camel_case(id.replace("V_", ""))
220
220
  triples.append((device_id, IODD[predicate], Literal(object)))
221
221
  return triples
222
222
 
@@ -12,11 +12,12 @@ import pandas as pd
12
12
  from rdflib import RDF, Literal, Namespace, URIRef
13
13
 
14
14
  from cognite.neat._rules._constants import EntityTypes
15
- from cognite.neat._rules.analysis import InformationAnalysis
15
+ from cognite.neat._rules.analysis import RulesAnalysis
16
16
  from cognite.neat._rules.models import DMSRules, InformationRules
17
17
  from cognite.neat._rules.models.data_types import DataType
18
18
  from cognite.neat._rules.models.entities import ClassEntity
19
19
  from cognite.neat._rules.models.information import InformationProperty
20
+ from cognite.neat._rules.transformers import SubsetInformationRules
20
21
  from cognite.neat._shared import Triple
21
22
  from cognite.neat._utils.rdf_ import remove_namespace_from_uri
22
23
 
@@ -54,7 +55,7 @@ class MockGraphGenerator(BaseExtractor):
54
55
 
55
56
  if not class_count:
56
57
  self.class_count = {
57
- class_: 1 for class_ in InformationAnalysis(self.rules).defined_classes(consider_inheritance=True)
58
+ class_: 1 for class_ in RulesAnalysis(self.rules).defined_classes(include_ancestors=True)
58
59
  }
59
60
  elif all(isinstance(key, str) for key in class_count.keys()):
60
61
  self.class_count = {
@@ -104,7 +105,8 @@ def generate_triples(
104
105
  """
105
106
 
106
107
  namespace = rules.metadata.namespace
107
- defined_classes = InformationAnalysis(rules).defined_classes(consider_inheritance=True)
108
+ analysis = RulesAnalysis(rules)
109
+ defined_classes = analysis.defined_classes(include_ancestors=True)
108
110
 
109
111
  if non_existing_classes := set(class_count.keys()) - defined_classes:
110
112
  msg = f"Class count contains classes {non_existing_classes} for which properties are not defined in Data Model!"
@@ -118,16 +120,16 @@ def generate_triples(
118
120
 
119
121
  # Subset data model to only classes that are defined in class count
120
122
  rules = (
121
- InformationAnalysis(rules).subset_rules(set(class_count.keys()))
123
+ SubsetInformationRules(classes=set(class_count.keys())).transform(rules)
122
124
  if defined_classes != set(class_count.keys())
123
125
  else rules
124
126
  )
125
127
 
126
- class_linkage = InformationAnalysis(rules).class_linkage().to_pandas()
128
+ class_linkage = analysis.class_linkage().to_pandas()
127
129
 
128
130
  # Remove one of symmetric pairs from class linkage to maintain proper linking
129
131
  # among instances of symmetrically linked classes
130
- if sym_pairs := InformationAnalysis(rules).symmetrically_connected_classes():
132
+ if sym_pairs := analysis.symmetrically_connected_classes():
131
133
  class_linkage = _remove_higher_occurring_sym_pair(class_linkage, sym_pairs)
132
134
 
133
135
  # Remove any of symmetric pairs containing classes that are not present class count
@@ -137,7 +139,7 @@ def generate_triples(
137
139
  generation_order = _prettify_generation_order(_get_generation_order(class_linkage))
138
140
 
139
141
  # Generated simple view of data model
140
- class_property_pairs = InformationAnalysis(rules).classes_with_properties(consider_inheritance=True)
142
+ class_property_pairs = analysis.properties_by_class(include_ancestors=True)
141
143
 
142
144
  # pregenerate instance ids for each remaining class
143
145
  instance_ids = {
@@ -0,0 +1,67 @@
1
+ import urllib.parse
2
+ from collections.abc import Iterable, Set
3
+ from typing import Any, cast
4
+
5
+ from cognite.client.data_classes import Row, RowList
6
+ from cognite.client.utils.useful_types import SequenceNotStr
7
+ from rdflib import RDF, Namespace, URIRef
8
+
9
+ from cognite.neat._client import NeatClient
10
+ from cognite.neat._constants import DEFAULT_RAW_URI
11
+ from cognite.neat._shared import Triple
12
+
13
+ from ._base import BaseExtractor
14
+ from ._dict import DEFAULT_EMPTY_VALUES, DictExtractor
15
+
16
+
17
class RAWExtractor(BaseExtractor):
    """Extract triples from the rows of a single RAW table.

    Each row becomes one subject, identified by the URL-quoted row key inside
    ``namespace``. The subject gets an ``rdf:type`` triple followed by the
    triples that ``DictExtractor`` produces from the row's columns.

    Args:
        client: Client used to read the rows.
        db_name: Name of the RAW database.
        table_name: Name of the RAW table.
        table_type: Name used for the ``rdf:type`` of every row. Falls back to
            ``table_name`` when not given.
        foreign_keys: Column name(s) passed to ``DictExtractor`` as
            ``uri_ref_keys``.
        namespace: Namespace for subjects, predicates and the type. Defaults to
            ``DEFAULT_RAW_URI``.
        empty_values: Forwarded to ``DictExtractor``.
        str_to_ideal_type: Forwarded to ``DictExtractor``.
        unpack_json: Forwarded to ``DictExtractor``.
    """

    def __init__(
        self,
        client: NeatClient,
        db_name: str,
        table_name: str,
        table_type: str | None = None,
        foreign_keys: str | SequenceNotStr[str] | None = None,
        namespace: Namespace | None = None,
        empty_values: Set[str] = DEFAULT_EMPTY_VALUES,
        str_to_ideal_type: bool = False,
        unpack_json: bool = False,
    ) -> None:
        self.client = client
        self.db_name = db_name
        self.table_name = table_name
        self.table_type = table_type
        # Normalize to a set: a lone string is a single key, not a sequence of characters.
        if isinstance(foreign_keys, str):
            self.foreign_keys = {foreign_keys}
        else:
            self.foreign_keys = set(foreign_keys) if foreign_keys else set()
        self.namespace = namespace or Namespace(DEFAULT_RAW_URI)
        self.empty_values = empty_values
        self.str_to_ideal_type = str_to_ideal_type
        self.unpack_json = unpack_json

    @property
    def _rdf_type(self) -> URIRef:
        """URI used as the ``rdf:type`` of every extracted row."""
        type_name = self.table_type or self.table_name
        return self.namespace[urllib.parse.quote(type_name)]

    def extract(self) -> Iterable[Triple]:
        """Read the table and yield the triples of every row."""
        for chunk in self.client.raw.rows(self.db_name, self.table_name, partitions=10, chunk_size=None):
            if isinstance(chunk, Row):
                rows: Iterable[Row] = [chunk]
            elif isinstance(chunk, RowList):
                # Per the original author: an SDK bug can hand back a RowList even with chunk_size=None.
                rows = chunk
            else:
                continue
            for row in rows:
                yield from self._row2triples(row)

    def _row2triples(self, row: Row) -> Iterable[Triple]:
        """Yield the type triple and the column triples for one row."""
        # The SDK annotates key/columns as optional; they are treated as always set here.
        row_key = cast(str, row.key)
        columns = cast(dict[str, Any], row.columns)

        subject = self.namespace[urllib.parse.quote(row_key)]
        yield subject, RDF.type, self._rdf_type

        column_extractor = DictExtractor(
            id_=subject,
            data=columns,
            namespace=self.namespace,
            uri_ref_keys=self.foreign_keys,
            empty_values=self.empty_values,
            str_to_ideal_type=self.str_to_ideal_type,
            unpack_json=self.unpack_json,
        )
        yield from column_extractor.extract()
@@ -20,6 +20,11 @@ T_Output = TypeVar("T_Output")
20
20
  class _END_OF_CLASS: ...
21
21
 
22
22
 
23
+ class _START_OF_CLASS:
24
+ def __init__(self, class_name: str | None = None):
25
+ self.class_name = class_name
26
+
27
+
23
28
  class BaseLoader(ABC, Generic[T_Output]):
24
29
  _new_line = "\n"
25
30
  _encoding = "utf-8"
@@ -33,10 +38,16 @@ class BaseLoader(ABC, Generic[T_Output]):
33
38
 
34
39
  def load(self, stop_on_exception: bool = False) -> Iterable[T_Output | NeatIssue]:
35
40
  """Load the graph with data."""
36
- return (item for item in self._load(stop_on_exception) if item is not _END_OF_CLASS) # type: ignore[misc]
41
+ return (
42
+ item # type: ignore[misc]
43
+ for item in self._load(stop_on_exception)
44
+ if not (item is _END_OF_CLASS or isinstance(item, _START_OF_CLASS))
45
+ )
37
46
 
38
47
  @abstractmethod
39
- def _load(self, stop_on_exception: bool = False) -> Iterable[T_Output | NeatIssue | type[_END_OF_CLASS]]:
48
+ def _load(
49
+ self, stop_on_exception: bool = False
50
+ ) -> Iterable[T_Output | NeatIssue | type[_END_OF_CLASS] | _START_OF_CLASS]:
40
51
  """Load the graph with data."""
41
52
  pass
42
53
 
@@ -75,21 +86,25 @@ class CDFLoader(BaseLoader[T_Output]):
75
86
 
76
87
  issues = IssueList()
77
88
  items: list[T_Output] = []
89
+ last_class_name: str | None = None
78
90
  for result in self._load(stop_on_exception=False):
79
91
  if isinstance(result, NeatIssue):
80
92
  issues.append(result)
81
93
  elif result is _END_OF_CLASS:
82
94
  ...
95
+ elif isinstance(result, _START_OF_CLASS):
96
+ last_class_name = result.class_name
97
+ continue
83
98
  else:
84
99
  # MyPy does not understand that 'else' means the item will be of type T_Output
85
100
  items.append(result) # type: ignore[arg-type]
86
101
 
87
102
  if len(items) >= self._UPLOAD_BATCH_SIZE or result is _END_OF_CLASS:
88
- yield from self._upload_to_cdf(client, items, dry_run, issues)
103
+ yield from self._upload_to_cdf(client, items, dry_run, issues, last_class_name)
89
104
  issues = IssueList()
90
105
  items = []
91
106
  if items:
92
- yield from self._upload_to_cdf(client, items, dry_run, issues)
107
+ yield from self._upload_to_cdf(client, items, dry_run, issues, last_class_name)
93
108
 
94
109
  @abstractmethod
95
110
  def _get_required_capabilities(self) -> list[Capability]:
@@ -102,5 +117,6 @@ class CDFLoader(BaseLoader[T_Output]):
102
117
  items: list[T_Output],
103
118
  dry_run: bool,
104
119
  read_issues: IssueList,
120
+ class_name: str | None = None,
105
121
  ) -> Iterable[UploadResult]:
106
122
  raise NotImplementedError