cognite-neat 0.110.0__py3-none-any.whl → 0.111.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cognite-neat might be problematic. Click here for more details.

Files changed (53)
  1. cognite/neat/_alpha.py +6 -0
  2. cognite/neat/_client/_api/schema.py +26 -0
  3. cognite/neat/_client/data_classes/schema.py +1 -1
  4. cognite/neat/_constants.py +4 -1
  5. cognite/neat/_graph/extractors/__init__.py +4 -0
  6. cognite/neat/_graph/extractors/_classic_cdf/_base.py +8 -16
  7. cognite/neat/_graph/extractors/_classic_cdf/_classic.py +39 -9
  8. cognite/neat/_graph/extractors/_classic_cdf/_relationships.py +23 -17
  9. cognite/neat/_graph/extractors/_classic_cdf/_sequences.py +15 -17
  10. cognite/neat/_graph/extractors/_dict.py +102 -0
  11. cognite/neat/_graph/extractors/_dms.py +27 -40
  12. cognite/neat/_graph/extractors/_dms_graph.py +30 -3
  13. cognite/neat/_graph/extractors/_raw.py +67 -0
  14. cognite/neat/_graph/loaders/_base.py +20 -4
  15. cognite/neat/_graph/loaders/_rdf2dms.py +243 -89
  16. cognite/neat/_graph/queries/_base.py +137 -43
  17. cognite/neat/_graph/transformers/_classic_cdf.py +6 -22
  18. cognite/neat/_issues/_factory.py +9 -1
  19. cognite/neat/_issues/errors/__init__.py +2 -0
  20. cognite/neat/_issues/errors/_external.py +7 -0
  21. cognite/neat/_issues/warnings/user_modeling.py +12 -0
  22. cognite/neat/_rules/_constants.py +3 -0
  23. cognite/neat/_rules/analysis/_base.py +29 -50
  24. cognite/neat/_rules/exporters/_rules2excel.py +1 -1
  25. cognite/neat/_rules/importers/_rdf/_inference2rules.py +16 -10
  26. cognite/neat/_rules/models/_base_rules.py +0 -2
  27. cognite/neat/_rules/models/data_types.py +7 -0
  28. cognite/neat/_rules/models/dms/_exporter.py +9 -8
  29. cognite/neat/_rules/models/dms/_rules.py +26 -1
  30. cognite/neat/_rules/models/dms/_rules_input.py +5 -1
  31. cognite/neat/_rules/models/dms/_validation.py +101 -1
  32. cognite/neat/_rules/models/entities/_single_value.py +8 -3
  33. cognite/neat/_rules/models/entities/_wrapped.py +2 -2
  34. cognite/neat/_rules/models/information/_rules_input.py +1 -0
  35. cognite/neat/_rules/models/information/_validation.py +64 -17
  36. cognite/neat/_rules/transformers/_converters.py +7 -2
  37. cognite/neat/_session/_base.py +2 -0
  38. cognite/neat/_session/_explore.py +39 -0
  39. cognite/neat/_session/_inspect.py +25 -6
  40. cognite/neat/_session/_read.py +67 -3
  41. cognite/neat/_session/_set.py +7 -1
  42. cognite/neat/_session/_state.py +6 -0
  43. cognite/neat/_session/_to.py +115 -8
  44. cognite/neat/_store/_graph_store.py +8 -4
  45. cognite/neat/_utils/rdf_.py +34 -3
  46. cognite/neat/_utils/text.py +72 -4
  47. cognite/neat/_utils/upload.py +2 -0
  48. cognite/neat/_version.py +2 -2
  49. {cognite_neat-0.110.0.dist-info → cognite_neat-0.111.1.dist-info}/METADATA +1 -1
  50. {cognite_neat-0.110.0.dist-info → cognite_neat-0.111.1.dist-info}/RECORD +53 -50
  51. {cognite_neat-0.110.0.dist-info → cognite_neat-0.111.1.dist-info}/LICENSE +0 -0
  52. {cognite_neat-0.110.0.dist-info → cognite_neat-0.111.1.dist-info}/WHEEL +0 -0
  53. {cognite_neat-0.110.0.dist-info → cognite_neat-0.111.1.dist-info}/entry_points.txt +0 -0
@@ -1,8 +1,9 @@
1
+ import urllib.parse
1
2
  from collections import defaultdict
2
3
  from collections.abc import Iterable
3
- from typing import Literal, cast, overload
4
+ from typing import Any, Literal, cast, overload
4
5
 
5
- from rdflib import RDF, Dataset, Graph, Namespace, URIRef
6
+ from rdflib import RDF, XSD, Dataset, Graph, Namespace, URIRef
6
7
  from rdflib import Literal as RdfLiteral
7
8
  from rdflib.graph import DATASET_DEFAULT_GRAPH_ID
8
9
  from rdflib.query import ResultRow
@@ -27,9 +28,8 @@ class Queries:
27
28
  """Get named graph from the dataset to query over"""
28
29
  return self.dataset.graph(named_graph or self.default_named_graph)
29
30
 
30
- def summarize_instances(self, named_graph: URIRef | None = None) -> list[tuple]:
31
+ def summarize_instances(self, named_graph: URIRef | None = None) -> list[tuple[str, int]]:
31
32
  """Summarize instances in the graph store by class and count"""
32
-
33
33
  query_statement = """ SELECT ?class (COUNT(?instance) AS ?instanceCount)
34
34
  WHERE {
35
35
  ?instance a ?class .
@@ -37,12 +37,12 @@ class Queries:
37
37
  GROUP BY ?class
38
38
  ORDER BY DESC(?instanceCount) """
39
39
 
40
- return [
40
+ return [ # type: ignore[misc]
41
41
  (
42
- remove_namespace_from_uri(cast(URIRef, cast(tuple, res)[0])),
43
- cast(RdfLiteral, cast(tuple, res)[1]).value,
42
+ remove_namespace_from_uri(cast(URIRef, class_)),
43
+ cast(RdfLiteral, count).value,
44
44
  )
45
- for res in list(self.graph(named_graph=named_graph).query(query_statement))
45
+ for class_, count in self.graph(named_graph=named_graph).query(query_statement)
46
46
  ]
47
47
 
48
48
  def types(self, named_graph: URIRef | None = None) -> dict[URIRef, str]:
@@ -73,6 +73,20 @@ class Queries:
73
73
  for (type_,) in list(self.graph(named_graph).query(query))
74
74
  }
75
75
 
76
+ def properties_by_type(self, named_graph: URIRef | None = None) -> dict[URIRef, dict[URIRef, str]]:
77
+ """Properties and their short form in the graph by type
78
+
79
+ Args:
80
+ named_graph: Named graph to query over, default None (default graph)
81
+
82
+ """
83
+ query = """SELECT DISTINCT ?type ?property
84
+ WHERE {?s a ?type . ?s ?property ?o . FILTER(?property != rdf:type)}"""
85
+ properties_by_type: dict[URIRef, dict[URIRef, str]] = defaultdict(dict)
86
+ for type_, property_ in cast(ResultRow, list(self.graph(named_graph).query(query))):
87
+ properties_by_type[type_][property_] = remove_namespace_from_uri(property_) # type: ignore[index]
88
+ return properties_by_type
89
+
76
90
  def property_uri(self, property_: str, named_graph: URIRef | None = None) -> list[URIRef]:
77
91
  """Get the URIRef of a property
78
92
 
@@ -82,41 +96,38 @@ class Queries:
82
96
  """
83
97
  return [k for k, v in self.properties(named_graph).items() if v == property_]
84
98
 
85
- def list_instances_ids_of_class(
99
+ @overload
100
+ def list_instances_ids(
101
+ self, class_uri: None = None, limit: int = -1, named_graph: URIRef | None = None
102
+ ) -> Iterable[tuple[URIRef, URIRef]]: ...
103
+
104
+ @overload
105
+ def list_instances_ids(
86
106
  self, class_uri: URIRef, limit: int = -1, named_graph: URIRef | None = None
87
- ) -> list[URIRef]:
88
- """Get instances ids for a given class
107
+ ) -> Iterable[URIRef]: ...
108
+
109
+ def list_instances_ids(
110
+ self, class_uri: URIRef | None = None, limit: int = -1, named_graph: URIRef | None = None
111
+ ) -> Iterable[URIRef] | Iterable[tuple[URIRef, URIRef]]:
112
+ """List all instance IDs
89
113
 
90
114
  Args:
91
- class_uri: Class for which instances are to be found
115
+ class_uri: Class for which instances are to be found, default None (all instances)
92
116
  limit: Max number of instances to return, by default -1 meaning all instances
93
117
  named_graph: Named graph to query over, default None (default graph)
94
118
 
95
119
  Returns:
96
120
  List of class instance URIs
97
121
  """
98
- query_statement = "SELECT DISTINCT ?subject WHERE { ?subject a <class> .} LIMIT X".replace(
99
- "class", class_uri
100
- ).replace("LIMIT X", "" if limit == -1 else f"LIMIT {limit}")
101
- return [cast(tuple, res)[0] for res in list(self.graph(named_graph).query(query_statement))]
102
-
103
- def list_instances_of_type(self, class_uri: URIRef, named_graph: URIRef | None = None) -> list[ResultRow]:
104
- """Get all triples for instances of a given class
105
-
106
- Args:
107
- class_uri: Class for which instances are to be found
108
- named_graph: Named graph to query over, default None (default graph)
109
-
110
- Returns:
111
- List of triples for instances of the given class in the named graph
112
- """
113
- query = (
114
- f"SELECT ?instance ?prop ?value "
115
- f"WHERE {{ ?instance rdf:type <{class_uri}> . ?instance ?prop ?value . }} order by ?instance "
116
- )
117
-
118
- # Select queries gives an iterable of result rows
119
- return cast(list[ResultRow], list(self.graph(named_graph).query(query)))
122
+ query = "SELECT DISTINCT ?subject"
123
+ if class_uri:
124
+ query += f" WHERE {{ ?subject a <{class_uri}> .}}"
125
+ else:
126
+ query += " ?type WHERE {{ ?subject a ?type .}}"
127
+ if limit != -1:
128
+ query += f" LIMIT {limit}"
129
+ # MyPy is not very happy with RDFLib, so just ignore the type hinting here
130
+ return (tuple(res) if class_uri is None else res[0] for res in self.graph(named_graph).query(query)) # type: ignore[index, return-value, arg-type]
120
131
 
121
132
  def type_with_property(self, type_: URIRef, property_uri: URIRef, named_graph: URIRef | None = None) -> bool:
122
133
  """Check if a property exists in the graph store
@@ -168,7 +179,8 @@ class Queries:
168
179
  instance_type: URIRef | None = None,
169
180
  property_renaming_config: dict | None = None,
170
181
  named_graph: URIRef | None = None,
171
- ) -> tuple[str, dict[str | InstanceType, list[str]]] | None:
182
+ remove_uri_namespace: bool = True,
183
+ ) -> tuple[URIRef, dict[str | InstanceType, list[Any]]] | None:
172
184
  """DESCRIBE instance for a given class from the graph store
173
185
 
174
186
  Args:
@@ -176,13 +188,13 @@ class Queries:
176
188
  instance_type: Type of the instance, default None (will be inferred from triples)
177
189
  property_renaming_config: Dictionary to rename properties, default None (no renaming)
178
190
  named_graph: Named graph to query over, default None (default graph)
191
+ remove_uri_namespace: Whether to remove the namespace from the URI, by default True
179
192
 
180
193
 
181
194
  Returns:
182
195
  Dictionary of instance properties
183
196
  """
184
- property_values: dict[str, list[str]] = defaultdict(list)
185
- identifier = remove_namespace_from_uri(instance_id, validation="prefix")
197
+ property_values: dict[str, list[str] | list[URIRef]] = defaultdict(list)
186
198
  for _, predicate, object_ in cast(list[ResultRow], self.graph(named_graph).query(f"DESCRIBE <{instance_id}>")):
187
199
  if object_.lower() in [
188
200
  "",
@@ -205,29 +217,37 @@ class Queries:
205
217
  property_ = RDF.type
206
218
  renamed_property_ = property_
207
219
 
208
- if isinstance(object_, URIRef):
220
+ value: Any
221
+ if isinstance(object_, URIRef) and remove_uri_namespace:
222
+ # These properties contain the space in the Namespace.
209
223
  value = remove_namespace_from_uri(object_, validation="prefix")
224
+ elif isinstance(object_, URIRef):
225
+ value = object_
210
226
  elif isinstance(object_, RdfLiteral):
211
- value = object_.toPython()
227
+ if object_.datatype == XSD._NS["json"]:
228
+ # For JSON literals, the .toPython() returns a Literal object.
229
+ value = str(object_)
230
+ else:
231
+ value = object_.toPython()
212
232
  else:
213
233
  # It is a blank node
214
234
  value = str(object_)
215
235
 
216
236
  # add type to the dictionary
217
237
  if predicate != RDF.type:
218
- property_values[renamed_property_].append(value)
238
+ property_values[renamed_property_].append(value) # type: ignore[arg-type]
219
239
  else:
220
240
  # guarding against multiple rdf:type values as this is not allowed in CDF
221
241
  if RDF.type not in property_values:
222
242
  property_values[RDF.type].append(
223
- remove_namespace_from_uri(instance_type, validation="prefix") if instance_type else value
243
+ remove_namespace_from_uri(instance_type, validation="prefix") if instance_type else value # type: ignore[arg-type]
224
244
  )
225
245
  else:
226
246
  # we should not have multiple rdf:type values
227
247
  continue
228
248
  if property_values:
229
249
  return (
230
- identifier,
250
+ instance_id,
231
251
  property_values,
232
252
  )
233
253
  else:
@@ -327,7 +347,7 @@ class Queries:
327
347
  """
328
348
  dropped_types: dict[URIRef, int] = {}
329
349
  for t in type_:
330
- instance_ids = self.list_instances_ids_of_class(t)
350
+ instance_ids = list(self.list_instances_ids(t))
331
351
  dropped_types[t] = len(instance_ids)
332
352
  remove_instance_ids_in_batch(self.graph(named_graph), instance_ids)
333
353
  return dropped_types
@@ -359,6 +379,80 @@ class Queries:
359
379
  query = f"SELECT (COUNT(?instance) AS ?instanceCount) WHERE {{ ?instance a <{class_uri}> }}"
360
380
  return int(next(iter(self.graph(named_graph).query(query)))[0]) # type: ignore[arg-type, index]
361
381
 
382
+ def types_with_instance_and_property_count(
383
+ self, remove_namespace: bool = True, named_graph: URIRef | None = None
384
+ ) -> list[dict[str, Any]]:
385
+ query = """
386
+ SELECT ?type (COUNT(DISTINCT ?instance) AS ?instanceCount) (COUNT(DISTINCT ?property) AS ?propertyCount)
387
+ WHERE {
388
+ ?instance a ?type .
389
+ ?instance ?property ?value .
390
+ FILTER(?property != rdf:type)
391
+ }
392
+ GROUP BY ?type
393
+ ORDER BY DESC(?instanceCount)"""
394
+ return [
395
+ {
396
+ "type": urllib.parse.unquote(remove_namespace_from_uri(type_)) if remove_namespace else type_,
397
+ "instanceCount": cast(RdfLiteral, instance_count).toPython(),
398
+ "propertyCount": cast(RdfLiteral, property_count).toPython(),
399
+ }
400
+ for type_, instance_count, property_count in list(
401
+ cast(list[ResultRow], self.graph(named_graph).query(query))
402
+ )
403
+ ]
404
+
405
+ def properties_with_count(
406
+ self, remove_namespace: bool = True, named_graph: URIRef | None = None
407
+ ) -> list[dict[str, Any]]:
408
+ instance_count_by_type = {
409
+ entry["type"]: entry["instanceCount"]
410
+ for entry in self.types_with_instance_and_property_count(remove_namespace=False, named_graph=named_graph)
411
+ }
412
+ query = """SELECT ?type ?property (COUNT(DISTINCT ?instance) AS ?instanceCount)
413
+ WHERE {
414
+ ?instance a ?type .
415
+ ?instance ?property ?value .
416
+ FILTER(?property != rdf:type)
417
+ }
418
+ GROUP BY ?type ?property
419
+ ORDER BY ASC(?type) ASC(?property)"""
420
+ return [
421
+ {
422
+ "type": urllib.parse.unquote(remove_namespace_from_uri(type_)) if remove_namespace else type_,
423
+ "property": urllib.parse.unquote(remove_namespace_from_uri(property)) if remove_namespace else property,
424
+ "instanceCount": cast(RdfLiteral, instance_count).toPython(),
425
+ "total": instance_count_by_type[type_],
426
+ }
427
+ for type_, property, instance_count in list(cast(list[ResultRow], self.graph(named_graph).query(query)))
428
+ ]
429
+
430
+ @overload
431
+ def instances_with_properties(
432
+ self, type: URIRef, remove_namespace: Literal[False], named_graph: URIRef | None = None
433
+ ) -> dict[URIRef, set[URIRef]]: ...
434
+
435
+ @overload
436
+ def instances_with_properties(
437
+ self, type: URIRef, remove_namespace: Literal[True], named_graph: URIRef | None = None
438
+ ) -> dict[str, set[str]]: ...
439
+
440
+ def instances_with_properties(
441
+ self, type: URIRef, remove_namespace: bool = True, named_graph: URIRef | None = None
442
+ ) -> dict[str, set[str]] | dict[URIRef, set[URIRef]]:
443
+ query = """SELECT DISTINCT ?instance ?property
444
+ WHERE {{
445
+ ?instance a <{type}> .
446
+ ?instance ?property ?value .
447
+ FILTER(?property != rdf:type)
448
+ }}"""
449
+ result = defaultdict(set)
450
+ for instance, property_ in cast(Iterable[ResultRow], self.graph(named_graph).query(query.format(type=type))):
451
+ instance_str = urllib.parse.unquote(remove_namespace_from_uri(instance)) if remove_namespace else instance
452
+ property_str = urllib.parse.unquote(remove_namespace_from_uri(property_)) if remove_namespace else property_
453
+ result[instance_str].add(property_str)
454
+ return result
455
+
362
456
  def list_instances_ids_by_space(
363
457
  self, space_property: URIRef, named_graph: URIRef | None = None
364
458
  ) -> Iterable[tuple[URIRef, str]]:
@@ -240,50 +240,36 @@ class AssetRelationshipConnector(BaseTransformerStandardised):
240
240
  str(extractors.RelationshipsExtractor.__name__),
241
241
  }
242
242
  )
243
- _asset_template: str = """SELECT ?source ?target WHERE {{
244
- <{relationship_id}> <{relationship_source_xid_prop}> ?source_xid .
245
- ?source <{asset_xid_property}> ?source_xid .
246
- ?source a <{asset_type}> .
247
-
248
- <{relationship_id}> <{relationship_target_xid_prop}> ?target_xid .
249
- ?target <{asset_xid_property}> ?target_xid .
250
- ?target a <{asset_type}> .}}"""
251
243
 
252
244
  def _count_query(self) -> str:
253
- query = """SELECT (COUNT(?target) as ?count) WHERE {{
245
+ query = """SELECT (COUNT(?target_xid) as ?count) WHERE {{
254
246
  ?relationship a <{relationship_type}> .
255
247
  ?relationship <{relationship_source_xid_prop}> ?source_xid .
256
- ?source <{asset_xid_property}> ?source_xid .
257
- ?source a <{asset_type}> .
248
+ ?source_xid a <{asset_type}> .
258
249
 
259
250
  ?relationship <{relationship_target_xid_prop}> ?target_xid .
260
- ?target <{asset_xid_property}> ?target_xid .
261
- ?target a <{asset_type}> .}}"""
251
+ ?target_xid a <{asset_type}> .}}"""
262
252
 
263
253
  return query.format(
264
254
  relationship_type=self.relationship_type,
265
255
  relationship_source_xid_prop=self.relationship_source_xid_prop,
266
256
  relationship_target_xid_prop=self.relationship_target_xid_prop,
267
- asset_xid_property=self.asset_xid_property,
268
257
  asset_type=self.asset_type,
269
258
  )
270
259
 
271
260
  def _iterate_query(self) -> str:
272
- query = """SELECT ?source ?relationship ?target WHERE {{
261
+ query = """SELECT ?source_xid ?relationship ?target_xid WHERE {{
273
262
  ?relationship a <{relationship_type}> .
274
263
  ?relationship <{relationship_source_xid_prop}> ?source_xid .
275
- ?source <{asset_xid_property}> ?source_xid .
276
- ?source a <{asset_type}> .
264
+ ?source_xid a <{asset_type}> .
277
265
 
278
266
  ?relationship <{relationship_target_xid_prop}> ?target_xid .
279
- ?target <{asset_xid_property}> ?target_xid .
280
- ?target a <{asset_type}> .}}"""
267
+ ?target_xid a <{asset_type}> .}}"""
281
268
 
282
269
  return query.format(
283
270
  relationship_type=self.relationship_type,
284
271
  relationship_source_xid_prop=self.relationship_source_xid_prop,
285
272
  relationship_target_xid_prop=self.relationship_target_xid_prop,
286
- asset_xid_property=self.asset_xid_property,
287
273
  asset_type=self.asset_type,
288
274
  )
289
275
 
@@ -293,13 +279,11 @@ class AssetRelationshipConnector(BaseTransformerStandardised):
293
279
  relationship_type: URIRef | None = None,
294
280
  relationship_source_xid_prop: URIRef | None = None,
295
281
  relationship_target_xid_prop: URIRef | None = None,
296
- asset_xid_property: URIRef | None = None,
297
282
  ):
298
283
  self.asset_type = asset_type or DEFAULT_NAMESPACE.Asset
299
284
  self.relationship_type = relationship_type or DEFAULT_NAMESPACE.Relationship
300
285
  self.relationship_source_xid_prop = relationship_source_xid_prop or DEFAULT_NAMESPACE.sourceExternalId
301
286
  self.relationship_target_xid_prop = relationship_target_xid_prop or DEFAULT_NAMESPACE.targetExternalId
302
- self.asset_xid_property = asset_xid_property or DEFAULT_NAMESPACE.externalId
303
287
 
304
288
  def operation(self, query_result_row: ResultRow) -> RowTransformationOutput:
305
289
  row_output = RowTransformationOutput()
@@ -35,7 +35,15 @@ def from_warning(warning: WarningMessage) -> NeatWarning:
35
35
  def _from_pydantic_error(error: ErrorDetails, read_info_by_sheet: dict[str, SpreadsheetRead]) -> NeatError:
36
36
  neat_error = _create_neat_value_error(error)
37
37
  location = error["loc"]
38
- return SpreadsheetError.create(location, neat_error, read_info_by_sheet.get(cast(str, location[0])))
38
+
39
+ # only errors caused in model_validate will have location information
40
+ if location:
41
+ return SpreadsheetError.create(location, neat_error, read_info_by_sheet.get(cast(str, location[0])))
42
+
43
+ # errors that occur while for example parsing spreadsheet in input rules
44
+ # will not have location information so we return neat_error as is
45
+ # this is workaround until more elegant solution is found
46
+ return neat_error
39
47
 
40
48
 
41
49
  def _create_neat_value_error(error: ErrorDetails) -> NeatValueError:
@@ -3,6 +3,7 @@ from cognite.neat._issues._base import NeatError, _get_subclasses
3
3
  from ._external import (
4
4
  AuthorizationError,
5
5
  CDFMissingClientError,
6
+ CDFMissingResourcesError,
6
7
  FileMissingRequiredFieldError,
7
8
  FileNotAFileError,
8
9
  FileNotFoundNeatError,
@@ -45,6 +46,7 @@ from ._wrapper import (
45
46
  __all__ = [
46
47
  "AuthorizationError",
47
48
  "CDFMissingClientError",
49
+ "CDFMissingResourcesError",
48
50
  "ClassValueError",
49
51
  "ContainerValueError",
50
52
  "EnumValueError",
@@ -80,3 +80,10 @@ class CDFMissingClientError(NeatError, RuntimeError):
80
80
  """CDF client is required: {reason}"""
81
81
 
82
82
  reason: str
83
+
84
+
85
+ @dataclass(unsafe_hash=True)
86
+ class CDFMissingResourcesError(NeatError, RuntimeError):
87
+ """Following CDF resources are missing: {resources}"""
88
+
89
+ resources: str
@@ -20,9 +20,21 @@ __all__ = [
20
20
  "NodeTypeFilterOnParentViewWarning",
21
21
  "NotNeatSupportedFilterWarning",
22
22
  "ParentInDifferentSpaceWarning",
23
+ "ViewsAndDataModelNotInSameSpaceWarning",
23
24
  ]
24
25
 
25
26
 
27
+ @dataclass(unsafe_hash=True)
28
+ class ViewsAndDataModelNotInSameSpaceWarning(UserModelingWarning):
29
+ """The data model is in {data_model_space}, while views are in {views_spaces} space(s).
30
+ This is strongly discouraged as it can lead to confusion and unnecessary complexity.
31
+ """
32
+
33
+ fix = "Ensure that views and data model are in the same space and have same version"
34
+ data_model_space: str
35
+ views_spaces: str
36
+
37
+
26
38
  @dataclass(unsafe_hash=True)
27
39
  class DirectRelationMissingSourceWarning(UserModelingWarning):
28
40
  """The view {view_id}.{prop_name} is a direct relation without a source.
@@ -117,6 +117,9 @@ SPLIT_ON_COMMA_PATTERN = re.compile(r",(?![^(]*\))")
117
117
  # This pattern ignores equal signs inside brackets
118
118
  SPLIT_ON_EQUAL_PATTERN = re.compile(r"=(?![^(]*\))")
119
119
 
120
+ # Very special Edge Entity parsing
121
+ SPLIT_ON_EDGE_ENTITY_ARGS_PATTERN = re.compile(r"(\btype\b|\bproperties\b|\bdirection\b)\s*=\s*([^,]+)")
122
+
120
123
 
121
124
  class _Patterns:
122
125
  @cached_property
@@ -487,43 +487,26 @@ class RulesAnalysis:
487
487
 
488
488
  rules = self.dms
489
489
 
490
- # Views with properties or used as ValueType
491
- # If a view is not used in properties or as ValueType, it is not added to the graph
492
- # as we typically do not have the properties for it.
493
- used_views = {prop_.view for prop_ in rules.properties} | {
494
- prop_.value_type for prop_ in rules.properties if isinstance(prop_.value_type, ViewEntity)
495
- }
496
-
497
490
  # Add nodes and edges from Views sheet
498
491
  for view in rules.views:
499
- if view.view not in used_views:
500
- continue
501
- # if possible use human-readable label coming from the view name
502
- if not di_graph.has_node(view.view.suffix):
503
- di_graph.add_node(view.view.suffix, label=view.view.suffix)
504
-
505
- if format == "implements" and view.implements:
506
- for implement in view.implements:
507
- if not di_graph.has_node(implement.suffix):
508
- di_graph.add_node(implement.suffix, label=implement.suffix)
509
-
510
- di_graph.add_edge(
511
- view.view.suffix,
512
- implement.suffix,
513
- label="implements",
514
- dashes=True,
515
- )
492
+ di_graph.add_node(view.view.suffix, label=view.view.suffix)
493
+
494
+ if format == "implements" and view.implements:
495
+ for implement in view.implements:
496
+ di_graph.add_node(implement.suffix, label=implement.suffix)
497
+ di_graph.add_edge(
498
+ view.view.suffix,
499
+ implement.suffix,
500
+ label="implements",
501
+ dashes=True,
502
+ )
516
503
 
517
504
  if format == "data-model":
518
505
  # Add nodes and edges from Properties sheet
519
506
  for prop_ in rules.properties:
520
507
  if prop_.connection and isinstance(prop_.value_type, ViewEntity):
521
- if not di_graph.has_node(prop_.view.suffix):
522
- di_graph.add_node(prop_.view.suffix, label=prop_.view.suffix)
523
-
524
- if not di_graph.has_node(prop_.value_type.suffix):
525
- di_graph.add_node(prop_.value_type.suffix, label=prop_.value_type.suffix)
526
-
508
+ di_graph.add_node(prop_.view.suffix, label=prop_.view.suffix)
509
+ di_graph.add_node(prop_.value_type.suffix, label=prop_.value_type.suffix)
527
510
  di_graph.add_edge(
528
511
  prop_.view.suffix,
529
512
  prop_.value_type.suffix,
@@ -541,32 +524,28 @@ class RulesAnalysis:
541
524
  # Add nodes and edges from Views sheet
542
525
  for class_ in rules.classes:
543
526
  # if possible use human readable label coming from the view name
544
- if not di_graph.has_node(class_.class_.suffix):
545
- di_graph.add_node(
546
- class_.class_.suffix,
547
- label=class_.name or class_.class_.suffix,
548
- )
549
527
 
550
- if format == "implements" and class_.implements:
551
- for parent in class_.implements:
552
- if not di_graph.has_node(parent.suffix):
553
- di_graph.add_node(parent.suffix, label=parent.suffix)
554
- di_graph.add_edge(
555
- class_.class_.suffix,
556
- parent.suffix,
557
- label="implements",
558
- dashes=True,
559
- )
528
+ di_graph.add_node(
529
+ class_.class_.suffix,
530
+ label=class_.name or class_.class_.suffix,
531
+ )
532
+
533
+ if format == "implements" and class_.implements:
534
+ for parent in class_.implements:
535
+ di_graph.add_node(parent.suffix, label=parent.suffix)
536
+ di_graph.add_edge(
537
+ class_.class_.suffix,
538
+ parent.suffix,
539
+ label="implements",
540
+ dashes=True,
541
+ )
560
542
 
561
543
  if format == "data-model":
562
544
  # Add nodes and edges from Properties sheet
563
545
  for prop_ in rules.properties:
564
546
  if isinstance(prop_.value_type, ClassEntity) and not isinstance(prop_.value_type, UnknownEntity):
565
- if not di_graph.has_node(prop_.class_.suffix):
566
- di_graph.add_node(prop_.class_.suffix, label=prop_.class_.suffix)
567
-
568
- if not di_graph.has_node(prop_.value_type.suffix):
569
- di_graph.add_node(prop_.value_type.suffix, label=prop_.value_type.suffix)
547
+ di_graph.add_node(prop_.class_.suffix, label=prop_.class_.suffix)
548
+ di_graph.add_node(prop_.value_type.suffix, label=prop_.value_type.suffix)
570
549
 
571
550
  di_graph.add_edge(
572
551
  prop_.class_.suffix,
@@ -55,7 +55,7 @@ class ExcelExporter(BaseExporter[VerifiedRules, Workbook]):
55
55
  Style = Literal["none", "minimal", "default", "maximal"]
56
56
  DumpOptions = Literal["user", "last", "reference"]
57
57
  _main_header_by_sheet_name: ClassVar[dict[str, str]] = {
58
- "Properties": "Definition of Properties per Class",
58
+ "Properties": "Definition of Properties",
59
59
  "Classes": "Definition of Classes",
60
60
  "Views": "Definition of Views",
61
61
  "Containers": "Definition of Containers",
@@ -10,6 +10,7 @@ from cognite.client import data_modeling as dm
10
10
  from rdflib import RDF, RDFS, Graph, Namespace, URIRef
11
11
  from rdflib import Literal as RdfLiteral
12
12
 
13
+ from cognite.neat._config import GLOBAL_CONFIG
13
14
  from cognite.neat._constants import NEAT, get_default_prefixes_and_namespaces
14
15
  from cognite.neat._issues import IssueList
15
16
  from cognite.neat._issues.warnings import PropertyValueTypeUndefinedWarning
@@ -27,6 +28,7 @@ from cognite.neat._store import NeatGraphStore
27
28
  from cognite.neat._store._provenance import INSTANCES_ENTITY
28
29
  from cognite.neat._utils.collection_ import iterate_progress_bar
29
30
  from cognite.neat._utils.rdf_ import remove_namespace_from_uri, uri_to_short_form
31
+ from cognite.neat._utils.text import NamingStandardization
30
32
 
31
33
  from ._base import DEFAULT_NON_EXISTING_NODE_TYPE, BaseRDFImporter
32
34
 
@@ -403,7 +405,7 @@ class SubclassInferenceImporter(BaseRDFImporter):
403
405
  else:
404
406
  existing_classes = {}
405
407
  classes: list[InformationInputClass] = []
406
- properties_by_class_suffix_by_property_id_lowered: dict[str, dict[str, InformationInputProperty]] = {}
408
+ properties_by_class_suffix_by_property_id: dict[str, dict[str, InformationInputProperty]] = {}
407
409
 
408
410
  # Help for IDE
409
411
  type_uri: URIRef
@@ -455,7 +457,8 @@ class SubclassInferenceImporter(BaseRDFImporter):
455
457
  continue
456
458
  property_id = remove_namespace_from_uri(property_uri)
457
459
  self._add_uri_namespace_to_prefixes(property_uri, prefixes)
458
- if existing_prop := properties_by_id.get(property_id.casefold()):
460
+ property_id_standardized = NamingStandardization.standardize_property_str(property_uri)
461
+ if existing_prop := properties_by_id.get(property_id_standardized):
459
462
  if not isinstance(existing_prop.instance_source, list):
460
463
  existing_prop.instance_source = (
461
464
  [existing_prop.instance_source] if existing_prop.instance_source else []
@@ -463,29 +466,28 @@ class SubclassInferenceImporter(BaseRDFImporter):
463
466
  existing_prop.instance_source.append(property_uri)
464
467
  continue
465
468
  else:
466
- properties_by_id[property_id.casefold()] = self._create_property(
469
+ properties_by_id[property_id_standardized] = self._create_property(
467
470
  read_properties, class_suffix, property_uri, property_id, prefixes
468
471
  )
469
- properties_by_class_suffix_by_property_id_lowered[class_suffix] = properties_by_id
472
+ properties_by_class_suffix_by_property_id[class_suffix] = properties_by_id
470
473
  if parent_suffix:
471
474
  properties_by_id = {}
472
475
  for property_uri, read_properties in shared_properties.items():
473
476
  property_id = remove_namespace_from_uri(property_uri)
474
477
  self._add_uri_namespace_to_prefixes(property_uri, prefixes)
475
- if existing_prop := properties_by_id.get(property_id.casefold()):
478
+ property_id_standardized = NamingStandardization.standardize_property_str(property_uri)
479
+ if existing_prop := properties_by_id.get(property_id_standardized):
476
480
  if not isinstance(existing_prop.instance_source, list):
477
481
  existing_prop.instance_source = (
478
482
  [existing_prop.instance_source] if existing_prop.instance_source else []
479
483
  )
480
484
  existing_prop.instance_source.append(property_uri)
481
485
  else:
482
- properties_by_id[property_uri.casefold()] = self._create_property(
486
+ properties_by_id[property_id_standardized] = self._create_property(
483
487
  read_properties, parent_suffix, property_uri, property_id, prefixes
484
488
  )
485
489
  return classes, [
486
- prop
487
- for properties in properties_by_class_suffix_by_property_id_lowered.values()
488
- for prop in properties.values()
490
+ prop for properties in properties_by_class_suffix_by_property_id.values() for prop in properties.values()
489
491
  ]
490
492
 
491
493
  @staticmethod
@@ -522,7 +524,11 @@ class SubclassInferenceImporter(BaseRDFImporter):
522
524
  existing_classes = {}
523
525
  properties_by_class_by_subclass: list[_ReadProperties] = []
524
526
  existing_class: InformationClass | None
525
- for type_uri, instance_count in count_by_type.items():
527
+ total_instance_count = sum(count_by_type.values())
528
+ iterable = count_by_type.items()
529
+ if GLOBAL_CONFIG.use_iterate_bar_threshold and total_instance_count > GLOBAL_CONFIG.use_iterate_bar_threshold:
530
+ iterable = iterate_progress_bar(iterable, len(count_by_type), "Inferring types...") # type: ignore[assignment]
531
+ for type_uri, instance_count in iterable:
526
532
  property_query = self._properties_query.format(type=type_uri, unknown_type=NEAT.UnknownType)
527
533
  class_suffix = remove_namespace_from_uri(type_uri)
528
534
  if (existing_class := existing_classes.get(class_suffix)) and existing_class.instance_source is None:
@@ -126,7 +126,6 @@ class SchemaModel(BaseModel):
126
126
  extra="ignore",
127
127
  use_enum_values=True,
128
128
  )
129
- validators_to_skip: set[str] = Field(default_factory=set, exclude=True)
130
129
 
131
130
  @classmethod
132
131
  def mandatory_fields(cls, use_alias=False) -> set[str]:
@@ -256,7 +255,6 @@ class BaseRules(SchemaModel, ABC):
256
255
 
257
256
  Args:
258
257
  metadata: Data model metadata
259
- validators_to_skip: List of validators to skip. Defaults to []
260
258
  """
261
259
 
262
260
  metadata: BaseMetadata