cognite-neat 0.107.0__py3-none-any.whl → 0.109.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cognite-neat might be problematic; see the registry's advisory page for more details.

Files changed (69)
  1. cognite/neat/_constants.py +35 -1
  2. cognite/neat/_graph/_shared.py +4 -0
  3. cognite/neat/_graph/extractors/_classic_cdf/_base.py +115 -14
  4. cognite/neat/_graph/extractors/_classic_cdf/_classic.py +87 -6
  5. cognite/neat/_graph/extractors/_classic_cdf/_relationships.py +48 -12
  6. cognite/neat/_graph/extractors/_classic_cdf/_sequences.py +19 -1
  7. cognite/neat/_graph/extractors/_dms.py +162 -47
  8. cognite/neat/_graph/extractors/_dms_graph.py +54 -4
  9. cognite/neat/_graph/extractors/_mock_graph_generator.py +1 -1
  10. cognite/neat/_graph/extractors/_rdf_file.py +3 -2
  11. cognite/neat/_graph/loaders/__init__.py +1 -3
  12. cognite/neat/_graph/loaders/_rdf2dms.py +20 -10
  13. cognite/neat/_graph/queries/_base.py +144 -84
  14. cognite/neat/_graph/queries/_construct.py +1 -1
  15. cognite/neat/_graph/transformers/__init__.py +3 -1
  16. cognite/neat/_graph/transformers/_base.py +4 -4
  17. cognite/neat/_graph/transformers/_classic_cdf.py +13 -13
  18. cognite/neat/_graph/transformers/_prune_graph.py +3 -3
  19. cognite/neat/_graph/transformers/_rdfpath.py +3 -4
  20. cognite/neat/_graph/transformers/_value_type.py +71 -13
  21. cognite/neat/_issues/errors/__init__.py +2 -0
  22. cognite/neat/_issues/errors/_external.py +8 -0
  23. cognite/neat/_issues/errors/_resources.py +1 -1
  24. cognite/neat/_issues/warnings/__init__.py +0 -2
  25. cognite/neat/_issues/warnings/_models.py +1 -1
  26. cognite/neat/_issues/warnings/_properties.py +0 -8
  27. cognite/neat/_issues/warnings/_resources.py +1 -1
  28. cognite/neat/_rules/catalog/classic_model.xlsx +0 -0
  29. cognite/neat/_rules/exporters/_rules2instance_template.py +3 -3
  30. cognite/neat/_rules/exporters/_rules2yaml.py +1 -1
  31. cognite/neat/_rules/importers/__init__.py +3 -1
  32. cognite/neat/_rules/importers/_dtdl2rules/spec.py +1 -2
  33. cognite/neat/_rules/importers/_rdf/__init__.py +2 -2
  34. cognite/neat/_rules/importers/_rdf/_base.py +2 -2
  35. cognite/neat/_rules/importers/_rdf/_inference2rules.py +310 -26
  36. cognite/neat/_rules/models/_base_rules.py +22 -11
  37. cognite/neat/_rules/models/dms/_exporter.py +5 -4
  38. cognite/neat/_rules/models/dms/_rules.py +1 -8
  39. cognite/neat/_rules/models/dms/_rules_input.py +4 -0
  40. cognite/neat/_rules/models/information/_rules_input.py +5 -0
  41. cognite/neat/_rules/transformers/__init__.py +10 -3
  42. cognite/neat/_rules/transformers/_base.py +6 -1
  43. cognite/neat/_rules/transformers/_converters.py +530 -364
  44. cognite/neat/_rules/transformers/_mapping.py +4 -4
  45. cognite/neat/_session/_base.py +100 -47
  46. cognite/neat/_session/_create.py +133 -0
  47. cognite/neat/_session/_drop.py +60 -2
  48. cognite/neat/_session/_fix.py +28 -0
  49. cognite/neat/_session/_inspect.py +22 -7
  50. cognite/neat/_session/_mapping.py +8 -8
  51. cognite/neat/_session/_prepare.py +3 -247
  52. cognite/neat/_session/_read.py +138 -17
  53. cognite/neat/_session/_set.py +50 -1
  54. cognite/neat/_session/_show.py +16 -43
  55. cognite/neat/_session/_state.py +53 -52
  56. cognite/neat/_session/_to.py +11 -4
  57. cognite/neat/_session/_wizard.py +1 -1
  58. cognite/neat/_session/exceptions.py +8 -1
  59. cognite/neat/_store/_graph_store.py +301 -146
  60. cognite/neat/_store/_provenance.py +36 -20
  61. cognite/neat/_store/_rules_store.py +253 -267
  62. cognite/neat/_store/exceptions.py +40 -4
  63. cognite/neat/_utils/auth.py +5 -3
  64. cognite/neat/_version.py +1 -1
  65. {cognite_neat-0.107.0.dist-info → cognite_neat-0.109.0.dist-info}/METADATA +1 -1
  66. {cognite_neat-0.107.0.dist-info → cognite_neat-0.109.0.dist-info}/RECORD +69 -67
  67. {cognite_neat-0.107.0.dist-info → cognite_neat-0.109.0.dist-info}/LICENSE +0 -0
  68. {cognite_neat-0.107.0.dist-info → cognite_neat-0.109.0.dist-info}/WHEEL +0 -0
  69. {cognite_neat-0.107.0.dist-info → cognite_neat-0.109.0.dist-info}/entry_points.txt +0 -0
@@ -4,10 +4,10 @@ from typing import Any, cast
4
4
  from urllib.parse import quote
5
5
 
6
6
  import rdflib
7
- from rdflib import RDF, Namespace, URIRef
7
+ from rdflib import RDF, RDFS, Literal, Namespace, URIRef
8
8
  from rdflib.query import ResultRow
9
9
 
10
- from cognite.neat._constants import UNKNOWN_TYPE
10
+ from cognite.neat._constants import NEAT
11
11
  from cognite.neat._issues.warnings import PropertyDataTypeConversionWarning
12
12
  from cognite.neat._utils.auxiliary import string_to_ideal_type
13
13
  from cognite.neat._utils.rdf_ import Triple, get_namespace, remove_namespace_from_uri
@@ -24,7 +24,7 @@ class SplitMultiValueProperty(BaseTransformerStandardised):
24
24
  _need_changes = frozenset({})
25
25
 
26
26
  def __init__(self, unknown_type: URIRef | None = None) -> None:
27
- self.unknown_type = unknown_type or UNKNOWN_TYPE
27
+ self.unknown_type = unknown_type or NEAT.UnknownType
28
28
 
29
29
  def _iterate_query(self) -> str:
30
30
  query = """SELECT ?subjectType ?property
@@ -78,8 +78,8 @@ class SplitMultiValueProperty(BaseTransformerStandardised):
78
78
 
79
79
  new_property = URIRef(f"{old_property}_{remove_namespace_from_uri(value_type)}")
80
80
 
81
- row_output.add_triples.append(cast(Triple, (subject, new_property, object)))
82
- row_output.remove_triples.append(cast(Triple, (subject, old_property, object)))
81
+ row_output.add_triples.add(cast(Triple, (subject, new_property, object)))
82
+ row_output.remove_triples.add(cast(Triple, (subject, old_property, object)))
83
83
 
84
84
  row_output.instances_modified_count += 1
85
85
 
@@ -143,8 +143,8 @@ class ConvertLiteral(BaseTransformerStandardised):
143
143
  PropertyDataTypeConversionWarning(str(instance), self._type_name, self._property_name, str(e)),
144
144
  stacklevel=2,
145
145
  )
146
- row_output.add_triples.append((instance, self.subject_predicate, rdflib.Literal(converted_value))) # type: ignore[arg-type]
147
- row_output.remove_triples.append((instance, self.subject_predicate, literal)) # type: ignore[arg-type]
146
+ row_output.add_triples.add((instance, self.subject_predicate, rdflib.Literal(converted_value))) # type: ignore[arg-type]
147
+ row_output.remove_triples.add((instance, self.subject_predicate, literal)) # type: ignore[arg-type]
148
148
  row_output.instances_modified_count += 1
149
149
 
150
150
  return row_output
@@ -221,15 +221,15 @@ class LiteralToEntity(BaseTransformerStandardised):
221
221
  namespace = Namespace(get_namespace(instance)) # type: ignore[arg-type]
222
222
  entity_type = namespace[self.entity_type]
223
223
  new_entity = namespace[f"{self.entity_type}_{quote(value)!s}"]
224
- row_output.add_triples.append((new_entity, RDF.type, entity_type))
224
+ row_output.add_triples.add((new_entity, RDF.type, entity_type))
225
225
  row_output.instances_added_count += 1 # we add one new entity
226
226
 
227
227
  if self.new_property is not None:
228
- row_output.add_triples.append((new_entity, namespace[self.new_property], rdflib.Literal(value))) # type: ignore[arg-type]
228
+ row_output.add_triples.add((new_entity, namespace[self.new_property], rdflib.Literal(value))) # type: ignore[arg-type]
229
229
  row_output.instances_modified_count += 1 # we modify the new entity
230
230
 
231
- row_output.add_triples.append((instance, self.subject_predicate, new_entity)) # type: ignore[arg-type]
232
- row_output.remove_triples.append((instance, self.subject_predicate, literal)) # type: ignore[arg-type]
231
+ row_output.add_triples.add((instance, self.subject_predicate, new_entity)) # type: ignore[arg-type]
232
+ row_output.remove_triples.add((instance, self.subject_predicate, literal)) # type: ignore[arg-type]
233
233
  row_output.instances_modified_count += 1 # we modify the old entity
234
234
 
235
235
  return row_output
@@ -300,8 +300,66 @@ class ConnectionToLiteral(BaseTransformerStandardised):
300
300
  instance, object_entity = cast(tuple[URIRef, URIRef], query_result_row)
301
301
  value = remove_namespace_from_uri(object_entity)
302
302
 
303
- row_output.add_triples.append((instance, self.subject_predicate, rdflib.Literal(value)))
304
- row_output.remove_triples.append((instance, self.subject_predicate, object_entity))
303
+ row_output.add_triples.add((instance, self.subject_predicate, rdflib.Literal(value)))
304
+ row_output.remove_triples.add((instance, self.subject_predicate, object_entity))
305
+ row_output.instances_modified_count += 1
306
+
307
+ return row_output
308
+
309
+
310
+ class SetType(BaseTransformerStandardised):
311
+ description = "Set the type of an instance based on a property"
312
+
313
+ def __init__(
314
+ self,
315
+ subject_type: URIRef,
316
+ subject_predicate: URIRef,
317
+ drop_property: bool = False,
318
+ namespace: Namespace | None = None,
319
+ ) -> None:
320
+ self.subject_type = subject_type
321
+ self.subject_predicate = subject_predicate
322
+ self.drop_property = drop_property
323
+ self._namespace = namespace or Namespace(get_namespace(subject_type))
324
+
325
+ def _count_query(self) -> str:
326
+ query = """SELECT (COUNT(?object) AS ?objectCount)
327
+ WHERE {{
328
+ ?instance a <{subject_type}> .
329
+ ?instance <{subject_predicate}> ?object
330
+ FILTER(isLiteral(?object))
331
+ }}"""
332
+ return query.format(subject_type=self.subject_type, subject_predicate=self.subject_predicate)
333
+
334
+ def _skip_count_query(self) -> str:
335
+ query = """SELECT (COUNT(?object) AS ?objectCount)
336
+ WHERE {{
337
+ ?instance a <{subject_type}> .
338
+ ?instance <{subject_predicate}> ?object
339
+ FILTER(isIRI(?object))
340
+ }}"""
341
+ return query.format(subject_type=self.subject_type, subject_predicate=self.subject_predicate)
342
+
343
+ def _iterate_query(self) -> str:
344
+ query = """SELECT ?instance ?object
345
+ WHERE {{
346
+ ?instance a <{subject_type}> .
347
+ ?instance <{subject_predicate}> ?object
348
+ FILTER(isLiteral(?object))
349
+ }}"""
350
+ return query.format(subject_type=self.subject_type, subject_predicate=self.subject_predicate)
351
+
352
+ def operation(self, query_result_row: ResultRow) -> RowTransformationOutput:
353
+ row_output = RowTransformationOutput()
354
+
355
+ instance, object_literal = cast(tuple[URIRef, Literal], query_result_row)
356
+ if self.drop_property:
357
+ row_output.remove_triples.add((instance, self.subject_predicate, object_literal))
358
+
359
+ row_output.remove_triples.add((instance, RDF.type, self.subject_type))
360
+ new_type = self._namespace[quote(object_literal.toPython())]
361
+ row_output.add_triples.add((instance, RDF.type, new_type))
362
+ row_output.add_triples.add((new_type, RDFS.subClassOf, self.subject_type))
305
363
  row_output.instances_modified_count += 1
306
364
 
307
365
  return row_output
@@ -9,6 +9,7 @@ from ._external import (
9
9
  FileReadError,
10
10
  FileTypeUnexpectedError,
11
11
  NeatYamlError,
12
+ OxigraphStorageLockedError,
12
13
  )
13
14
  from ._general import NeatImportError, NeatTypeError, NeatValueError, RegexViolationError
14
15
  from ._properties import (
@@ -51,6 +52,7 @@ __all__ = [
51
52
  "NeatTypeError",
52
53
  "NeatValueError",
53
54
  "NeatYamlError",
55
+ "OxigraphStorageLockedError",
54
56
  "PropertyDefinitionDuplicatedError",
55
57
  "PropertyDefinitionError",
56
58
  "PropertyMappingDuplicatedError",
@@ -23,6 +23,14 @@ class FileReadError(NeatError, RuntimeError):
23
23
  reason: str
24
24
 
25
25
 
26
+ @dataclass(unsafe_hash=True)
27
+ class OxigraphStorageLockedError(NeatError, RuntimeError):
28
+ """Oxigraph on-disk storage locked at the path {filepath}"""
29
+
30
+ fix = "Delete LOCK file in the {filepath} folder to unlock the storage or use another storage path"
31
+ filepath: Path
32
+
33
+
26
34
  @dataclass(unsafe_hash=True)
27
35
  class FileNotFoundNeatError(NeatError, FileNotFoundError):
28
36
  """File {filepath} not found"""
@@ -30,7 +30,7 @@ class ResourceRetrievalError(ResourceError[T_Identifier]):
30
30
 
31
31
  @dataclass(unsafe_hash=True)
32
32
  class ResourceNotFoundError(ResourceError, Generic[T_Identifier, T_ReferenceIdentifier]):
33
- """The {resource_type} with identifier {identifier} does not exist"""
33
+ """The {resource_type} with identifier '{identifier}' does not exist"""
34
34
 
35
35
  extra = " This is expected by {referred_type} {referred_by}."
36
36
 
@@ -31,7 +31,6 @@ from ._properties import (
31
31
  PropertyDirectRelationLimitWarning,
32
32
  PropertyNotFoundWarning,
33
33
  PropertyOverwritingWarning,
34
- PropertySkippedWarning,
35
34
  PropertyTypeNotSupportedWarning,
36
35
  PropertyValueTypeUndefinedWarning,
37
36
  )
@@ -68,7 +67,6 @@ __all__ = [
68
67
  "PropertyDirectRelationLimitWarning",
69
68
  "PropertyNotFoundWarning",
70
69
  "PropertyOverwritingWarning",
71
- "PropertySkippedWarning",
72
70
  "PropertyTypeNotSupportedWarning",
73
71
  "PropertyValueTypeUndefinedWarning",
74
72
  "RegexViolationWarning",
@@ -74,7 +74,7 @@ class CDFNotSupportedWarning(NeatWarning, ABC):
74
74
  class NotSupportedViewContainerLimitWarning(CDFNotSupportedWarning):
75
75
  """The view {view_id} maps, {count} containers, which is more than the limit {limit}."""
76
76
 
77
- fix = "Reduce the number of containers the view maps to." ""
77
+ fix = "Reduce the number of containers the view maps to."
78
78
 
79
79
  view_id: ViewId
80
80
  count: int
@@ -65,14 +65,6 @@ class PropertyOverwritingWarning(PropertyWarning[T_Identifier]):
65
65
  overwriting: tuple[str, ...]
66
66
 
67
67
 
68
- @dataclass(unsafe_hash=True)
69
- class PropertySkippedWarning(PropertyWarning[T_Identifier]):
70
- """The {resource_type} with identifier {identifier} has a property {property_name}
71
- which is skipped. {reason}."""
72
-
73
- reason: str
74
-
75
-
76
68
  @dataclass(unsafe_hash=True)
77
69
  class PropertyDataTypeConversionWarning(PropertyWarning[T_Identifier]):
78
70
  """The {resource_type} with identifier {identifier} failed to convert the property {property_name}: {error}"""
@@ -21,7 +21,7 @@ class ResourceRegexViolationWarning(ResourceNeatWarning):
21
21
 
22
22
  fix = (
23
23
  "Either export the data model and make the necessary changes manually"
24
- " or run prepare.data_model.cdf_compliant_external_ids."
24
+ " or run fix.data_model.cdf_compliant_external_ids."
25
25
  )
26
26
 
27
27
  location: str
@@ -96,13 +96,13 @@ class InstanceTemplateExporter(BaseExporter[InformationRules, Workbook]):
96
96
  def _add_index_identifiers(workbook: Workbook, sheet: str, no_rows: int):
97
97
  """Adds index-based auto identifier to a sheet identifier column"""
98
98
  for i in range(no_rows):
99
- workbook[sheet][f"A{i+2}"] = f'=IF(ISBLANK(B{i+2}), "","{sheet}-{i+1}")'
99
+ workbook[sheet][f"A{i + 2}"] = f'=IF(ISBLANK(B{i + 2}), "","{sheet}-{i + 1}")'
100
100
 
101
101
 
102
102
  def _add_uuid_identifiers(workbook: Workbook, sheet: str, no_rows: int):
103
103
  """Adds UUID-based auto identifier to a sheet identifier column"""
104
104
  for i in range(no_rows):
105
- workbook[sheet][f"A{i+2}"] = f'=IF(ISBLANK(B{i+2}), "","{sheet}-{uuid.uuid4()}")'
105
+ workbook[sheet][f"A{i + 2}"] = f'=IF(ISBLANK(B{i + 2}), "","{sheet}-{uuid.uuid4()}")'
106
106
 
107
107
 
108
108
  def _add_drop_down_list(
@@ -122,7 +122,7 @@ def _add_drop_down_list(
122
122
  workbook[sheet].add_data_validation(drop_down_list)
123
123
 
124
124
  for i in range(no_rows):
125
- drop_down_list.add(workbook[sheet][f"{column}{i+2}"])
125
+ drop_down_list.add(workbook[sheet][f"{column}{i + 2}"])
126
126
 
127
127
 
128
128
  def _adjust_column_width(workbook: Workbook):
@@ -72,7 +72,7 @@ class YAMLExporter(BaseExporter[VerifiedRules, str]):
72
72
  """
73
73
  # model_dump_json ensures that the output is in JSON format,
74
74
  # if we don't do this, we will get Enums and other types that are not serializable to YAML
75
- json_output = rules.dump(mode="json", exclude_none=True, exclude_unset=True)
75
+ json_output = rules.dump(mode="json", sort=True, exclude_none=True, exclude_unset=True)
76
76
  if self.output == "json":
77
77
  return json.dumps(json_output)
78
78
  elif self.output == "yaml":
@@ -1,7 +1,7 @@
1
1
  from ._base import BaseImporter
2
2
  from ._dms2rules import DMSImporter
3
3
  from ._dtdl2rules import DTDLImporter
4
- from ._rdf import IMFImporter, InferenceImporter, OWLImporter
4
+ from ._rdf import IMFImporter, InferenceImporter, OWLImporter, SubclassInferenceImporter
5
5
  from ._spreadsheet2rules import ExcelImporter, GoogleSheetImporter
6
6
  from ._yaml2rules import YAMLImporter
7
7
 
@@ -14,6 +14,7 @@ __all__ = [
14
14
  "IMFImporter",
15
15
  "InferenceImporter",
16
16
  "OWLImporter",
17
+ "SubclassInferenceImporter",
17
18
  "YAMLImporter",
18
19
  ]
19
20
 
@@ -26,6 +27,7 @@ RulesImporters = (
26
27
  | DTDLImporter
27
28
  | YAMLImporter
28
29
  | InferenceImporter
30
+ | SubclassInferenceImporter
29
31
  )
30
32
 
31
33
 
@@ -302,8 +302,7 @@ class Interface(DTDLBase):
302
302
  spec_version = frozenset(["2", "3"])
303
303
  default_context: ClassVar[IRI] = Field(
304
304
  "dtmi:dtdl:context;3",
305
- description="This can be set directly on the class to change the "
306
- "default context used when parsing a document.",
305
+ description="This can be set directly on the class to change the default context used when parsing a document.",
307
306
  )
308
307
  id_: DTMI = Field(alias="@id") # type: ignore[assignment]
309
308
  context: IRI | None = Field(alias="@context")
@@ -1,5 +1,5 @@
1
1
  from ._imf2rules import IMFImporter
2
- from ._inference2rules import InferenceImporter
2
+ from ._inference2rules import InferenceImporter, SubclassInferenceImporter
3
3
  from ._owl2rules import OWLImporter
4
4
 
5
- __all__ = ["IMFImporter", "InferenceImporter", "OWLImporter"]
5
+ __all__ = ["IMFImporter", "InferenceImporter", "OWLImporter", "SubclassInferenceImporter"]
@@ -72,7 +72,7 @@ class BaseRDFImporter(BaseImporter[InformationInputRules]):
72
72
  ):
73
73
  return cls(
74
74
  IssueList(title=f"{cls.__name__} issues"),
75
- store.graph,
75
+ store.dataset,
76
76
  data_model_id=data_model_id,
77
77
  max_number_of_instance=max_number_of_instance,
78
78
  non_existing_node_type=non_existing_node_type,
@@ -140,7 +140,7 @@ class BaseRDFImporter(BaseImporter[InformationInputRules]):
140
140
  prefixes: Dict of prefixes and namespaces
141
141
  """
142
142
  if Namespace(get_namespace(URI)) not in prefixes.values():
143
- prefixes[f"prefix_{len(prefixes)+1}"] = Namespace(get_namespace(URI))
143
+ prefixes[f"prefix_{len(prefixes) + 1}"] = Namespace(get_namespace(URI))
144
144
 
145
145
  @property
146
146
  def _metadata(self) -> dict: