cognite-neat 0.88.0__py3-none-any.whl → 0.88.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cognite-neat might be problematic; see the advisory details for more information.

Files changed (99)
  1. cognite/neat/_version.py +1 -1
  2. cognite/neat/app/api/routers/configuration.py +1 -1
  3. cognite/neat/app/ui/neat-app/build/asset-manifest.json +7 -7
  4. cognite/neat/app/ui/neat-app/build/index.html +1 -1
  5. cognite/neat/app/ui/neat-app/build/static/css/{main.38a62222.css → main.72e3d92e.css} +2 -2
  6. cognite/neat/app/ui/neat-app/build/static/css/main.72e3d92e.css.map +1 -0
  7. cognite/neat/app/ui/neat-app/build/static/js/main.5a52cf09.js +3 -0
  8. cognite/neat/app/ui/neat-app/build/static/js/{main.ec7f72e2.js.LICENSE.txt → main.5a52cf09.js.LICENSE.txt} +0 -9
  9. cognite/neat/app/ui/neat-app/build/static/js/main.5a52cf09.js.map +1 -0
  10. cognite/neat/config.py +44 -27
  11. cognite/neat/exceptions.py +8 -2
  12. cognite/neat/graph/extractors/_classic_cdf/_assets.py +21 -73
  13. cognite/neat/graph/extractors/_classic_cdf/_base.py +102 -0
  14. cognite/neat/graph/extractors/_classic_cdf/_events.py +46 -42
  15. cognite/neat/graph/extractors/_classic_cdf/_files.py +41 -45
  16. cognite/neat/graph/extractors/_classic_cdf/_labels.py +75 -52
  17. cognite/neat/graph/extractors/_classic_cdf/_relationships.py +49 -27
  18. cognite/neat/graph/extractors/_classic_cdf/_sequences.py +47 -50
  19. cognite/neat/graph/extractors/_classic_cdf/_timeseries.py +47 -49
  20. cognite/neat/graph/loaders/_base.py +4 -4
  21. cognite/neat/graph/loaders/_rdf2asset.py +12 -14
  22. cognite/neat/graph/loaders/_rdf2dms.py +14 -10
  23. cognite/neat/graph/queries/_base.py +22 -29
  24. cognite/neat/graph/queries/_shared.py +1 -1
  25. cognite/neat/graph/stores/_base.py +19 -11
  26. cognite/neat/graph/transformers/_rdfpath.py +3 -2
  27. cognite/neat/issues/__init__.py +16 -0
  28. cognite/neat/{issues.py → issues/_base.py} +78 -2
  29. cognite/neat/issues/errors/external.py +21 -0
  30. cognite/neat/issues/errors/properties.py +75 -0
  31. cognite/neat/issues/errors/resources.py +123 -0
  32. cognite/neat/issues/errors/schema.py +0 -0
  33. cognite/neat/{rules/issues → issues}/formatters.py +9 -9
  34. cognite/neat/issues/neat_warnings/__init__.py +2 -0
  35. cognite/neat/issues/neat_warnings/identifier.py +27 -0
  36. cognite/neat/issues/neat_warnings/models.py +22 -0
  37. cognite/neat/issues/neat_warnings/properties.py +77 -0
  38. cognite/neat/issues/neat_warnings/resources.py +125 -0
  39. cognite/neat/rules/exporters/_rules2dms.py +3 -2
  40. cognite/neat/rules/exporters/_rules2ontology.py +28 -20
  41. cognite/neat/rules/exporters/_validation.py +15 -21
  42. cognite/neat/rules/importers/__init__.py +7 -3
  43. cognite/neat/rules/importers/_base.py +3 -3
  44. cognite/neat/rules/importers/_dms2rules.py +39 -18
  45. cognite/neat/rules/importers/_dtdl2rules/dtdl_converter.py +44 -53
  46. cognite/neat/rules/importers/_dtdl2rules/dtdl_importer.py +6 -5
  47. cognite/neat/rules/importers/_rdf/__init__.py +0 -0
  48. cognite/neat/rules/importers/_rdf/_imf2rules/__init__.py +3 -0
  49. cognite/neat/rules/importers/_rdf/_imf2rules/_imf2classes.py +82 -0
  50. cognite/neat/rules/importers/_rdf/_imf2rules/_imf2metadata.py +34 -0
  51. cognite/neat/rules/importers/_rdf/_imf2rules/_imf2properties.py +123 -0
  52. cognite/neat/rules/importers/{_owl2rules/_owl2rules.py → _rdf/_imf2rules/_imf2rules.py} +15 -11
  53. cognite/neat/rules/importers/{_inference2rules.py → _rdf/_inference2rules.py} +1 -1
  54. cognite/neat/rules/importers/_rdf/_owl2rules/_owl2classes.py +57 -0
  55. cognite/neat/rules/importers/_rdf/_owl2rules/_owl2metadata.py +68 -0
  56. cognite/neat/rules/importers/_rdf/_owl2rules/_owl2properties.py +59 -0
  57. cognite/neat/rules/importers/_rdf/_owl2rules/_owl2rules.py +76 -0
  58. cognite/neat/rules/importers/_rdf/_shared.py +586 -0
  59. cognite/neat/rules/importers/_spreadsheet2rules.py +31 -28
  60. cognite/neat/rules/importers/_yaml2rules.py +2 -1
  61. cognite/neat/rules/issues/__init__.py +1 -5
  62. cognite/neat/rules/issues/base.py +2 -21
  63. cognite/neat/rules/issues/dms.py +20 -134
  64. cognite/neat/rules/issues/ontology.py +298 -0
  65. cognite/neat/rules/issues/spreadsheet.py +51 -3
  66. cognite/neat/rules/issues/tables.py +72 -0
  67. cognite/neat/rules/models/_rdfpath.py +4 -4
  68. cognite/neat/rules/models/_types/_field.py +14 -21
  69. cognite/neat/rules/models/asset/_validation.py +1 -1
  70. cognite/neat/rules/models/dms/_schema.py +53 -30
  71. cognite/neat/rules/models/dms/_validation.py +2 -2
  72. cognite/neat/rules/models/entities.py +3 -0
  73. cognite/neat/rules/models/information/_rules.py +5 -4
  74. cognite/neat/rules/models/information/_validation.py +1 -1
  75. cognite/neat/utils/rdf_.py +17 -9
  76. cognite/neat/utils/regex_patterns.py +52 -0
  77. cognite/neat/workflows/steps/lib/current/rules_importer.py +73 -1
  78. cognite/neat/workflows/steps/lib/current/rules_validator.py +19 -7
  79. {cognite_neat-0.88.0.dist-info → cognite_neat-0.88.2.dist-info}/METADATA +2 -6
  80. {cognite_neat-0.88.0.dist-info → cognite_neat-0.88.2.dist-info}/RECORD +85 -72
  81. cognite/neat/app/ui/neat-app/build/static/css/main.38a62222.css.map +0 -1
  82. cognite/neat/app/ui/neat-app/build/static/js/main.ec7f72e2.js +0 -3
  83. cognite/neat/app/ui/neat-app/build/static/js/main.ec7f72e2.js.map +0 -1
  84. cognite/neat/graph/issues/loader.py +0 -104
  85. cognite/neat/graph/stores/_oxrdflib.py +0 -247
  86. cognite/neat/rules/exceptions.py +0 -2972
  87. cognite/neat/rules/importers/_owl2rules/_owl2classes.py +0 -215
  88. cognite/neat/rules/importers/_owl2rules/_owl2metadata.py +0 -213
  89. cognite/neat/rules/importers/_owl2rules/_owl2properties.py +0 -203
  90. cognite/neat/rules/issues/importing.py +0 -408
  91. cognite/neat/rules/models/_types/_base.py +0 -16
  92. cognite/neat/workflows/examples/Export_Rules_to_Ontology/workflow.yaml +0 -152
  93. cognite/neat/workflows/examples/Extract_DEXPI_Graph_and_Export_Rules/workflow.yaml +0 -139
  94. cognite/neat/workflows/examples/Ontology_to_Data_Model/workflow.yaml +0 -116
  95. /cognite/neat/{graph/issues → issues/errors}/__init__.py +0 -0
  96. /cognite/neat/rules/importers/{_owl2rules → _rdf/_owl2rules}/__init__.py +0 -0
  97. {cognite_neat-0.88.0.dist-info → cognite_neat-0.88.2.dist-info}/LICENSE +0 -0
  98. {cognite_neat-0.88.0.dist-info → cognite_neat-0.88.2.dist-info}/WHEEL +0 -0
  99. {cognite_neat-0.88.0.dist-info → cognite_neat-0.88.2.dist-info}/entry_points.txt +0 -0
@@ -1,39 +1,37 @@
1
- import json
2
- from collections.abc import Iterable
1
+ from collections.abc import Callable, Set
3
2
  from datetime import datetime, timezone
4
3
  from pathlib import Path
5
- from typing import cast
6
4
 
7
5
  from cognite.client import CogniteClient
8
- from cognite.client.data_classes import TimeSeries, TimeSeriesList
6
+ from cognite.client.data_classes import TimeSeries, TimeSeriesFilter, TimeSeriesList
9
7
  from pydantic import AnyHttpUrl, ValidationError
10
8
  from rdflib import RDF, Literal, Namespace, URIRef
11
9
 
12
- from cognite.neat.constants import DEFAULT_NAMESPACE
13
- from cognite.neat.graph.extractors._base import BaseExtractor
14
10
  from cognite.neat.graph.models import Triple
15
- from cognite.neat.utils.auxiliary import string_to_ideal_type
16
11
 
12
+ from ._base import DEFAULT_SKIP_METADATA_VALUES, ClassicCDFExtractor
17
13
 
18
- class TimeSeriesExtractor(BaseExtractor):
14
+
15
+ class TimeSeriesExtractor(ClassicCDFExtractor[TimeSeries]):
19
16
  """Extract data from Cognite Data Fusions TimeSeries into Neat.
20
17
 
21
18
  Args:
22
- timeseries (Iterable[TimeSeries]): An iterable of timeseries.
19
+ items (Iterable[TimeSeries]): An iterable of items.
23
20
  namespace (Namespace, optional): The namespace to use. Defaults to DEFAULT_NAMESPACE.
21
+ to_type (Callable[[TimeSeries], str | None], optional): A function to convert an item to a type.
22
+ Defaults to None. If None or if the function returns None, the asset will be set to the default type.
23
+ total (int, optional): The total number of items to load. If passed, you will get a progress bar if rich
24
+ is installed. Defaults to None.
25
+ limit (int, optional): The maximal number of items to load. Defaults to None. This is typically used for
26
+ testing setup of the extractor. For example, if you are extracting 100 000 assets, you might want to
27
+ limit the extraction to 1000 assets to test the setup.
24
28
  unpack_metadata (bool, optional): Whether to unpack metadata. Defaults to False, which yields the metadata as
25
29
  a JSON string.
30
+ skip_metadata_values (set[str] | frozenset[str] | None, optional): If you are unpacking metadata, then
31
+ values in this set will be skipped.
26
32
  """
27
33
 
28
- def __init__(
29
- self,
30
- timeseries: Iterable[TimeSeries],
31
- namespace: Namespace | None = None,
32
- unpack_metadata: bool = True,
33
- ):
34
- self.namespace = namespace or DEFAULT_NAMESPACE
35
- self.timeseries = timeseries
36
- self.unpack_metadata = unpack_metadata
34
+ _default_rdf_type = "TimeSeries"
37
35
 
38
36
  @classmethod
39
37
  def from_dataset(
@@ -41,15 +39,23 @@ class TimeSeriesExtractor(BaseExtractor):
41
39
  client: CogniteClient,
42
40
  data_set_external_id: str,
43
41
  namespace: Namespace | None = None,
42
+ to_type: Callable[[TimeSeries], str | None] | None = None,
43
+ limit: int | None = None,
44
44
  unpack_metadata: bool = True,
45
+ skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
45
46
  ):
47
+ total = client.time_series.aggregate_count(
48
+ filter=TimeSeriesFilter(data_set_ids=[{"externalId": data_set_external_id}])
49
+ )
50
+
46
51
  return cls(
47
- cast(
48
- Iterable[TimeSeries],
49
- client.time_series(data_set_external_ids=data_set_external_id),
50
- ),
51
- namespace,
52
- unpack_metadata,
52
+ client.time_series(data_set_external_ids=data_set_external_id),
53
+ total=total,
54
+ namespace=namespace,
55
+ to_type=to_type,
56
+ limit=limit,
57
+ unpack_metadata=unpack_metadata,
58
+ skip_metadata_values=skip_metadata_values,
53
59
  )
54
60
 
55
61
  @classmethod
@@ -57,23 +63,30 @@ class TimeSeriesExtractor(BaseExtractor):
57
63
  cls,
58
64
  file_path: str,
59
65
  namespace: Namespace | None = None,
66
+ to_type: Callable[[TimeSeries], str | None] | None = None,
67
+ limit: int | None = None,
60
68
  unpack_metadata: bool = True,
69
+ skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
61
70
  ):
62
- return cls(TimeSeriesList.load(Path(file_path).read_text()), namespace, unpack_metadata)
63
-
64
- def extract(self) -> Iterable[Triple]:
65
- """Extract timeseries as triples."""
66
- for timeseries in self.timeseries:
67
- yield from self._timeseries2triples(timeseries)
71
+ timeseries = TimeSeriesList.load(Path(file_path).read_text())
72
+ return cls(
73
+ timeseries,
74
+ total=len(timeseries),
75
+ namespace=namespace,
76
+ to_type=to_type,
77
+ limit=limit,
78
+ unpack_metadata=unpack_metadata,
79
+ skip_metadata_values=skip_metadata_values,
80
+ )
68
81
 
69
- def _timeseries2triples(self, timeseries: TimeSeries) -> list[Triple]:
82
+ def _item2triples(self, timeseries: TimeSeries) -> list[Triple]:
70
83
  id_ = self.namespace[f"TimeSeries_{timeseries.id}"]
71
84
 
72
85
  # Set rdf type
73
- triples: list[Triple] = [(id_, RDF.type, self.namespace.TimeSeries)]
86
+ type_ = self._get_rdf_type(timeseries)
87
+ triples: list[Triple] = [(id_, RDF.type, self.namespace[type_])]
74
88
 
75
89
  # Create attributes
76
-
77
90
  if timeseries.external_id:
78
91
  triples.append((id_, self.namespace.external_id, Literal(timeseries.external_id)))
79
92
 
@@ -84,22 +97,7 @@ class TimeSeriesExtractor(BaseExtractor):
84
97
  triples.append((id_, self.namespace.is_string, Literal(timeseries.is_string)))
85
98
 
86
99
  if timeseries.metadata:
87
- if self.unpack_metadata:
88
- for key, value in timeseries.metadata.items():
89
- if value:
90
- type_aware_value = string_to_ideal_type(value)
91
- try:
92
- triples.append((id_, self.namespace[key], URIRef(str(AnyHttpUrl(type_aware_value))))) # type: ignore
93
- except ValidationError:
94
- triples.append((id_, self.namespace[key], Literal(type_aware_value)))
95
- else:
96
- triples.append(
97
- (
98
- id_,
99
- self.namespace.metadata,
100
- Literal(json.dumps(timeseries.metadata)),
101
- )
102
- )
100
+ triples.extend(self._metadata_to_triples(id_, timeseries.metadata))
103
101
 
104
102
  if timeseries.unit:
105
103
  triples.append((id_, self.namespace.unit, Literal(timeseries.unit)))
@@ -7,8 +7,8 @@ from cognite.client import CogniteClient
7
7
  from cognite.client.data_classes.capabilities import Capability
8
8
 
9
9
  from cognite.neat.graph import NeatGraphStore
10
- from cognite.neat.graph.issues.loader import FailedAuthorizationError
11
- from cognite.neat.issues import NeatIssue, NeatIssueList
10
+ from cognite.neat.issues import IssueList, NeatIssue, NeatIssueList
11
+ from cognite.neat.issues.errors.external import FailedAuthorizationError
12
12
  from cognite.neat.utils.auxiliary import class_html_doc
13
13
  from cognite.neat.utils.upload import UploadResult, UploadResultList
14
14
 
@@ -66,7 +66,7 @@ class CDFLoader(BaseLoader[T_Output]):
66
66
  yield upload_result
67
67
  return
68
68
 
69
- issues = NeatIssueList[NeatIssue]()
69
+ issues = IssueList()
70
70
  items: list[T_Output] = []
71
71
  for result in self._load(stop_on_exception=False):
72
72
  if isinstance(result, NeatIssue):
@@ -79,7 +79,7 @@ class CDFLoader(BaseLoader[T_Output]):
79
79
 
80
80
  if len(items) >= self._UPLOAD_BATCH_SIZE or result is _END_OF_CLASS:
81
81
  yield from self._upload_to_cdf(client, items, dry_run, issues)
82
- issues = NeatIssueList[NeatIssue]()
82
+ issues = IssueList()
83
83
  items = []
84
84
  if items:
85
85
  yield from self._upload_to_cdf(client, items, dry_run, issues)
@@ -19,9 +19,9 @@ from cognite.client.exceptions import CogniteAPIError, CogniteDuplicatedError
19
19
 
20
20
  from cognite.neat.graph._tracking.base import Tracker
21
21
  from cognite.neat.graph._tracking.log import LogTracker
22
- from cognite.neat.graph.issues import loader as loader_issues
23
22
  from cognite.neat.graph.stores import NeatGraphStore
24
- from cognite.neat.issues import NeatIssue, NeatIssueList
23
+ from cognite.neat.issues import IssueList, NeatIssue, NeatIssueList
24
+ from cognite.neat.issues.errors.resources import InvalidResourceError
25
25
  from cognite.neat.rules.analysis._asset import AssetAnalysis
26
26
  from cognite.neat.rules.models import AssetRules
27
27
  from cognite.neat.rules.models.entities import ClassEntity, EntityTypes
@@ -80,7 +80,7 @@ class AssetLoader(CDFLoader[AssetWrite]):
80
80
  self.external_id_prefix = external_id_prefix
81
81
 
82
82
  self.processed_assets: set[str] = set()
83
- self._issues = NeatIssueList[NeatIssue](create_issues or [])
83
+ self._issues = IssueList(create_issues or [])
84
84
  self._tracker: type[Tracker] = tracker or LogTracker
85
85
 
86
86
  def _load(self, stop_on_exception: bool = False) -> Iterable[AssetWrite | NeatIssue | type[_END_OF_CLASS]]:
@@ -143,8 +143,8 @@ class AssetLoader(CDFLoader[AssetWrite]):
143
143
 
144
144
  # check on parent
145
145
  if "parentExternalId" in fields and fields["parentExternalId"] not in self.processed_assets:
146
- error = loader_issues.InvalidInstanceError(
147
- type_=EntityTypes.asset,
146
+ error = InvalidResourceError(
147
+ resource_type=EntityTypes.asset,
148
148
  identifier=identifier,
149
149
  reason=(
150
150
  f"Parent asset {fields['parentExternalId']} does not exist or failed creation"
@@ -171,9 +171,7 @@ class AssetLoader(CDFLoader[AssetWrite]):
171
171
  yield AssetWrite.load(fields)
172
172
  self.processed_assets.add(identifier)
173
173
  except KeyError as e:
174
- error = loader_issues.InvalidInstanceError(
175
- type_=EntityTypes.asset, identifier=identifier, reason=str(e)
176
- )
174
+ error = InvalidResourceError(resource_type=EntityTypes.asset, identifier=identifier, reason=str(e))
177
175
  tracker.issue(error)
178
176
  if stop_on_exception:
179
177
  raise error.as_exception() from e
@@ -203,8 +201,8 @@ class AssetLoader(CDFLoader[AssetWrite]):
203
201
 
204
202
  # check if source asset exists
205
203
  if source_external_id not in self.processed_assets:
206
- error = loader_issues.InvalidInstanceError(
207
- type_=EntityTypes.relationship,
204
+ error = InvalidResourceError(
205
+ resource_type=EntityTypes.relationship,
208
206
  identifier=source_external_id,
209
207
  reason=(
210
208
  f"Asset {source_external_id} does not exist! "
@@ -223,8 +221,8 @@ class AssetLoader(CDFLoader[AssetWrite]):
223
221
  target_external_id = f"{self.external_id_prefix or ''}{target_external_id}"
224
222
  # check if source asset exists
225
223
  if target_external_id not in self.processed_assets:
226
- error = loader_issues.InvalidInstanceError(
227
- type_=EntityTypes.relationship,
224
+ error = InvalidResourceError(
225
+ resource_type=EntityTypes.relationship,
228
226
  identifier=target_external_id,
229
227
  reason=(
230
228
  f"Asset {target_external_id} does not exist! "
@@ -250,8 +248,8 @@ class AssetLoader(CDFLoader[AssetWrite]):
250
248
  labels=[label] if self.use_labels else None,
251
249
  )
252
250
  except KeyError as e:
253
- error = loader_issues.InvalidInstanceError(
254
- type_=EntityTypes.relationship,
251
+ error = InvalidResourceError(
252
+ resource_type=EntityTypes.relationship,
255
253
  identifier=external_id,
256
254
  reason=str(e),
257
255
  )
@@ -14,10 +14,12 @@ from cognite.client.data_classes.data_modeling.views import SingleEdgeConnection
14
14
  from cognite.client.exceptions import CogniteAPIError
15
15
  from pydantic import BaseModel, ValidationInfo, create_model, field_validator
16
16
 
17
+ import cognite.neat.issues.errors.resources
17
18
  from cognite.neat.graph._tracking import LogTracker, Tracker
18
- from cognite.neat.graph.issues import loader as loader_issues
19
19
  from cognite.neat.graph.stores import NeatGraphStore
20
- from cognite.neat.issues import NeatIssue, NeatIssueList
20
+ from cognite.neat.issues import IssueList, NeatIssue, NeatIssueList
21
+ from cognite.neat.issues.errors.resources import FailedConvertError, InvalidResourceError, ResourceNotFoundError
22
+ from cognite.neat.issues.neat_warnings.models import InvalidClassWarning
21
23
  from cognite.neat.rules.models import DMSRules
22
24
  from cognite.neat.rules.models.data_types import _DATA_TYPE_BY_DMS_TYPE, Json
23
25
  from cognite.neat.utils.auxiliary import create_sha256_hash
@@ -51,7 +53,7 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
51
53
  self.data_model = data_model
52
54
  self.instance_space = instance_space
53
55
  self.class_by_view_id = class_by_view_id or {}
54
- self._issues = NeatIssueList[NeatIssue](create_issues or [])
56
+ self._issues = IssueList(create_issues or [])
55
57
  self._tracker: type[Tracker] = tracker or LogTracker
56
58
 
57
59
  @classmethod
@@ -67,7 +69,9 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
67
69
  try:
68
70
  data_model = client.data_modeling.data_models.retrieve(data_model_id, inline_views=True).latest_version()
69
71
  except Exception as e:
70
- issues.append(loader_issues.MissingDataModelError(identifier=repr(data_model_id), reason=str(e)))
72
+ issues.append(
73
+ ResourceNotFoundError(identifier=repr(data_model_id), resource_type="Data Model", reason=str(e))
74
+ )
71
75
 
72
76
  return cls(graph_store, data_model, instance_space, {}, issues)
73
77
 
@@ -79,7 +83,7 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
79
83
  data_model = rules.as_schema().as_read_model()
80
84
  except Exception as e:
81
85
  issues.append(
82
- loader_issues.FailedConvertError(
86
+ FailedConvertError(
83
87
  identifier=rules.metadata.as_identifier(),
84
88
  target_format="read DMS model",
85
89
  reason=str(e),
@@ -110,7 +114,7 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
110
114
  try:
111
115
  yield self._create_node(identifier, properties, pydantic_cls, view_id)
112
116
  except ValueError as e:
113
- error = loader_issues.InvalidInstanceError(type_="node", identifier=identifier, reason=str(e))
117
+ error = InvalidResourceError(resource_type="node", identifier=identifier, reason=str(e))
114
118
  tracker.issue(error)
115
119
  if stop_on_exception:
116
120
  raise error.as_exception() from e
@@ -141,7 +145,7 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
141
145
  def _create_validation_classes(
142
146
  self, view: dm.View
143
147
  ) -> tuple[type[BaseModel], dict[str, dm.EdgeConnection], NeatIssueList]:
144
- issues = NeatIssueList[NeatIssue]()
148
+ issues = IssueList()
145
149
  field_definitions: dict[str, tuple[type, Any]] = {}
146
150
  edge_by_property: dict[str, dm.EdgeConnection] = {}
147
151
  validators: dict[str, classmethod] = {}
@@ -158,7 +162,7 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
158
162
  data_type = _DATA_TYPE_BY_DMS_TYPE.get(prop.type._type)
159
163
  if not data_type:
160
164
  issues.append(
161
- loader_issues.InvalidClassWarning(
165
+ InvalidClassWarning(
162
166
  class_name=repr(view.as_id()),
163
167
  reason=f"Unknown data type for property {prop_name}: {prop.type._type}",
164
168
  )
@@ -247,8 +251,8 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
247
251
  continue
248
252
  edge = edge_by_properties[prop]
249
253
  if isinstance(edge, SingleEdgeConnection) and len(values) > 1:
250
- error = loader_issues.InvalidInstanceError(
251
- type_="edge",
254
+ error = cognite.neat.issues.errors.resources.InvalidResourceError(
255
+ resource_type="edge",
252
256
  identifier=identifier,
253
257
  reason=f"Multiple values for single edge {edge}. Expected only one.",
254
258
  )
@@ -98,47 +98,40 @@ class Queries:
98
98
  self,
99
99
  instance_id: URIRef,
100
100
  property_renaming_config: dict | None = None,
101
- ) -> tuple[str, dict[str, list[str]]]:
101
+ ) -> tuple[str, dict[str, list[str]]] | None:
102
102
  """DESCRIBE instance for a given class from the graph store
103
103
 
104
104
  Args:
105
105
  instance_id: Instance id for which we want to generate query
106
- property_rename_config: Dictionary to rename properties, default None
106
+ property_renaming_config: Dictionary to rename properties, default None
107
107
 
108
108
  Returns:
109
109
  Dictionary of instance properties
110
110
  """
111
-
112
111
  property_values: dict[str, list[str]] = defaultdict(list)
113
-
114
- for subject, predicate, object_ in cast(list[ResultRow], self.graph.query(f"DESCRIBE <{instance_id}>")):
115
- if object_.lower() not in [
112
+ identifier = remove_namespace_from_uri(instance_id, validation="prefix")
113
+ for _, predicate, object_ in cast(list[ResultRow], self.graph.query(f"DESCRIBE <{instance_id}>")):
114
+ if object_.lower() in [
116
115
  "",
117
116
  "none",
118
117
  "nan",
119
118
  "null",
120
119
  ]:
121
- # we are skipping deep validation with Pydantic to remove namespace here
122
- # as it reduces time to process triples by 10-15x
123
- identifier, value = cast( # type: ignore[misc]
124
- (str, str),
125
- remove_namespace_from_uri(*(subject, object_), validation="prefix"),
126
- ) # type: ignore[misc, index]
127
-
128
- # use-case: calling describe without renaming properties
129
- # losing the namespace from the predicate!
130
- if not property_renaming_config and predicate != RDF.type:
131
- property_values[remove_namespace_from_uri(predicate, validation="prefix")].append(value)
132
-
133
- # use-case: calling describe with renaming properties
134
- # renaming the property to the new name, if the property is defined
135
- # in the RULES sheet
136
- elif property_renaming_config and (property_ := property_renaming_config.get(predicate, None)):
137
- property_values[property_].append(value)
138
-
139
- # use-case: skip the property if it is not defined in property_renaming_config
140
- else:
141
- continue
120
+ continue
121
+ # we are skipping deep validation with Pydantic to remove namespace here
122
+ # as it reduces time to process triples by 10-15x
123
+ value = remove_namespace_from_uri(object_, validation="prefix")
124
+
125
+ # use-case: calling describe without renaming properties
126
+ # losing the namespace from the predicate!
127
+ if not property_renaming_config and predicate != RDF.type:
128
+ property_values[remove_namespace_from_uri(predicate, validation="prefix")].append(value)
129
+
130
+ # use-case: calling describe with renaming properties
131
+ # renaming the property to the new name, if the property is defined
132
+ # in the RULES sheet
133
+ elif property_renaming_config and (property_ := property_renaming_config.get(predicate, None)):
134
+ property_values[property_].append(value)
142
135
 
143
136
  if property_values:
144
137
  return (
@@ -146,7 +139,7 @@ class Queries:
146
139
  property_values,
147
140
  )
148
141
  else:
149
- return () # type: ignore [return-value]
142
+ return None
150
143
 
151
144
  def construct_instances_of_class(
152
145
  self,
@@ -177,7 +170,7 @@ class Queries:
177
170
  result = self.graph.query(query)
178
171
 
179
172
  # We cannot include the RDF.type in case there is a neat:type property
180
- return [remove_namespace_from_uri(*triple) for triple in result if triple[1] != RDF.type] # type: ignore[misc, index]
173
+ return [remove_namespace_from_uri(cast(ResultRow, triple)) for triple in result if triple[1] != RDF.type] # type: ignore[misc, index]
181
174
  else:
182
175
  warnings.warn(
183
176
  "No rules found for the graph store, returning empty list.",
@@ -160,7 +160,7 @@ def triples2dictionary(triples: Iterable[tuple[URIRef, URIRef, str | URIRef]]) -
160
160
  value: str
161
161
  uri: URIRef
162
162
 
163
- id_, property_, value = remove_namespace_from_uri(*triple) # type: ignore[misc]
163
+ id_, property_, value = remove_namespace_from_uri(triple) # type: ignore[misc]
164
164
  uri = triple[0]
165
165
 
166
166
  if uri not in dictionary:
@@ -108,7 +108,7 @@ class NeatGraphStore:
108
108
 
109
109
  @classmethod
110
110
  def from_memory_store(cls, rules: InformationRules | None = None) -> "Self":
111
- return cls(Graph(), rules)
111
+ return cls(Graph(identifier=DEFAULT_NAMESPACE), rules)
112
112
 
113
113
  @classmethod
114
114
  def from_sparql_store(
@@ -126,17 +126,17 @@ class NeatGraphStore:
126
126
  postAsEncoded=False,
127
127
  autocommit=False,
128
128
  )
129
- graph = Graph(store=store)
129
+ graph = Graph(store=store, identifier=DEFAULT_NAMESPACE)
130
130
  return cls(graph, rules)
131
131
 
132
132
  @classmethod
133
133
  def from_oxi_store(cls, storage_dir: Path | None = None, rules: InformationRules | None = None) -> "Self":
134
134
  """Creates a NeatGraphStore from an Oxigraph store."""
135
135
  local_import("pyoxigraph", "oxi")
136
+ local_import("oxrdflib", "oxi")
137
+ import oxrdflib
136
138
  import pyoxigraph
137
139
 
138
- from cognite.neat.graph.stores._oxrdflib import OxigraphStore
139
-
140
140
  # Adding support for both oxigraph in-memory and file-based storage
141
141
  for i in range(4):
142
142
  try:
@@ -149,8 +149,10 @@ class NeatGraphStore:
149
149
  else:
150
150
  raise Exception("Error initializing Oxigraph store")
151
151
 
152
- graph = Graph(store=OxigraphStore(store=oxi_store))
153
- graph.default_union = True
152
+ graph = Graph(
153
+ store=oxrdflib.OxigraphStore(store=oxi_store),
154
+ identifier=DEFAULT_NAMESPACE,
155
+ )
154
156
 
155
157
  return cls(graph, rules)
156
158
 
@@ -208,7 +210,8 @@ class NeatGraphStore:
208
210
  property_renaming_config = InformationAnalysis(self.rules).define_property_renaming_config(class_entity)
209
211
 
210
212
  for instance_id in instance_ids:
211
- yield self.queries.describe(instance_id, property_renaming_config)
213
+ if res := self.queries.describe(instance_id, property_renaming_config):
214
+ yield res
212
215
 
213
216
  def _parse_file(
214
217
  self,
@@ -221,7 +224,7 @@ class NeatGraphStore:
221
224
  Args:
222
225
  filepath : File path to file containing graph data, by default None
223
226
  mime_type : MIME type of graph data, by default "application/rdf+xml"
224
- add_base_iri : Add base IRI to graph, by default True
227
+ base_uri : Add base IRI to graph, by default True
225
228
  """
226
229
 
227
230
  # Oxigraph store, do not want to type hint this as it is an optional dependency
@@ -229,10 +232,15 @@ class NeatGraphStore:
229
232
 
230
233
  def parse_to_oxi_store():
231
234
  local_import("pyoxigraph", "oxi")
232
- from cognite.neat.graph.stores._oxrdflib import OxigraphStore
235
+ import pyoxigraph
233
236
 
234
- cast(OxigraphStore, self.graph.store)._inner.bulk_load(str(filepath), mime_type, base_iri=base_uri) # type: ignore[attr-defined]
235
- cast(OxigraphStore, self.graph.store)._inner.optimize() # type: ignore[attr-defined]
237
+ cast(pyoxigraph.Store, self.graph.store._store).bulk_load(
238
+ str(filepath),
239
+ mime_type,
240
+ base_iri=base_uri,
241
+ to_graph=pyoxigraph.NamedNode(self.graph.identifier),
242
+ )
243
+ cast(pyoxigraph.Store, self.graph.store._store).optimize()
236
244
 
237
245
  parse_to_oxi_store()
238
246
 
@@ -1,4 +1,4 @@
1
- from rdflib import RDF, Graph
1
+ from rdflib import Graph
2
2
 
3
3
  from cognite.neat.rules.analysis import InformationAnalysis
4
4
  from cognite.neat.rules.models._rdfpath import RDFPath, SingleProperty
@@ -17,6 +17,7 @@ class AddSelfReferenceProperty(BaseTransformer):
17
17
  description: str = "Adds property that contains id of reference to all references of given class in Rules"
18
18
  _use_only_once: bool = True
19
19
  _need_changes = frozenset({})
20
+ _ref_template: str = """SELECT ?s WHERE {{?s a <{type_}>}}"""
20
21
 
21
22
  def __init__(
22
23
  self,
@@ -32,7 +33,7 @@ class AddSelfReferenceProperty(BaseTransformer):
32
33
 
33
34
  namespace = self.rules.prefixes[prefix] if prefix in self.rules.prefixes else self.rules.metadata.namespace
34
35
 
35
- for reference in graph.subjects(RDF.type, namespace[suffix]):
36
+ for (reference,) in graph.query(self._ref_template.format(type_=namespace[suffix])): # type: ignore [misc]
36
37
  graph.add(
37
38
  (
38
39
  reference,
@@ -0,0 +1,16 @@
1
+ """This is module contains all the Neat Exceptions (Errors) and Warnings as well
2
+ as
3
+
4
+ """
5
+
6
+ from ._base import DefaultWarning, IssueList, MultiValueError, NeatError, NeatIssue, NeatIssueList, NeatWarning
7
+
8
+ __all__ = [
9
+ "NeatIssue",
10
+ "NeatError",
11
+ "NeatWarning",
12
+ "DefaultWarning",
13
+ "NeatIssueList",
14
+ "IssueList",
15
+ "MultiValueError",
16
+ ]
@@ -9,6 +9,7 @@ from typing import Any, ClassVar, TypeVar
9
9
  from warnings import WarningMessage
10
10
 
11
11
  import pandas as pd
12
+ from pydantic_core import ErrorDetails, PydanticCustomError
12
13
 
13
14
  if sys.version_info < (3, 11):
14
15
  from exceptiongroup import ExceptionGroup
@@ -17,11 +18,24 @@ else:
17
18
  from typing import Self
18
19
 
19
20
 
21
+ __all__ = [
22
+ "NeatIssue",
23
+ "NeatError",
24
+ "NeatWarning",
25
+ "DefaultWarning",
26
+ "NeatIssueList",
27
+ "MultiValueError",
28
+ ]
29
+
30
+
20
31
  @total_ordering
21
32
  @dataclass(frozen=True)
22
33
  class NeatIssue(ABC):
34
+ """This is the base class for all exceptions and warnings (issues) used in Neat."""
35
+
23
36
  description: ClassVar[str]
24
- fix: ClassVar[str]
37
+ extra: ClassVar[str | None] = None
38
+ fix: ClassVar[str | None] = None
25
39
 
26
40
  def message(self) -> str:
27
41
  """Return a human-readable message for the issue.
@@ -30,7 +44,7 @@ class NeatIssue(ABC):
30
44
  It is recommended to override this method in subclasses with a more
31
45
  specific message.
32
46
  """
33
- return self.description
47
+ return self.__doc__ or "Missing"
34
48
 
35
49
  @abstractmethod
36
50
  def dump(self) -> dict[str, Any]:
@@ -56,6 +70,65 @@ class NeatError(NeatIssue, ABC):
56
70
  def as_exception(self) -> ValueError:
57
71
  return ValueError(self.message())
58
72
 
73
+ def as_pydantic_exception(self) -> PydanticCustomError:
74
+ return PydanticCustomError(
75
+ type(self).__name__,
76
+ self.message(),
77
+ dict(description=self.__doc__, fix=self.fix),
78
+ )
79
+
80
+ @classmethod
81
+ def from_pydantic_errors(cls, errors: list[ErrorDetails], **kwargs) -> "list[NeatError]":
82
+ """Convert a list of pydantic errors to a list of Error instances.
83
+
84
+ This is intended to be overridden in subclasses to handle specific error types.
85
+ """
86
+ all_errors: list[NeatError] = []
87
+ for error in errors:
88
+ if isinstance(ctx := error.get("ctx"), dict) and isinstance(
89
+ multi_error := ctx.get("error"), MultiValueError
90
+ ):
91
+ all_errors.extend(multi_error.errors) # type: ignore[arg-type]
92
+ else:
93
+ all_errors.append(DefaultPydanticError.from_pydantic_error(error))
94
+ return all_errors
95
+
96
+
97
+ @dataclass(frozen=True)
98
+ class DefaultPydanticError(NeatError):
99
+ type: str
100
+ loc: tuple[int | str, ...]
101
+ msg: str
102
+ input: Any
103
+ ctx: dict[str, Any] | None
104
+
105
+ @classmethod
106
+ def from_pydantic_error(cls, error: ErrorDetails) -> "DefaultPydanticError":
107
+ return cls(
108
+ type=error["type"],
109
+ loc=error["loc"],
110
+ msg=error["msg"],
111
+ input=error.get("input"),
112
+ ctx=error.get("ctx"),
113
+ )
114
+
115
+ def dump(self) -> dict[str, Any]:
116
+ output = super().dump()
117
+ output["type"] = self.type
118
+ output["loc"] = self.loc
119
+ output["msg"] = self.msg
120
+ output["input"] = self.input
121
+ output["ctx"] = self.ctx
122
+ return output
123
+
124
+ def message(self) -> str:
125
+ if self.loc and len(self.loc) == 1:
126
+ return f"{self.loc[0]} sheet: {self.msg}"
127
+ elif self.loc and len(self.loc) == 2:
128
+ return f"{self.loc[0]} sheet field/column <{self.loc[1]}>: {self.msg}"
129
+ else:
130
+ return self.msg
131
+
59
132
 
60
133
  @dataclass(frozen=True)
61
134
  class NeatWarning(NeatIssue, ABC, UserWarning):
@@ -148,3 +221,6 @@ class MultiValueError(ValueError):
148
221
 
149
222
  def __init__(self, errors: Sequence[T_NeatIssue]):
150
223
  self.errors = list(errors)
224
+
225
+
226
+ class IssueList(NeatIssueList[NeatIssue]): ...