cognite-neat 0.123.23__py3-none-any.whl → 0.123.25__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cognite-neat might be problematic. Click here for more details.

cognite/neat/_version.py CHANGED
@@ -1,2 +1,2 @@
1
- __version__ = "0.123.23"
1
+ __version__ = "0.123.25"
2
2
  __engine__ = "^2.0.4"
@@ -250,6 +250,11 @@ def is_hierarchy_property(container: dm.ContainerId, property_: str) -> bool:
250
250
  )
251
251
 
252
252
 
253
def cognite_prefixes() -> dict[str, Namespace]:
    """Build the prefix-to-namespace mapping for the Cognite spaces.

    Returns:
        A dict with one entry per item of ``COGNITE_SPACES``; each value is a
        ``Namespace`` made from ``CDF_NAMESPACE[space]`` with a trailing ``/``.
    """
    prefixes: dict[str, Namespace] = {}
    for space in COGNITE_SPACES:
        # The trailing slash makes the namespace end on a path boundary.
        prefixes[space] = Namespace(CDF_NAMESPACE[space] + "/")
    return prefixes
256
+
257
+
253
258
  DMS_RESERVED_PROPERTIES = frozenset(
254
259
  {
255
260
  "createdTime",
@@ -1,17 +1,21 @@
1
- from typing import cast
1
+ from typing import Any, Literal, cast
2
2
  from urllib.parse import quote
3
3
 
4
- from rdflib import BNode, Graph
4
+ from rdflib import BNode, Graph, Namespace, URIRef
5
+ from rdflib import Literal as RdfLiteral
5
6
  from rdflib.plugins.sparql import prepareQuery
6
7
  from rdflib.query import ResultRow
7
8
 
9
+ from cognite.neat.core._constants import cognite_prefixes
10
+ from cognite.neat.core._data_model.models.entities._constants import Unknown
11
+ from cognite.neat.core._data_model.models.entities._single_value import ConceptEntity
8
12
  from cognite.neat.core._issues._base import IssueList
9
13
  from cognite.neat.core._issues.errors._general import NeatValueError
10
14
  from cognite.neat.core._issues.warnings._resources import (
11
15
  ResourceRedefinedWarning,
12
16
  ResourceRetrievalWarning,
13
17
  )
14
- from cognite.neat.core._utils.rdf_ import convert_rdflib_content
18
+ from cognite.neat.core._utils.rdf_ import remove_namespace_from_uri, uri_to_entity_components
15
19
 
16
20
 
17
21
  def parse_concepts(
@@ -33,27 +37,31 @@ def parse_concepts(
33
37
  concepts: dict[str, dict] = {}
34
38
 
35
39
  query = prepareQuery(query.format(language=language), initNs={k: v for k, v in graph.namespaces()})
40
+ prefixes = cognite_prefixes()
36
41
 
37
42
  for raw in graph.query(query):
38
- res: dict = convert_rdflib_content(cast(ResultRow, raw).asdict(), True)
43
+ res: dict = convert_rdflib_content(
44
+ cast(ResultRow, raw).asdict(), uri_handling="as-concept-entity", prefixes=prefixes
45
+ )
39
46
  res = {key: res.get(key, None) for key in parameters}
40
47
 
41
- # Quote the concept id to ensure it is web-safe
42
- res["concept"] = quote(res["concept"], safe="")
43
-
44
- concept_id = res["concept"]
45
-
46
48
  # Safeguarding against incomplete semantic definitions
47
49
  if res["implements"] and isinstance(res["implements"], BNode):
48
50
  issue_list.append(
49
51
  ResourceRetrievalWarning(
50
- concept_id,
52
+ res["concept"],
51
53
  "implements",
52
54
  error=("Unable to determine concept that is being implemented"),
53
55
  )
54
56
  )
55
57
  continue
56
58
 
59
+ # sanitize the concept and implements
60
+ res["concept"] = sanitize_entity(res["concept"])
61
+ res["implements"] = sanitize_entity(res["implements"]) if res["implements"] else None
62
+
63
+ concept_id = res["concept"]
64
+
57
65
  if concept_id not in concepts:
58
66
  concepts[concept_id] = res
59
67
  else:
@@ -72,7 +80,6 @@ def parse_concepts(
72
80
 
73
81
  handle_meta("concept", concepts, concept_id, res, "name", issue_list)
74
82
  handle_meta("concept", concepts, concept_id, res, "description", issue_list)
75
-
76
83
  if not concepts:
77
84
  issue_list.append(NeatValueError("Unable to parse concepts"))
78
85
 
@@ -98,9 +105,12 @@ def parse_properties(
98
105
  properties: dict[str, dict] = {}
99
106
 
100
107
  query = prepareQuery(query.format(language=language), initNs={k: v for k, v in graph.namespaces()})
108
+ prefixes = cognite_prefixes()
101
109
 
102
110
  for raw in graph.query(query):
103
- res: dict = convert_rdflib_content(cast(ResultRow, raw).asdict(), True)
111
+ res: dict = convert_rdflib_content(
112
+ cast(ResultRow, raw).asdict(), uri_handling="as-concept-entity", prefixes=prefixes
113
+ )
104
114
  res = {key: res.get(key, None) for key in parameters}
105
115
 
106
116
  # Quote the concept id to ensure it is web-safe
@@ -129,9 +139,9 @@ def parse_properties(
129
139
  )
130
140
  continue
131
141
 
132
- # Quote the concept and value_type to ensure they are web-safe
133
- res["concept"] = quote(res["concept"], safe="") if res["concept"] else "#N/A"
134
- res["value_type"] = quote(res["value_type"], safe="") if res["value_type"] else "#N/A"
142
+ # Sanitize the concept and value_type if they exist; otherwise signal to neat that they are unknown
143
+ res["concept"] = sanitize_entity(res["concept"]) if res["concept"] else str(Unknown)
144
+ res["value_type"] = sanitize_entity(res["value_type"]) if res["value_type"] else str(Unknown)
135
145
 
136
146
  id_ = f"{res['concept']}.{res['property_']}"
137
147
 
@@ -173,14 +183,73 @@ def handle_meta(
173
183
  if not resources[resource_id][feature] and res[feature]:
174
184
  resources[resource_id][feature] = res[feature]
175
185
 
176
- # RAISE warning only if the feature is being redefined
177
- elif resources[resource_id][feature] and res[feature]:
186
+ current_value = resources[resource_id][feature]
187
+ new_value = res[feature]
188
+
189
+ if not current_value and new_value:
190
+ resources[resource_id][feature] = new_value
191
+ elif current_value and new_value and current_value != new_value:
178
192
  issue_list.append(
179
193
  ResourceRedefinedWarning(
180
194
  identifier=resource_id,
181
195
  resource_type=resource_type,
182
196
  feature=feature,
183
- current_value=resources[resource_id][feature],
184
- new_value=res[feature],
197
+ current_value=current_value,
198
+ new_value=new_value,
185
199
  )
186
200
  )
201
+
202
+
203
def convert_rdflib_content(
    content: RdfLiteral | URIRef | dict | list,
    uri_handling: Literal["skip", "remove-namespace", "as-concept-entity"] = "skip",
    prefixes: dict[str, Namespace] | None = None,
) -> Any:
    """Converts rdflib content to a more Python-friendly format.

    Dicts and lists are converted recursively; the same ``uri_handling`` and
    ``prefixes`` are applied to every nested value.

    Args:
        content: The content to convert, can be a RdfLiteral, URIRef, dict, or list.
        uri_handling: How to handle URIs. Options are:
            - "skip": Do not rewrite the URI; ``URIRef.toPython()`` is returned.
            - "remove-namespace": Remove the namespace from URIs.
            - "as-concept-entity": Convert the URI to a ``ConceptEntity`` when its
              components can be resolved against ``prefixes``; otherwise fall
              back to "remove-namespace".
        prefixes: Prefix to namespace mapping, only consulted for
            "as-concept-entity". ``None`` is treated as an empty mapping.

    Returns:
        The converted value. Anything that is not a RdfLiteral, URIRef, dict or
        list is returned unchanged.
    """
    if isinstance(content, RdfLiteral):
        return content.toPython()

    if isinstance(content, URIRef):
        if uri_handling == "as-concept-entity":
            # Components are (prefix, data_model_id, version, entity_id);
            # the data model id is not part of the ConceptEntity.
            if components := uri_to_entity_components(content, prefixes or {}):
                prefix, _, version, suffix = components
                return ConceptEntity(prefix=prefix, suffix=suffix, version=version)
            # No registered prefix resolved the URI: fall back to "remove-namespace".
            return remove_namespace_from_uri(content)
        if uri_handling == "remove-namespace":
            return remove_namespace_from_uri(content)
        return content.toPython()

    if isinstance(content, dict):
        return {key: convert_rdflib_content(value, uri_handling, prefixes) for key, value in content.items()}

    if isinstance(content, list):
        return [convert_rdflib_content(item, uri_handling, prefixes) for item in content]

    return content
237
+
238
+
239
def sanitize_entity(entity: str | ConceptEntity, safe: str = "") -> str:
    """Turn an entity into a string form intended to pass downstream validation.

    Args:
        entity: The entity to sanitize. Can be a string or a ConceptEntity.
        safe: Characters that should be left unquoted when a string is quoted.

    Returns:
        The percent-quoted string for ``str`` input, or ``str(entity)`` for a
        ``ConceptEntity``.

    Raises:
        ValueError: If ``entity`` is neither a ``str`` nor a ``ConceptEntity``.
    """
    # Plain strings are percent-quoted so they become web-safe.
    if isinstance(entity, str):
        quoted = quote(entity, safe=safe)
        return quoted

    # A ConceptEntity is not quoted; its string representation is returned as is.
    if isinstance(entity, ConceptEntity):
        return str(entity)

    raise ValueError(f"Invalid entity type: {type(entity)}. Expected str, ConceptEntity.")
@@ -147,19 +147,6 @@ def as_neat_compliant_uri(uri: URIRef) -> URIRef:
147
147
  return URIRef(f"{namespace}{compliant_uri}")
148
148
 
149
149
 
150
def convert_rdflib_content(content: RdfLiteral | URIRef | dict | list, remove_namespace: bool = False) -> Any:
    """Convert rdflib content to plain Python values.

    Args:
        content: A RdfLiteral, URIRef, dict or list to convert. Dicts and lists
            are converted recursively.
        remove_namespace: When True, URIRefs are passed through
            ``remove_namespace_from_uri``; otherwise ``URIRef.toPython()`` is used.

    Returns:
        The converted value. Any other type is returned unchanged.
    """
    if isinstance(content, RdfLiteral):
        return content.toPython()

    if isinstance(content, URIRef):
        if remove_namespace:
            return remove_namespace_from_uri(content)
        return content.toPython()

    if isinstance(content, dict):
        converted: dict = {}
        for key, value in content.items():
            converted[key] = convert_rdflib_content(value, remove_namespace)
        return converted

    if isinstance(content, list):
        items: list = []
        for item in content:
            items.append(convert_rdflib_content(item, remove_namespace))
        return items

    return content
161
-
162
-
163
150
  def uri_to_short_form(URI: URIRef, prefixes: dict[str, Namespace]) -> str | URIRef:
164
151
  """Returns the short form of a URI if its namespace is present in the prefixes dict,
165
152
  otherwise returns the URI itself
@@ -179,6 +166,43 @@ def uri_to_short_form(URI: URIRef, prefixes: dict[str, Namespace]) -> str | URIR
179
166
  return min(uris, key=len)
180
167
 
181
168
 
169
def uri_to_entity_components(uri: URIRef, prefixes: dict[str, Namespace]) -> tuple[str, str, str, str] | None:
    """Converts a URI to its components: space, data_model_id, version, and entity_id.

    Args:
        uri: URI to be converted
        prefixes: dict mapping a prefix to the namespace it stands for

    Returns:
        tuple of prefix (the matching key of ``prefixes``), data_model_id,
        version, and entity_id if found, otherwise None

    !!! note "URI Format"
        The URI is expected to be in the form of `.../<space>/<data_model_id>/<version>/<entity_id>` to
        be able to extract the components correctly.

        An example of a valid entity URI is:

        `https://cognitedata.com/cdf_cdm/CogniteCore/v1/CogniteAsset` , where:

        - space is `cdf_cdm`
        - data_model_id is `CogniteCore`
        - version is `v1`
        - entity_id is `CogniteAsset`

        to be able to parse the URI correctly, the prefixes dict must have
        the namespace registered up to and including the space, ending in `/`:
        {'cdf_cdm': Namespace('https://cognitedata.com/cdf_cdm/')}

        for this method to return the correct components. A namespace that
        already contains the data model id and version leaves fewer than three
        segments after it and therefore yields None.
    """
    uri_text = str(uri)
    for prefix, namespace in prefixes.items():
        namespace_text = str(namespace)
        if not uri_text.startswith(namespace_text):
            continue
        # Exactly three non-empty segments must remain after the namespace:
        # <data_model_id>/<version>/<entity_id>.
        segments = uri_text[len(namespace_text) :].split("/")
        if len(segments) == 3 and all(segments):
            data_model_id, version, entity_id = segments
            return prefix, data_model_id, version, entity_id
    return None
204
+
205
+
182
206
  def _traverse(hierarchy: dict, graph: dict, names: list[str]) -> dict:
183
207
  """traverse the graph and return the hierarchy"""
184
208
  for name in names:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cognite-neat
3
- Version: 0.123.23
3
+ Version: 0.123.25
4
4
  Summary: Knowledge graph transformation
5
5
  Project-URL: Documentation, https://cognite-neat.readthedocs-hosted.com/
6
6
  Project-URL: Homepage, https://cognite-neat.readthedocs-hosted.com/
@@ -1,9 +1,9 @@
1
1
  cognite/neat/__init__.py,sha256=12StS1dzH9_MElqxGvLWrNsxCJl9Hv8A2a9D0E5OD_U,193
2
- cognite/neat/_version.py,sha256=0ecHvh-Qq-gIDd9cT8TLmFo99So1sxYnycVfCgckPL8,47
2
+ cognite/neat/_version.py,sha256=r7KKTj1I5Kj3gInGNGwe1UoX8sdMd4-Z1v_2uzagy1s,47
3
3
  cognite/neat/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  cognite/neat/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
5
  cognite/neat/core/_config.py,sha256=WT1BS8uADcFvGoUYOOfwFOVq_VBl472TisdoA3wLick,280
6
- cognite/neat/core/_constants.py,sha256=Cc7p1QbpbzPSjlf-PB_Ow2pKRhWQL-K8VJma96HzU3A,8863
6
+ cognite/neat/core/_constants.py,sha256=wIpGOzZAKS2vhshXR1K51cbcsgq2TlvZaZ3zw91CU9I,9054
7
7
  cognite/neat/core/_shared.py,sha256=Ov59SWYboRRsncB_5V1ZC_BAoACfNLjo80vqE5Ru6wo,2325
8
8
  cognite/neat/core/_client/__init__.py,sha256=RQ7MwL8mwGqGHokRzsPqO3XStDzmI4-c12_gz1UPJ74,177
9
9
  cognite/neat/core/_client/_api_client.py,sha256=CqgG4kEArI9jiKAh82YrRZv_SzeMKA5guIZOvDg2R5I,860
@@ -45,7 +45,7 @@ cognite/neat/core/_data_model/importers/_rdf/__init__.py,sha256=1yOjV2PKCxwH7uCT
45
45
  cognite/neat/core/_data_model/importers/_rdf/_base.py,sha256=XTdKqN4nf0d2Vvk56LShrDoyCQCg9MXvKwxXT0uMnGg,6016
46
46
  cognite/neat/core/_data_model/importers/_rdf/_inference2rdata_model.py,sha256=PCgM9-qGSLlupN7tYCFLHjivgICtMiahNry1ub8JCYk,28934
47
47
  cognite/neat/core/_data_model/importers/_rdf/_owl2data_model.py,sha256=l_89N1LewZVjSttcJkFAJj63JW8wI-THKJYjAxcNiDg,3093
48
- cognite/neat/core/_data_model/importers/_rdf/_shared.py,sha256=qibq94dFoy_g1ED5nIFYwHC_LYI4jI67HFVcMy7abrM,6665
48
+ cognite/neat/core/_data_model/importers/_rdf/_shared.py,sha256=Cni9yaIKnKh6tHngxhxgseVveB7ZV_doA92fUbMd8gY,9774
49
49
  cognite/neat/core/_data_model/models/__init__.py,sha256=hmF7MDR1XmpLxYdMkOEuPuHUqOQKE4AgsuUqdc-ySSQ,1249
50
50
  cognite/neat/core/_data_model/models/_base_unverified.py,sha256=1Wfbp-tJaEF6hd1bFdp2FhTgPkInf-1ZokuEoVJRPxQ,6842
51
51
  cognite/neat/core/_data_model/models/_base_verified.py,sha256=nzPrlj7ZvYull_Fdh2zeDXz98hux-eQOdTGy9jhUtYA,15127
@@ -149,7 +149,7 @@ cognite/neat/core/_utils/auxiliary.py,sha256=FpeVlscVue4QFaUM3jt7LLiAMkiec6IvpYr
149
149
  cognite/neat/core/_utils/collection_.py,sha256=zVrSmm4045pjw6Pt6y4VPTIJ4dXdMJPyOV70LdFyDBM,2376
150
150
  cognite/neat/core/_utils/graph_transformations_report.py,sha256=ORVH7lw357TPOq4elU5lH46Qx6GCLVrSj-1nX6Ggk1U,1235
151
151
  cognite/neat/core/_utils/io_.py,sha256=D2Mg8sOxfBoDg3fC0jBzaxO3vkXmr0QvZSgYIv6xRkM,386
152
- cognite/neat/core/_utils/rdf_.py,sha256=8AALp8H_nXEDSBo6jZ1idyT_x3K4PJT5ZyBEyxPmgxI,10403
152
+ cognite/neat/core/_utils/rdf_.py,sha256=2O4yaOO2af6NBBCFcKm5YMQRxOhjspqS6N3p5T3sV5g,11223
153
153
  cognite/neat/core/_utils/spreadsheet.py,sha256=VdjcCh339cKu9UwxJkYrmmnrbLguD71tdFmyd3MlIZA,5951
154
154
  cognite/neat/core/_utils/tarjan.py,sha256=IZvwaIITryGVNbo9Bv5EA9_sW3DyfUNAe7uYyPOCL0g,1357
155
155
  cognite/neat/core/_utils/text.py,sha256=ON4ihfscFJkQqQ-Rj46XXtf-9tAobwXbbfa3wuekSu4,8519
@@ -195,7 +195,7 @@ cognite/neat/session/engine/__init__.py,sha256=D3MxUorEs6-NtgoICqtZ8PISQrjrr4dvc
195
195
  cognite/neat/session/engine/_import.py,sha256=1QxA2_EK613lXYAHKQbZyw2yjo5P9XuiX4Z6_6-WMNQ,169
196
196
  cognite/neat/session/engine/_interface.py,sha256=3W-cYr493c_mW3P5O6MKN1xEQg3cA7NHR_ev3zdF9Vk,533
197
197
  cognite/neat/session/engine/_load.py,sha256=g52uYakQM03VqHt_RDHtpHso1-mFFifH5M4T2ScuH8A,5198
198
- cognite_neat-0.123.23.dist-info/METADATA,sha256=f78SKr8-LucvAchjKK1niKDLVAWwdadAm5x4dOdl_5s,9172
199
- cognite_neat-0.123.23.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
200
- cognite_neat-0.123.23.dist-info/licenses/LICENSE,sha256=W8VmvFia4WHa3Gqxq1Ygrq85McUNqIGDVgtdvzT-XqA,11351
201
- cognite_neat-0.123.23.dist-info/RECORD,,
198
+ cognite_neat-0.123.25.dist-info/METADATA,sha256=UATxdmkTTf9TwT0BwXNWjPVRT1LKrWejsWN5S5DLFcM,9172
199
+ cognite_neat-0.123.25.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
200
+ cognite_neat-0.123.25.dist-info/licenses/LICENSE,sha256=W8VmvFia4WHa3Gqxq1Ygrq85McUNqIGDVgtdvzT-XqA,11351
201
+ cognite_neat-0.123.25.dist-info/RECORD,,