cognite_neat-0.110.0-py3-none-any.whl → cognite_neat-0.111.1-py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cognite-neat might be problematic. Click here for more details.

Files changed (53)
  1. cognite/neat/_alpha.py +6 -0
  2. cognite/neat/_client/_api/schema.py +26 -0
  3. cognite/neat/_client/data_classes/schema.py +1 -1
  4. cognite/neat/_constants.py +4 -1
  5. cognite/neat/_graph/extractors/__init__.py +4 -0
  6. cognite/neat/_graph/extractors/_classic_cdf/_base.py +8 -16
  7. cognite/neat/_graph/extractors/_classic_cdf/_classic.py +39 -9
  8. cognite/neat/_graph/extractors/_classic_cdf/_relationships.py +23 -17
  9. cognite/neat/_graph/extractors/_classic_cdf/_sequences.py +15 -17
  10. cognite/neat/_graph/extractors/_dict.py +102 -0
  11. cognite/neat/_graph/extractors/_dms.py +27 -40
  12. cognite/neat/_graph/extractors/_dms_graph.py +30 -3
  13. cognite/neat/_graph/extractors/_raw.py +67 -0
  14. cognite/neat/_graph/loaders/_base.py +20 -4
  15. cognite/neat/_graph/loaders/_rdf2dms.py +243 -89
  16. cognite/neat/_graph/queries/_base.py +137 -43
  17. cognite/neat/_graph/transformers/_classic_cdf.py +6 -22
  18. cognite/neat/_issues/_factory.py +9 -1
  19. cognite/neat/_issues/errors/__init__.py +2 -0
  20. cognite/neat/_issues/errors/_external.py +7 -0
  21. cognite/neat/_issues/warnings/user_modeling.py +12 -0
  22. cognite/neat/_rules/_constants.py +3 -0
  23. cognite/neat/_rules/analysis/_base.py +29 -50
  24. cognite/neat/_rules/exporters/_rules2excel.py +1 -1
  25. cognite/neat/_rules/importers/_rdf/_inference2rules.py +16 -10
  26. cognite/neat/_rules/models/_base_rules.py +0 -2
  27. cognite/neat/_rules/models/data_types.py +7 -0
  28. cognite/neat/_rules/models/dms/_exporter.py +9 -8
  29. cognite/neat/_rules/models/dms/_rules.py +26 -1
  30. cognite/neat/_rules/models/dms/_rules_input.py +5 -1
  31. cognite/neat/_rules/models/dms/_validation.py +101 -1
  32. cognite/neat/_rules/models/entities/_single_value.py +8 -3
  33. cognite/neat/_rules/models/entities/_wrapped.py +2 -2
  34. cognite/neat/_rules/models/information/_rules_input.py +1 -0
  35. cognite/neat/_rules/models/information/_validation.py +64 -17
  36. cognite/neat/_rules/transformers/_converters.py +7 -2
  37. cognite/neat/_session/_base.py +2 -0
  38. cognite/neat/_session/_explore.py +39 -0
  39. cognite/neat/_session/_inspect.py +25 -6
  40. cognite/neat/_session/_read.py +67 -3
  41. cognite/neat/_session/_set.py +7 -1
  42. cognite/neat/_session/_state.py +6 -0
  43. cognite/neat/_session/_to.py +115 -8
  44. cognite/neat/_store/_graph_store.py +8 -4
  45. cognite/neat/_utils/rdf_.py +34 -3
  46. cognite/neat/_utils/text.py +72 -4
  47. cognite/neat/_utils/upload.py +2 -0
  48. cognite/neat/_version.py +2 -2
  49. {cognite_neat-0.110.0.dist-info → cognite_neat-0.111.1.dist-info}/METADATA +1 -1
  50. {cognite_neat-0.110.0.dist-info → cognite_neat-0.111.1.dist-info}/RECORD +53 -50
  51. {cognite_neat-0.110.0.dist-info → cognite_neat-0.111.1.dist-info}/LICENSE +0 -0
  52. {cognite_neat-0.110.0.dist-info → cognite_neat-0.111.1.dist-info}/WHEEL +0 -0
  53. {cognite_neat-0.110.0.dist-info → cognite_neat-0.111.1.dist-info}/entry_points.txt +0 -0
@@ -1,5 +1,5 @@
1
1
  import difflib
2
- from collections.abc import Callable
2
+ from collections.abc import Callable, Set
3
3
  from typing import Literal, overload
4
4
 
5
5
  import pandas as pd
@@ -85,11 +85,13 @@ class InspectIssues:
85
85
 
86
86
  def __init__(self, state: SessionState) -> None:
87
87
  self._state = state
88
+ self._max_display = 50
88
89
 
89
90
  @overload
90
91
  def __call__(
91
92
  self,
92
93
  search: str | None = None,
94
+ include: Literal["all", "errors", "warning"] | Set[Literal["all", "errors", "warning"]] = "all",
93
95
  return_dataframe: Literal[True] = (False if IN_NOTEBOOK else True), # type: ignore[assignment]
94
96
  ) -> pd.DataFrame: ...
95
97
 
@@ -97,12 +99,14 @@ class InspectIssues:
97
99
  def __call__(
98
100
  self,
99
101
  search: str | None = None,
102
+ include: Literal["all", "errors", "warning"] | Set[Literal["all", "errors", "warning"]] = "all",
100
103
  return_dataframe: Literal[False] = (False if IN_NOTEBOOK else True), # type: ignore[assignment]
101
104
  ) -> None: ...
102
105
 
103
106
  def __call__(
104
107
  self,
105
108
  search: str | None = None,
109
+ include: Literal["all", "errors", "warning"] | Set[Literal["all", "errors", "warning"]] = "all",
106
110
  return_dataframe: bool = (False if IN_NOTEBOOK else True), # type: ignore[assignment]
107
111
  ) -> pd.DataFrame | None:
108
112
  """Returns the issues of the current data model."""
@@ -113,6 +117,13 @@ class InspectIssues:
113
117
  elif issues is None:
114
118
  self._print("No issues found.")
115
119
  return pd.DataFrame() if return_dataframe else None
120
+ include_set = {include} if isinstance(include, str) else include
121
+ if "all" in include_set:
122
+ include_set = {"errors", "warning"}
123
+ if "warning" not in include_set:
124
+ issues = issues.errors
125
+ if "errors" not in include_set:
126
+ issues = issues.warnings
116
127
 
117
128
  if issues and search is not None:
118
129
  unique_types = {type(issue).__name__ for issue in issues}
@@ -120,18 +131,21 @@ class InspectIssues:
120
131
  issues = IssueList([issue for issue in issues if type(issue).__name__ in closest_match])
121
132
 
122
133
  issue_str = "\n".join(
123
- [f" * **{type(issue).__name__}**: {issue.as_message(include_type=False)}" for issue in issues]
134
+ [
135
+ f" * **{type(issue).__name__}**: {issue.as_message(include_type=False)}"
136
+ for issue in issues[: self._max_display]
137
+ ]
138
+ + ([] if len(issues) <= 50 else [f" * ... {len(issues) - self._max_display} more"])
124
139
  )
125
140
  markdown_str = f"### {len(issues)} issues found\n\n{issue_str}"
126
-
127
141
  if IN_NOTEBOOK:
128
142
  from IPython.display import Markdown, display
129
143
 
130
144
  display(Markdown(markdown_str))
131
145
  elif RICH_AVAILABLE:
132
- from rich import print
146
+ from rich import print as rprint
133
147
 
134
- print(RichMarkdown(markdown_str))
148
+ rprint(RichMarkdown(markdown_str))
135
149
 
136
150
  if return_dataframe:
137
151
  return issues.to_pandas()
@@ -170,6 +184,7 @@ class InspectOutcome:
170
184
  class InspectUploadOutcome:
171
185
  def __init__(self, get_last_outcome: Callable[[], UploadResultList]) -> None:
172
186
  self._get_last_outcome = get_last_outcome
187
+ self._max_display = 50
173
188
 
174
189
  @staticmethod
175
190
  def _as_set(value: str | list[str] | None) -> set[str] | None:
@@ -223,7 +238,7 @@ class InspectUploadOutcome:
223
238
  from IPython.display import Markdown, display
224
239
 
225
240
  lines: list[str] = []
226
- for item in outcome:
241
+ for line_no, item in enumerate(outcome):
227
242
  lines.append(f"### {item.name}")
228
243
  if unique_errors := set(item.error_messages):
229
244
  lines.append("#### Errors")
@@ -255,6 +270,10 @@ class InspectUploadOutcome:
255
270
  else:
256
271
  lines.append(f" * {value}")
257
272
 
273
+ if line_no >= self._max_display:
274
+ lines.append(f"### ... {len(outcome) - self._max_display} more")
275
+ break
276
+
258
277
  display(Markdown("\n".join(lines)))
259
278
 
260
279
  if return_dataframe:
@@ -153,6 +153,45 @@ class CDFReadAPI(BaseReadAPI):
153
153
  )
154
154
  return self._state.write_graph(extractor)
155
155
 
156
+ def raw(
157
+ self,
158
+ db_name: str,
159
+ table_name: str,
160
+ type: str | None = None,
161
+ foreign_keys: str | SequenceNotStr[str] | None = None,
162
+ unpack_json: bool = False,
163
+ str_to_ideal_type: bool = False,
164
+ ) -> IssueList:
165
+ """Reads a raw table from CDF to the knowledge graph.
166
+
167
+ Args:
168
+ db_name: The name of the database
169
+ table_name: The name of the table, this will be assumed to be the type of the instances.
170
+ type: The type of instances in the table. If None, the table name will be used.
171
+ foreign_keys: The name of the columns that are foreign keys. If None, no foreign keys are used.
172
+ unpack_json: If True, the JSON objects will be unpacked into the graph.
173
+ str_to_ideal_type: If True, the string values will be converted to ideal types.
174
+
175
+ Returns:
176
+ IssueList: A list of issues that occurred during the extraction.
177
+
178
+ Example:
179
+ ```python
180
+ neat.read.cdf.raw("my_db", "my_table", "Asset")
181
+ ```
182
+
183
+ """
184
+ extractor = extractors.RAWExtractor(
185
+ self._get_client,
186
+ db_name=db_name,
187
+ table_name=table_name,
188
+ table_type=type,
189
+ foreign_keys=foreign_keys,
190
+ unpack_json=unpack_json,
191
+ str_to_ideal_type=str_to_ideal_type,
192
+ )
193
+ return self._state.instances.store.write(extractor)
194
+
156
195
 
157
196
  @session_class_wrapper
158
197
  class CDFClassicAPI(BaseReadAPI):
@@ -229,6 +268,8 @@ class CDFClassicAPI(BaseReadAPI):
229
268
  identifier: Literal["id", "externalId"] = "id",
230
269
  reference_timeseries: bool = False,
231
270
  reference_files: bool = False,
271
+ unpack_metadata: bool = False,
272
+ skip_sequence_rows: bool = False,
232
273
  ) -> IssueList:
233
274
  namespace = CLASSIC_CDF_NAMESPACE
234
275
  extractor = extractors.ClassicGraphExtractor(
@@ -238,7 +279,11 @@ class CDFClassicAPI(BaseReadAPI):
238
279
  namespace=namespace,
239
280
  prefix="Classic",
240
281
  identifier=identifier,
282
+ unpack_metadata=unpack_metadata,
283
+ skip_sequence_rows=skip_sequence_rows,
241
284
  )
285
+ self._state.instances.neat_prefix_by_predicate_uri.update(extractor.neat_prefix_by_predicate_uri)
286
+ self._state.instances.neat_prefix_by_type_uri.update(extractor.neat_prefix_by_type_uri)
242
287
  extract_issues = self._state.write_graph(extractor)
243
288
  if identifier == "externalId":
244
289
  self._state.quoted_source_identifiers = True
@@ -361,6 +406,9 @@ class CSVReadAPI(BaseReadAPI):
361
406
  """
362
407
 
363
408
  def __call__(self, io: Any, type: str, primary_key: str) -> None:
409
+ warnings.filterwarnings("default")
410
+ AlphaFlags.csv_read.warn()
411
+
364
412
  engine = import_engine()
365
413
  engine.set.format = "csv"
366
414
  engine.set.file = NeatReader.create(io).materialize_path()
@@ -416,6 +464,9 @@ class XMLReadAPI(BaseReadAPI):
416
464
  - remove associations between nodes that do not exist in the extracted graph
417
465
  - remove edges to nodes that do not exist in the extracted graph
418
466
  """
467
+ warnings.filterwarnings("default")
468
+ AlphaFlags.dexpi_read.warn()
469
+
419
470
  path = NeatReader.create(io).materialize_path()
420
471
  engine = import_engine()
421
472
  engine.set.format = "dexpi"
@@ -467,6 +518,9 @@ class XMLReadAPI(BaseReadAPI):
467
518
  - remove unused attributes
468
519
  - remove edges to nodes that do not exist in the extracted graph
469
520
  """
521
+ warnings.filterwarnings("default")
522
+ AlphaFlags.aml_read.warn()
523
+
470
524
  path = NeatReader.create(io).materialize_path()
471
525
  engine = import_engine()
472
526
  engine.set.format = "aml"
@@ -518,6 +572,9 @@ class RDFReadAPI(BaseReadAPI):
518
572
  neat.read.rdf.ontology("url_or_path_to_owl_source")
519
573
  ```
520
574
  """
575
+ warnings.filterwarnings("default")
576
+ AlphaFlags.ontology_read.warn()
577
+
521
578
  reader = NeatReader.create(io)
522
579
  importer = importers.OWLImporter.from_file(reader.materialize_path(), source_name=f"file {reader!s}")
523
580
  return self._state.rule_import(importer)
@@ -533,10 +590,18 @@ class RDFReadAPI(BaseReadAPI):
533
590
  neat.read.rdf.imf("url_or_path_to_imf_source")
534
591
  ```
535
592
  """
593
+ warnings.filterwarnings("default")
594
+ AlphaFlags.imf_read.warn()
595
+
536
596
  reader = NeatReader.create(io)
537
597
  importer = importers.IMFImporter.from_file(reader.materialize_path(), source_name=f"file {reader!s}")
538
598
  return self._state.rule_import(importer)
539
599
 
600
+ def instances(self, io: Any) -> IssueList:
601
+ reader = NeatReader.create(io)
602
+ self._state.instances.store.write(extractors.RdfFileExtractor(reader.materialize_path()))
603
+ return IssueList()
604
+
540
605
  def __call__(
541
606
  self,
542
607
  io: Any,
@@ -560,9 +625,8 @@ class RDFReadAPI(BaseReadAPI):
560
625
  raise ValueError(f"Expected ontology, imf types or instances, got {source}")
561
626
 
562
627
  elif type == "instances":
563
- reader = NeatReader.create(io)
564
- self._state.instances.store.write(extractors.RdfFileExtractor(reader.materialize_path()))
565
- return IssueList()
628
+ return self.instances(io)
629
+
566
630
  else:
567
631
  raise NeatSessionError(f"Expected data model or instances, got {type}")
568
632
 
@@ -23,14 +23,20 @@ class SetAPI:
23
23
  self._verbose = verbose
24
24
  self.instances = SetInstances(state, verbose)
25
25
 
26
- def data_model_id(self, new_model_id: dm.DataModelId | tuple[str, str, str]) -> IssueList:
26
+ def data_model_id(self, new_model_id: dm.DataModelId | tuple[str, str, str], name: str | None = None) -> IssueList:
27
27
  """Sets the data model ID of the latest verified data model. Set the data model id as a tuple of strings
28
28
  following the template (<data_model_space>, <data_model_name>, <data_model_version>).
29
29
 
30
+ Args:
31
+ new_model_id (dm.DataModelId | tuple[str, str, str]): The new data model id.
32
+ name (str, optional): The display name of the data model. If not set, the external ID will be used
33
+ to generate the name.
34
+
30
35
  Example:
31
36
  Set a new data model id:
32
37
  ```python
33
38
  neat.set.data_model_id(("my_data_model_space", "My_Data_Model", "v1"))
39
+ neat.set.data_model_id(("my_data_model_space", "MyDataModel", "v1"), name="My Data Model")
34
40
  ```
35
41
  """
36
42
  if self._state.rule_store.empty:
@@ -1,6 +1,8 @@
1
1
  from pathlib import Path
2
2
  from typing import Literal, cast
3
3
 
4
+ from rdflib import URIRef
5
+
4
6
  from cognite.neat._client import NeatClient
5
7
  from cognite.neat._graph.extractors import KnowledgeGraphExtractor
6
8
  from cognite.neat._issues import IssueList
@@ -74,6 +76,10 @@ class InstancesState:
74
76
  self.storage_path = storage_path
75
77
  self.issue_lists = IssueList()
76
78
  self.outcome = UploadResultList()
79
+ # These contain prefixes added by Neat at the extraction stage.
80
+ # We store them such that they can be removed in the load stage.
81
+ self.neat_prefix_by_predicate_uri: dict[URIRef, str] = {}
82
+ self.neat_prefix_by_type_uri: dict[URIRef, str] = {}
77
83
 
78
84
  # Ensure that error handling is done in the constructor
79
85
  self.store: NeatGraphStore = _session_method_wrapper(self._create_store, "NeatSession")()
@@ -10,7 +10,7 @@ from cognite.client.data_classes.data_modeling import DataModelIdentifier
10
10
  from cognite.neat._alpha import AlphaFlags
11
11
  from cognite.neat._constants import COGNITE_MODELS
12
12
  from cognite.neat._graph import loaders
13
- from cognite.neat._issues import IssueList, catch_issues
13
+ from cognite.neat._issues import IssueList, NeatIssue, catch_issues
14
14
  from cognite.neat._rules import exporters
15
15
  from cognite.neat._rules._constants import PATTERNS
16
16
  from cognite.neat._rules._shared import VerifiedRules
@@ -35,6 +35,32 @@ class ToAPI:
35
35
  self._state = state
36
36
  self._verbose = verbose
37
37
  self.cdf = CDFToAPI(state, verbose)
38
+ self._python = ToPythonAPI(state, verbose)
39
+
40
+ def ontology(self, io: Any) -> None:
41
+ """Export the data model to ontology.
42
+
43
+ Args:
44
+ io: The file path to file-like object to write the session to.
45
+
46
+ Example:
47
+ Export the session to a file
48
+ ```python
49
+ ontology_file_name = "neat_session.ttl"
50
+ neat.to.ontology(ontology_file_name)
51
+ ```
52
+ """
53
+ warnings.filterwarnings("default")
54
+ AlphaFlags.to_ontology.warn()
55
+
56
+ filepath = Path(io)
57
+ if filepath.suffix != ".ttl":
58
+ warnings.warn("File extension is not .ttl, adding it to the file name", stacklevel=2)
59
+ filepath = filepath.with_suffix(".ttl")
60
+
61
+ exporter = exporters.OWLExporter()
62
+ self._state.rule_store.export_to_file(exporter, Path(io))
63
+ return None
38
64
 
39
65
  def excel(
40
66
  self,
@@ -209,6 +235,7 @@ class ToAPI:
209
235
  neat.to.yaml(your_folder_name, format="toolkit")
210
236
  ```
211
237
  """
238
+
212
239
  if format == "neat":
213
240
  exporter = exporters.YAMLExporter()
214
241
  if io is None:
@@ -270,28 +297,41 @@ class CDFToAPI:
270
297
  ```
271
298
 
272
299
  """
300
+ return self._instances(instance_space=space, space_from_property=space_property)
301
+
302
+ def _instances(
303
+ self,
304
+ instance_space: str | None = None,
305
+ space_from_property: str | None = None,
306
+ use_source_space: bool = False,
307
+ ) -> UploadResultList:
273
308
  if not self._state.client:
274
309
  raise NeatSessionError("No CDF client provided!")
275
310
  client = self._state.client
276
- space = space or f"{self._state.rule_store.last_verified_dms_rules.metadata.space}_instances"
311
+ dms_rules = self._state.rule_store.last_verified_dms_rules
312
+ instance_space = instance_space or f"{dms_rules.metadata.space}_instances"
277
313
 
278
- if space and space == self._state.rule_store.last_verified_dms_rules.metadata.space:
314
+ if instance_space and instance_space == dms_rules.metadata.space:
279
315
  raise NeatSessionError("Space for instances must be different from the data model space.")
280
- elif not PATTERNS.space_compliance.match(str(space)):
316
+ elif not PATTERNS.space_compliance.match(str(instance_space)):
281
317
  raise NeatSessionError("Please provide a valid space name. {PATTERNS.space_compliance.pattern}")
282
318
 
283
- if not client.data_modeling.spaces.retrieve(space):
284
- client.data_modeling.spaces.apply(dm.SpaceApply(space=space))
319
+ if not client.data_modeling.spaces.retrieve(instance_space):
320
+ client.data_modeling.spaces.apply(dm.SpaceApply(space=instance_space))
285
321
 
286
322
  loader = loaders.DMSLoader(
287
323
  self._state.rule_store.last_verified_dms_rules,
288
324
  self._state.rule_store.last_verified_information_rules,
289
325
  self._state.instances.store,
290
- instance_space=space,
326
+ instance_space=instance_space,
291
327
  client=client,
328
+ space_property=space_from_property,
329
+ use_source_space=use_source_space,
292
330
  # In case urllib.parse.quote() was run on the extraction, we need to run
293
331
  # urllib.parse.unquote() on the load.
294
- unquote_external_ids=self._state.quoted_source_identifiers,
332
+ unquote_external_ids=True,
333
+ neat_prefix_by_predicate_uri=self._state.instances.neat_prefix_by_predicate_uri,
334
+ neat_prefix_by_type_uri=self._state.instances.neat_prefix_by_type_uri,
295
335
  )
296
336
 
297
337
  result = loader.load_into_cdf(client)
@@ -334,3 +374,70 @@ class CDFToAPI:
334
374
  result = self._state.rule_store.export_to_cdf(exporter, self._state.client, dry_run)
335
375
  print("You can inspect the details with the .inspect.outcome.data_model(...) method.")
336
376
  return result
377
+
378
+
379
+ @session_class_wrapper
380
+ class ToPythonAPI:
381
+ """API used to write the contents of a NeatSession to Python objects"""
382
+
383
+ def __init__(self, state: SessionState, verbose: bool) -> None:
384
+ self._state = state
385
+ self._verbose = verbose
386
+
387
+ def instances(
388
+ self,
389
+ instance_space: str | None = None,
390
+ space_from_property: str | None = None,
391
+ use_source_space: bool = False,
392
+ ) -> tuple[list[dm.InstanceApply], IssueList]:
393
+ """Export the verified DMS instances to Python objects.
394
+
395
+ Args:
396
+ instance_space: The name of the instance space to use. Defaults to None.
397
+ space_from_property: This is an alternative to the 'instance_space' argument. If provided,
398
+ the space will be set to the value of the property with the given name for each instance.
399
+ If the property is not found, the 'instance_space' argument will be used. Defaults to None.
400
+ use_source_space: If True, the instance space will be set to the source space of the instance.
401
+ This is only relevant if the instances were extracted from CDF data models. Defaults to False.
402
+
403
+ Returns:
404
+ list[dm.InstanceApply]: The instances as Python objects.
405
+
406
+ Example:
407
+ Export instances to Python objects
408
+ ```python
409
+ instances = neat.to._python.instances()
410
+ ```
411
+
412
+ Export instances to Python objects using the `dataSetId` property as the space
413
+ ```python
414
+ instances = neat.to._python.instances(space_from_property="dataSetId")
415
+ ```
416
+ """
417
+ dms_rules = self._state.rule_store.last_verified_dms_rules
418
+ instance_space = instance_space or f"{dms_rules.metadata.space}_instances"
419
+
420
+ if instance_space and instance_space == dms_rules.metadata.space:
421
+ raise NeatSessionError("Space for instances must be different from the data model space.")
422
+ elif not PATTERNS.space_compliance.match(str(instance_space)):
423
+ raise NeatSessionError(f"Please provide a valid space name. {PATTERNS.space_compliance.pattern}")
424
+
425
+ loader = loaders.DMSLoader(
426
+ self._state.rule_store.last_verified_dms_rules,
427
+ self._state.rule_store.last_verified_information_rules,
428
+ self._state.instances.store,
429
+ instance_space=instance_space,
430
+ space_property=space_from_property,
431
+ use_source_space=use_source_space,
432
+ unquote_external_ids=True,
433
+ neat_prefix_by_predicate_uri=self._state.instances.neat_prefix_by_predicate_uri,
434
+ neat_prefix_by_type_uri=self._state.instances.neat_prefix_by_type_uri,
435
+ )
436
+ issue_list = IssueList()
437
+ instances: list[dm.InstanceApply] = []
438
+ for item in loader.load(stop_on_exception=False):
439
+ if isinstance(item, dm.InstanceApply):
440
+ instances.append(item)
441
+ elif isinstance(item, NeatIssue):
442
+ issue_list.append(item)
443
+ return instances, issue_list
@@ -3,7 +3,7 @@ import warnings
3
3
  from collections.abc import Iterable
4
4
  from datetime import datetime, timezone
5
5
  from pathlib import Path
6
- from typing import cast, overload
6
+ from typing import Any, cast, overload
7
7
  from zipfile import ZipExtFile
8
8
 
9
9
  import pandas as pd
@@ -239,14 +239,18 @@ class NeatGraphStore:
239
239
  class_uri: URIRef,
240
240
  named_graph: URIRef | None = None,
241
241
  property_renaming_config: dict[URIRef, str] | None = None,
242
- ) -> Iterable[tuple[str, dict[str | InstanceType, list[str]]]]:
242
+ remove_uri_namespace: bool = True,
243
+ ) -> Iterable[tuple[URIRef, dict[str | InstanceType, list[Any]]]]:
243
244
  named_graph = named_graph or self.default_named_graph
244
245
 
245
- instance_ids = self.queries.list_instances_ids_of_class(class_uri, named_graph=named_graph)
246
+ instance_ids = self.queries.list_instances_ids(class_uri, named_graph=named_graph)
246
247
 
247
248
  for instance_id in instance_ids:
248
249
  if res := self.queries.describe(
249
- instance_id=instance_id, instance_type=class_uri, property_renaming_config=property_renaming_config
250
+ instance_id=instance_id,
251
+ instance_type=class_uri,
252
+ property_renaming_config=property_renaming_config,
253
+ remove_uri_namespace=remove_uri_namespace,
250
254
  ):
251
255
  yield res
252
256
 
@@ -7,6 +7,8 @@ from pydantic import HttpUrl, TypeAdapter, ValidationError
7
7
  from rdflib import Graph, Namespace, URIRef
8
8
  from rdflib import Literal as RdfLiteral
9
9
 
10
+ from cognite.neat._constants import SPACE_URI_PATTERN
11
+
10
12
  Triple: TypeAlias = tuple[URIRef, URIRef, RdfLiteral | URIRef]
11
13
 
12
14
 
@@ -100,12 +102,41 @@ def get_namespace(URI: URIRef, special_separator: str = "#_") -> str:
100
102
  str
101
103
  Entity id without namespace
102
104
  """
105
+ return split_uri(URI, special_separator)[0]
106
+
107
+
108
+ def namespace_as_space(namespace: str) -> str | None:
109
+ if match := SPACE_URI_PATTERN.match(namespace):
110
+ return match.group("space")
111
+ return None
112
+
113
+
114
+ def split_uri(URI: URIRef, special_separator: str = "#_") -> tuple[str, str]:
115
+ """Splits URI into namespace and entity name
116
+
117
+ Parameters
118
+ ----------
119
+ URI : URIRef
120
+ URI of an entity
121
+ special_separator : str
122
+ Special separator to use instead of # or / if present in URI
123
+ Set by default to "#_" which covers special client use case
124
+
125
+ Returns
126
+ -------
127
+ tuple[str, str]
128
+ Tuple of namespace and entity name
129
+ """
103
130
  if special_separator in URI:
104
- return URI.split(special_separator)[0] + special_separator
131
+ namespace, rest = URI.split(special_separator, maxsplit=1)
132
+ namespace += special_separator
105
133
  elif "#" in URI:
106
- return URI.split("#")[0] + "#"
134
+ namespace, rest = URI.split("#", maxsplit=1)
135
+ namespace += "#"
107
136
  else:
108
- return "/".join(URI.split("/")[:-1]) + "/"
137
+ namespace, rest = URI.rsplit("/", maxsplit=1)
138
+ namespace += "/"
139
+ return namespace, rest
109
140
 
110
141
 
111
142
  def as_neat_compliant_uri(uri: URIRef) -> URIRef:
@@ -1,7 +1,44 @@
1
1
  import re
2
- from collections.abc import Collection
2
+ import urllib.parse
3
+ from collections.abc import Collection, Set
4
+ from re import Pattern
3
5
  from typing import Any
4
6
 
7
+ from cognite.neat._rules._constants import get_reserved_words
8
+
9
+ PREPOSITIONS = frozenset(
10
+ {
11
+ "in",
12
+ "on",
13
+ "at",
14
+ "by",
15
+ "for",
16
+ "with",
17
+ "about",
18
+ "against",
19
+ "between",
20
+ "into",
21
+ "through",
22
+ "during",
23
+ "before",
24
+ "after",
25
+ "above",
26
+ "below",
27
+ "to",
28
+ "from",
29
+ "up",
30
+ "down",
31
+ "out",
32
+ "off",
33
+ "over",
34
+ "under",
35
+ "again",
36
+ "further",
37
+ "then",
38
+ "once",
39
+ }
40
+ )
41
+
5
42
 
6
43
  def to_camel_case(string: str) -> str:
7
44
  """Convert snake_case_name to camelCaseName.
@@ -127,6 +164,18 @@ def to_snake_case(string: str) -> str:
127
164
  return "_".join(map(str.lower, words))
128
165
 
129
166
 
167
+ def to_words(string: str) -> str:
168
+ """Converts snake_case camelCase or PascalCase to words."""
169
+ return to_snake_case(string).replace("_", " ")
170
+
171
+
172
+ def title(text: str, skip_words: Set[str] = PREPOSITIONS) -> str:
173
+ """Converts text to title case, skipping prepositions."""
174
+ words = (word.lower() for word in text.split())
175
+ titled_words = (word.capitalize() if word not in skip_words else word for word in words)
176
+ return " ".join(titled_words)
177
+
178
+
130
179
  def sentence_or_string_to_camel(string: str) -> str:
131
180
  # Could be a combination of kebab and pascal/camel case
132
181
  if " " in string:
@@ -159,7 +208,8 @@ def humanize_collection(collection: Collection[Any], /, *, sort: bool = True) ->
159
208
 
160
209
 
161
210
  class NamingStandardization:
162
- _clean_pattern = re.compile(r"[^a-zA-Z0-9_]+")
211
+ _letter_number_underscore = re.compile(r"[^a-zA-Z0-9_]+")
212
+ _letter_number_underscore_hyphen = re.compile(r"[^a-zA-Z0-9_-]+")
163
213
  _multi_underscore_pattern = re.compile(r"_+")
164
214
  _start_letter_pattern = re.compile(r"^[a-zA-Z]")
165
215
 
@@ -182,6 +232,24 @@ class NamingStandardization:
182
232
  return to_camel_case(clean)
183
233
 
184
234
  @classmethod
185
- def _clean_string(cls, raw: str) -> str:
186
- raw = cls._clean_pattern.sub("_", raw)
235
+ def standardize_space_str(cls, raw: str) -> str:
236
+ clean = cls._clean_string(raw, cls._letter_number_underscore_hyphen)
237
+ if not cls._start_letter_pattern.match(clean):
238
+ clean = f"sp_{clean}"
239
+ if clean in set(get_reserved_words("space")):
240
+ clean = f"my_{clean}"
241
+ if len(clean) > 43:
242
+ clean = clean[:43]
243
+ if not (clean[-1].isalnum()) and len(clean) == 43:
244
+ clean = f"{clean[:-1]}x"
245
+ elif not clean[-1].isalnum():
246
+ clean = f"{clean}x"
247
+ if not clean:
248
+ raise ValueError("Space name must contain at least one letter.")
249
+ return to_snake_case(clean)
250
+
251
+ @classmethod
252
+ def _clean_string(cls, raw: str, clean_pattern: Pattern[str] = _letter_number_underscore) -> str:
253
+ raw = urllib.parse.unquote(raw)
254
+ raw = clean_pattern.sub("_", raw)
187
255
  return cls._multi_underscore_pattern.sub("_", raw)
@@ -55,6 +55,7 @@ class UploadResult(UploadResultCore, Generic[T_ID]):
55
55
  failed_upserted: set[T_ID] = field(default_factory=set)
56
56
  failed_changed: set[T_ID] = field(default_factory=set)
57
57
  failed_deleted: set[T_ID] = field(default_factory=set)
58
+ failed_items: list = field(default_factory=list)
58
59
 
59
60
  @property
60
61
  def failed(self) -> int:
@@ -129,4 +130,5 @@ class UploadResult(UploadResultCore, Generic[T_ID]):
129
130
  failed_upserted=self.failed_upserted.union(other.failed_upserted),
130
131
  failed_changed=self.failed_changed.union(other.failed_changed),
131
132
  failed_deleted=self.failed_deleted.union(other.failed_deleted),
133
+ failed_items=self.failed_items + other.failed_items,
132
134
  )
cognite/neat/_version.py CHANGED
@@ -1,2 +1,2 @@
1
- __version__ = "0.110.0"
2
- __engine__ = "^2.0.3"
1
+ __version__ = "0.111.1"
2
+ __engine__ = "^2.0.4"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: cognite-neat
3
- Version: 0.110.0
3
+ Version: 0.111.1
4
4
  Summary: Knowledge graph transformation
5
5
  License: Apache-2.0
6
6
  Author: Nikola Vasiljevic