cognite-neat 0.110.0__py3-none-any.whl → 0.111.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cognite-neat might be problematic. Click here for more details.
- cognite/neat/_alpha.py +6 -0
- cognite/neat/_client/_api/schema.py +26 -0
- cognite/neat/_client/data_classes/schema.py +1 -1
- cognite/neat/_constants.py +4 -1
- cognite/neat/_graph/extractors/__init__.py +4 -0
- cognite/neat/_graph/extractors/_classic_cdf/_base.py +8 -16
- cognite/neat/_graph/extractors/_classic_cdf/_classic.py +39 -9
- cognite/neat/_graph/extractors/_classic_cdf/_relationships.py +23 -17
- cognite/neat/_graph/extractors/_classic_cdf/_sequences.py +15 -17
- cognite/neat/_graph/extractors/_dict.py +102 -0
- cognite/neat/_graph/extractors/_dms.py +27 -40
- cognite/neat/_graph/extractors/_dms_graph.py +30 -3
- cognite/neat/_graph/extractors/_raw.py +67 -0
- cognite/neat/_graph/loaders/_base.py +20 -4
- cognite/neat/_graph/loaders/_rdf2dms.py +243 -89
- cognite/neat/_graph/queries/_base.py +137 -43
- cognite/neat/_graph/transformers/_classic_cdf.py +6 -22
- cognite/neat/_issues/_factory.py +9 -1
- cognite/neat/_issues/errors/__init__.py +2 -0
- cognite/neat/_issues/errors/_external.py +7 -0
- cognite/neat/_issues/warnings/user_modeling.py +12 -0
- cognite/neat/_rules/_constants.py +3 -0
- cognite/neat/_rules/analysis/_base.py +29 -50
- cognite/neat/_rules/exporters/_rules2excel.py +1 -1
- cognite/neat/_rules/importers/_rdf/_inference2rules.py +16 -10
- cognite/neat/_rules/models/_base_rules.py +0 -2
- cognite/neat/_rules/models/data_types.py +7 -0
- cognite/neat/_rules/models/dms/_exporter.py +9 -8
- cognite/neat/_rules/models/dms/_rules.py +26 -1
- cognite/neat/_rules/models/dms/_rules_input.py +5 -1
- cognite/neat/_rules/models/dms/_validation.py +101 -1
- cognite/neat/_rules/models/entities/_single_value.py +8 -3
- cognite/neat/_rules/models/entities/_wrapped.py +2 -2
- cognite/neat/_rules/models/information/_rules_input.py +1 -0
- cognite/neat/_rules/models/information/_validation.py +64 -17
- cognite/neat/_rules/transformers/_converters.py +7 -2
- cognite/neat/_session/_base.py +2 -0
- cognite/neat/_session/_explore.py +39 -0
- cognite/neat/_session/_inspect.py +25 -6
- cognite/neat/_session/_read.py +67 -3
- cognite/neat/_session/_set.py +7 -1
- cognite/neat/_session/_state.py +6 -0
- cognite/neat/_session/_to.py +115 -8
- cognite/neat/_store/_graph_store.py +8 -4
- cognite/neat/_utils/rdf_.py +34 -3
- cognite/neat/_utils/text.py +72 -4
- cognite/neat/_utils/upload.py +2 -0
- cognite/neat/_version.py +2 -2
- {cognite_neat-0.110.0.dist-info → cognite_neat-0.111.1.dist-info}/METADATA +1 -1
- {cognite_neat-0.110.0.dist-info → cognite_neat-0.111.1.dist-info}/RECORD +53 -50
- {cognite_neat-0.110.0.dist-info → cognite_neat-0.111.1.dist-info}/LICENSE +0 -0
- {cognite_neat-0.110.0.dist-info → cognite_neat-0.111.1.dist-info}/WHEEL +0 -0
- {cognite_neat-0.110.0.dist-info → cognite_neat-0.111.1.dist-info}/entry_points.txt +0 -0
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import difflib
|
|
2
|
-
from collections.abc import Callable
|
|
2
|
+
from collections.abc import Callable, Set
|
|
3
3
|
from typing import Literal, overload
|
|
4
4
|
|
|
5
5
|
import pandas as pd
|
|
@@ -85,11 +85,13 @@ class InspectIssues:
|
|
|
85
85
|
|
|
86
86
|
def __init__(self, state: SessionState) -> None:
|
|
87
87
|
self._state = state
|
|
88
|
+
self._max_display = 50
|
|
88
89
|
|
|
89
90
|
@overload
|
|
90
91
|
def __call__(
|
|
91
92
|
self,
|
|
92
93
|
search: str | None = None,
|
|
94
|
+
include: Literal["all", "errors", "warning"] | Set[Literal["all", "errors", "warning"]] = "all",
|
|
93
95
|
return_dataframe: Literal[True] = (False if IN_NOTEBOOK else True), # type: ignore[assignment]
|
|
94
96
|
) -> pd.DataFrame: ...
|
|
95
97
|
|
|
@@ -97,12 +99,14 @@ class InspectIssues:
|
|
|
97
99
|
def __call__(
|
|
98
100
|
self,
|
|
99
101
|
search: str | None = None,
|
|
102
|
+
include: Literal["all", "errors", "warning"] | Set[Literal["all", "errors", "warning"]] = "all",
|
|
100
103
|
return_dataframe: Literal[False] = (False if IN_NOTEBOOK else True), # type: ignore[assignment]
|
|
101
104
|
) -> None: ...
|
|
102
105
|
|
|
103
106
|
def __call__(
|
|
104
107
|
self,
|
|
105
108
|
search: str | None = None,
|
|
109
|
+
include: Literal["all", "errors", "warning"] | Set[Literal["all", "errors", "warning"]] = "all",
|
|
106
110
|
return_dataframe: bool = (False if IN_NOTEBOOK else True), # type: ignore[assignment]
|
|
107
111
|
) -> pd.DataFrame | None:
|
|
108
112
|
"""Returns the issues of the current data model."""
|
|
@@ -113,6 +117,13 @@ class InspectIssues:
|
|
|
113
117
|
elif issues is None:
|
|
114
118
|
self._print("No issues found.")
|
|
115
119
|
return pd.DataFrame() if return_dataframe else None
|
|
120
|
+
include_set = {include} if isinstance(include, str) else include
|
|
121
|
+
if "all" in include_set:
|
|
122
|
+
include_set = {"errors", "warning"}
|
|
123
|
+
if "warning" not in include_set:
|
|
124
|
+
issues = issues.errors
|
|
125
|
+
if "errors" not in include_set:
|
|
126
|
+
issues = issues.warnings
|
|
116
127
|
|
|
117
128
|
if issues and search is not None:
|
|
118
129
|
unique_types = {type(issue).__name__ for issue in issues}
|
|
@@ -120,18 +131,21 @@ class InspectIssues:
|
|
|
120
131
|
issues = IssueList([issue for issue in issues if type(issue).__name__ in closest_match])
|
|
121
132
|
|
|
122
133
|
issue_str = "\n".join(
|
|
123
|
-
[
|
|
134
|
+
[
|
|
135
|
+
f" * **{type(issue).__name__}**: {issue.as_message(include_type=False)}"
|
|
136
|
+
for issue in issues[: self._max_display]
|
|
137
|
+
]
|
|
138
|
+
+ ([] if len(issues) <= 50 else [f" * ... {len(issues) - self._max_display} more"])
|
|
124
139
|
)
|
|
125
140
|
markdown_str = f"### {len(issues)} issues found\n\n{issue_str}"
|
|
126
|
-
|
|
127
141
|
if IN_NOTEBOOK:
|
|
128
142
|
from IPython.display import Markdown, display
|
|
129
143
|
|
|
130
144
|
display(Markdown(markdown_str))
|
|
131
145
|
elif RICH_AVAILABLE:
|
|
132
|
-
from rich import print
|
|
146
|
+
from rich import print as rprint
|
|
133
147
|
|
|
134
|
-
|
|
148
|
+
rprint(RichMarkdown(markdown_str))
|
|
135
149
|
|
|
136
150
|
if return_dataframe:
|
|
137
151
|
return issues.to_pandas()
|
|
@@ -170,6 +184,7 @@ class InspectOutcome:
|
|
|
170
184
|
class InspectUploadOutcome:
|
|
171
185
|
def __init__(self, get_last_outcome: Callable[[], UploadResultList]) -> None:
|
|
172
186
|
self._get_last_outcome = get_last_outcome
|
|
187
|
+
self._max_display = 50
|
|
173
188
|
|
|
174
189
|
@staticmethod
|
|
175
190
|
def _as_set(value: str | list[str] | None) -> set[str] | None:
|
|
@@ -223,7 +238,7 @@ class InspectUploadOutcome:
|
|
|
223
238
|
from IPython.display import Markdown, display
|
|
224
239
|
|
|
225
240
|
lines: list[str] = []
|
|
226
|
-
for item in outcome:
|
|
241
|
+
for line_no, item in enumerate(outcome):
|
|
227
242
|
lines.append(f"### {item.name}")
|
|
228
243
|
if unique_errors := set(item.error_messages):
|
|
229
244
|
lines.append("#### Errors")
|
|
@@ -255,6 +270,10 @@ class InspectUploadOutcome:
|
|
|
255
270
|
else:
|
|
256
271
|
lines.append(f" * {value}")
|
|
257
272
|
|
|
273
|
+
if line_no >= self._max_display:
|
|
274
|
+
lines.append(f"### ... {len(outcome) - self._max_display} more")
|
|
275
|
+
break
|
|
276
|
+
|
|
258
277
|
display(Markdown("\n".join(lines)))
|
|
259
278
|
|
|
260
279
|
if return_dataframe:
|
cognite/neat/_session/_read.py
CHANGED
|
@@ -153,6 +153,45 @@ class CDFReadAPI(BaseReadAPI):
|
|
|
153
153
|
)
|
|
154
154
|
return self._state.write_graph(extractor)
|
|
155
155
|
|
|
156
|
+
def raw(
|
|
157
|
+
self,
|
|
158
|
+
db_name: str,
|
|
159
|
+
table_name: str,
|
|
160
|
+
type: str | None = None,
|
|
161
|
+
foreign_keys: str | SequenceNotStr[str] | None = None,
|
|
162
|
+
unpack_json: bool = False,
|
|
163
|
+
str_to_ideal_type: bool = False,
|
|
164
|
+
) -> IssueList:
|
|
165
|
+
"""Reads a raw table from CDF to the knowledge graph.
|
|
166
|
+
|
|
167
|
+
Args:
|
|
168
|
+
db_name: The name of the database
|
|
169
|
+
table_name: The name of the table. It is used as the type of the instances unless 'type' is given.
|
|
170
|
+
type: The type of instances in the table. If None, the table name will be used.
|
|
171
|
+
foreign_keys: The name of the columns that are foreign keys. If None, no foreign keys are used.
|
|
172
|
+
unpack_json: If True, the JSON objects will be unpacked into the graph.
|
|
173
|
+
str_to_ideal_type: If True, the string values will be converted to ideal types.
|
|
174
|
+
|
|
175
|
+
Returns:
|
|
176
|
+
IssueList: A list of issues that occurred during the extraction.
|
|
177
|
+
|
|
178
|
+
Example:
|
|
179
|
+
```python
|
|
180
|
+
neat.read.cdf.raw("my_db", "my_table", "Asset")
|
|
181
|
+
```
|
|
182
|
+
|
|
183
|
+
"""
|
|
184
|
+
extractor = extractors.RAWExtractor(
|
|
185
|
+
self._get_client,
|
|
186
|
+
db_name=db_name,
|
|
187
|
+
table_name=table_name,
|
|
188
|
+
table_type=type,
|
|
189
|
+
foreign_keys=foreign_keys,
|
|
190
|
+
unpack_json=unpack_json,
|
|
191
|
+
str_to_ideal_type=str_to_ideal_type,
|
|
192
|
+
)
|
|
193
|
+
return self._state.instances.store.write(extractor)
|
|
194
|
+
|
|
156
195
|
|
|
157
196
|
@session_class_wrapper
|
|
158
197
|
class CDFClassicAPI(BaseReadAPI):
|
|
@@ -229,6 +268,8 @@ class CDFClassicAPI(BaseReadAPI):
|
|
|
229
268
|
identifier: Literal["id", "externalId"] = "id",
|
|
230
269
|
reference_timeseries: bool = False,
|
|
231
270
|
reference_files: bool = False,
|
|
271
|
+
unpack_metadata: bool = False,
|
|
272
|
+
skip_sequence_rows: bool = False,
|
|
232
273
|
) -> IssueList:
|
|
233
274
|
namespace = CLASSIC_CDF_NAMESPACE
|
|
234
275
|
extractor = extractors.ClassicGraphExtractor(
|
|
@@ -238,7 +279,11 @@ class CDFClassicAPI(BaseReadAPI):
|
|
|
238
279
|
namespace=namespace,
|
|
239
280
|
prefix="Classic",
|
|
240
281
|
identifier=identifier,
|
|
282
|
+
unpack_metadata=unpack_metadata,
|
|
283
|
+
skip_sequence_rows=skip_sequence_rows,
|
|
241
284
|
)
|
|
285
|
+
self._state.instances.neat_prefix_by_predicate_uri.update(extractor.neat_prefix_by_predicate_uri)
|
|
286
|
+
self._state.instances.neat_prefix_by_type_uri.update(extractor.neat_prefix_by_type_uri)
|
|
242
287
|
extract_issues = self._state.write_graph(extractor)
|
|
243
288
|
if identifier == "externalId":
|
|
244
289
|
self._state.quoted_source_identifiers = True
|
|
@@ -361,6 +406,9 @@ class CSVReadAPI(BaseReadAPI):
|
|
|
361
406
|
"""
|
|
362
407
|
|
|
363
408
|
def __call__(self, io: Any, type: str, primary_key: str) -> None:
|
|
409
|
+
warnings.filterwarnings("default")
|
|
410
|
+
AlphaFlags.csv_read.warn()
|
|
411
|
+
|
|
364
412
|
engine = import_engine()
|
|
365
413
|
engine.set.format = "csv"
|
|
366
414
|
engine.set.file = NeatReader.create(io).materialize_path()
|
|
@@ -416,6 +464,9 @@ class XMLReadAPI(BaseReadAPI):
|
|
|
416
464
|
- remove associations between nodes that do not exist in the extracted graph
|
|
417
465
|
- remove edges to nodes that do not exist in the extracted graph
|
|
418
466
|
"""
|
|
467
|
+
warnings.filterwarnings("default")
|
|
468
|
+
AlphaFlags.dexpi_read.warn()
|
|
469
|
+
|
|
419
470
|
path = NeatReader.create(io).materialize_path()
|
|
420
471
|
engine = import_engine()
|
|
421
472
|
engine.set.format = "dexpi"
|
|
@@ -467,6 +518,9 @@ class XMLReadAPI(BaseReadAPI):
|
|
|
467
518
|
- remove unused attributes
|
|
468
519
|
- remove edges to nodes that do not exist in the extracted graph
|
|
469
520
|
"""
|
|
521
|
+
warnings.filterwarnings("default")
|
|
522
|
+
AlphaFlags.aml_read.warn()
|
|
523
|
+
|
|
470
524
|
path = NeatReader.create(io).materialize_path()
|
|
471
525
|
engine = import_engine()
|
|
472
526
|
engine.set.format = "aml"
|
|
@@ -518,6 +572,9 @@ class RDFReadAPI(BaseReadAPI):
|
|
|
518
572
|
neat.read.rdf.ontology("url_or_path_to_owl_source")
|
|
519
573
|
```
|
|
520
574
|
"""
|
|
575
|
+
warnings.filterwarnings("default")
|
|
576
|
+
AlphaFlags.ontology_read.warn()
|
|
577
|
+
|
|
521
578
|
reader = NeatReader.create(io)
|
|
522
579
|
importer = importers.OWLImporter.from_file(reader.materialize_path(), source_name=f"file {reader!s}")
|
|
523
580
|
return self._state.rule_import(importer)
|
|
@@ -533,10 +590,18 @@ class RDFReadAPI(BaseReadAPI):
|
|
|
533
590
|
neat.read.rdf.imf("url_or_path_to_imf_source")
|
|
534
591
|
```
|
|
535
592
|
"""
|
|
593
|
+
warnings.filterwarnings("default")
|
|
594
|
+
AlphaFlags.imf_read.warn()
|
|
595
|
+
|
|
536
596
|
reader = NeatReader.create(io)
|
|
537
597
|
importer = importers.IMFImporter.from_file(reader.materialize_path(), source_name=f"file {reader!s}")
|
|
538
598
|
return self._state.rule_import(importer)
|
|
539
599
|
|
|
600
|
+
def instances(self, io: Any) -> IssueList:
|
|
601
|
+
reader = NeatReader.create(io)
|
|
602
|
+
self._state.instances.store.write(extractors.RdfFileExtractor(reader.materialize_path()))
|
|
603
|
+
return IssueList()
|
|
604
|
+
|
|
540
605
|
def __call__(
|
|
541
606
|
self,
|
|
542
607
|
io: Any,
|
|
@@ -560,9 +625,8 @@ class RDFReadAPI(BaseReadAPI):
|
|
|
560
625
|
raise ValueError(f"Expected ontology, imf types or instances, got {source}")
|
|
561
626
|
|
|
562
627
|
elif type == "instances":
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
return IssueList()
|
|
628
|
+
return self.instances(io)
|
|
629
|
+
|
|
566
630
|
else:
|
|
567
631
|
raise NeatSessionError(f"Expected data model or instances, got {type}")
|
|
568
632
|
|
cognite/neat/_session/_set.py
CHANGED
|
@@ -23,14 +23,20 @@ class SetAPI:
|
|
|
23
23
|
self._verbose = verbose
|
|
24
24
|
self.instances = SetInstances(state, verbose)
|
|
25
25
|
|
|
26
|
-
def data_model_id(self, new_model_id: dm.DataModelId | tuple[str, str, str]) -> IssueList:
|
|
26
|
+
def data_model_id(self, new_model_id: dm.DataModelId | tuple[str, str, str], name: str | None = None) -> IssueList:
|
|
27
27
|
"""Sets the data model ID of the latest verified data model. Set the data model id as a tuple of strings
|
|
28
28
|
following the template (<data_model_space>, <data_model_name>, <data_model_version>).
|
|
29
29
|
|
|
30
|
+
Args:
|
|
31
|
+
new_model_id (dm.DataModelId | tuple[str, str, str]): The new data model id.
|
|
32
|
+
name (str, optional): The display name of the data model. If not set, the external ID will be used
|
|
33
|
+
to generate the name.
|
|
34
|
+
|
|
30
35
|
Example:
|
|
31
36
|
Set a new data model id:
|
|
32
37
|
```python
|
|
33
38
|
neat.set.data_model_id(("my_data_model_space", "My_Data_Model", "v1"))
|
|
39
|
+
neat.set.data_model_id(("my_data_model_space", "MyDataModel", "v1"), name="My Data Model")
|
|
34
40
|
```
|
|
35
41
|
"""
|
|
36
42
|
if self._state.rule_store.empty:
|
cognite/neat/_session/_state.py
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
from pathlib import Path
|
|
2
2
|
from typing import Literal, cast
|
|
3
3
|
|
|
4
|
+
from rdflib import URIRef
|
|
5
|
+
|
|
4
6
|
from cognite.neat._client import NeatClient
|
|
5
7
|
from cognite.neat._graph.extractors import KnowledgeGraphExtractor
|
|
6
8
|
from cognite.neat._issues import IssueList
|
|
@@ -74,6 +76,10 @@ class InstancesState:
|
|
|
74
76
|
self.storage_path = storage_path
|
|
75
77
|
self.issue_lists = IssueList()
|
|
76
78
|
self.outcome = UploadResultList()
|
|
79
|
+
# These contain prefixes added by Neat at the extraction stage.
|
|
80
|
+
# We store them such that they can be removed in the load stage.
|
|
81
|
+
self.neat_prefix_by_predicate_uri: dict[URIRef, str] = {}
|
|
82
|
+
self.neat_prefix_by_type_uri: dict[URIRef, str] = {}
|
|
77
83
|
|
|
78
84
|
# Ensure that error handling is done in the constructor
|
|
79
85
|
self.store: NeatGraphStore = _session_method_wrapper(self._create_store, "NeatSession")()
|
cognite/neat/_session/_to.py
CHANGED
|
@@ -10,7 +10,7 @@ from cognite.client.data_classes.data_modeling import DataModelIdentifier
|
|
|
10
10
|
from cognite.neat._alpha import AlphaFlags
|
|
11
11
|
from cognite.neat._constants import COGNITE_MODELS
|
|
12
12
|
from cognite.neat._graph import loaders
|
|
13
|
-
from cognite.neat._issues import IssueList, catch_issues
|
|
13
|
+
from cognite.neat._issues import IssueList, NeatIssue, catch_issues
|
|
14
14
|
from cognite.neat._rules import exporters
|
|
15
15
|
from cognite.neat._rules._constants import PATTERNS
|
|
16
16
|
from cognite.neat._rules._shared import VerifiedRules
|
|
@@ -35,6 +35,32 @@ class ToAPI:
|
|
|
35
35
|
self._state = state
|
|
36
36
|
self._verbose = verbose
|
|
37
37
|
self.cdf = CDFToAPI(state, verbose)
|
|
38
|
+
self._python = ToPythonAPI(state, verbose)
|
|
39
|
+
|
|
40
|
+
def ontology(self, io: Any) -> None:
|
|
41
|
+
"""Export the data model to ontology.
|
|
42
|
+
|
|
43
|
+
Args:
|
|
44
|
+
io: The file path or file-like object to write the ontology to.
|
|
45
|
+
|
|
46
|
+
Example:
|
|
47
|
+
Export the data model to an ontology file
|
|
48
|
+
```python
|
|
49
|
+
ontology_file_name = "neat_session.ttl"
|
|
50
|
+
neat.to.ontology(ontology_file_name)
|
|
51
|
+
```
|
|
52
|
+
"""
|
|
53
|
+
warnings.filterwarnings("default")
|
|
54
|
+
AlphaFlags.to_ontology.warn()
|
|
55
|
+
|
|
56
|
+
filepath = Path(io)
|
|
57
|
+
if filepath.suffix != ".ttl":
|
|
58
|
+
warnings.warn("File extension is not .ttl, adding it to the file name", stacklevel=2)
|
|
59
|
+
filepath = filepath.with_suffix(".ttl")
|
|
60
|
+
|
|
61
|
+
exporter = exporters.OWLExporter()
|
|
62
|
+
self._state.rule_store.export_to_file(exporter, Path(io))
|
|
63
|
+
return None
|
|
38
64
|
|
|
39
65
|
def excel(
|
|
40
66
|
self,
|
|
@@ -209,6 +235,7 @@ class ToAPI:
|
|
|
209
235
|
neat.to.yaml(your_folder_name, format="toolkit")
|
|
210
236
|
```
|
|
211
237
|
"""
|
|
238
|
+
|
|
212
239
|
if format == "neat":
|
|
213
240
|
exporter = exporters.YAMLExporter()
|
|
214
241
|
if io is None:
|
|
@@ -270,28 +297,41 @@ class CDFToAPI:
|
|
|
270
297
|
```
|
|
271
298
|
|
|
272
299
|
"""
|
|
300
|
+
return self._instances(instance_space=space, space_from_property=space_property)
|
|
301
|
+
|
|
302
|
+
def _instances(
|
|
303
|
+
self,
|
|
304
|
+
instance_space: str | None = None,
|
|
305
|
+
space_from_property: str | None = None,
|
|
306
|
+
use_source_space: bool = False,
|
|
307
|
+
) -> UploadResultList:
|
|
273
308
|
if not self._state.client:
|
|
274
309
|
raise NeatSessionError("No CDF client provided!")
|
|
275
310
|
client = self._state.client
|
|
276
|
-
|
|
311
|
+
dms_rules = self._state.rule_store.last_verified_dms_rules
|
|
312
|
+
instance_space = instance_space or f"{dms_rules.metadata.space}_instances"
|
|
277
313
|
|
|
278
|
-
if
|
|
314
|
+
if instance_space and instance_space == dms_rules.metadata.space:
|
|
279
315
|
raise NeatSessionError("Space for instances must be different from the data model space.")
|
|
280
|
-
elif not PATTERNS.space_compliance.match(str(
|
|
316
|
+
elif not PATTERNS.space_compliance.match(str(instance_space)):
|
|
281
317
|
raise NeatSessionError("Please provide a valid space name. {PATTERNS.space_compliance.pattern}")
|
|
282
318
|
|
|
283
|
-
if not client.data_modeling.spaces.retrieve(
|
|
284
|
-
client.data_modeling.spaces.apply(dm.SpaceApply(space=
|
|
319
|
+
if not client.data_modeling.spaces.retrieve(instance_space):
|
|
320
|
+
client.data_modeling.spaces.apply(dm.SpaceApply(space=instance_space))
|
|
285
321
|
|
|
286
322
|
loader = loaders.DMSLoader(
|
|
287
323
|
self._state.rule_store.last_verified_dms_rules,
|
|
288
324
|
self._state.rule_store.last_verified_information_rules,
|
|
289
325
|
self._state.instances.store,
|
|
290
|
-
instance_space=
|
|
326
|
+
instance_space=instance_space,
|
|
291
327
|
client=client,
|
|
328
|
+
space_property=space_from_property,
|
|
329
|
+
use_source_space=use_source_space,
|
|
292
330
|
# In case urllib.parse.quote() was run on the extraction, we need to run
|
|
293
331
|
# urllib.parse.unquote() on the load.
|
|
294
|
-
unquote_external_ids=
|
|
332
|
+
unquote_external_ids=True,
|
|
333
|
+
neat_prefix_by_predicate_uri=self._state.instances.neat_prefix_by_predicate_uri,
|
|
334
|
+
neat_prefix_by_type_uri=self._state.instances.neat_prefix_by_type_uri,
|
|
295
335
|
)
|
|
296
336
|
|
|
297
337
|
result = loader.load_into_cdf(client)
|
|
@@ -334,3 +374,70 @@ class CDFToAPI:
|
|
|
334
374
|
result = self._state.rule_store.export_to_cdf(exporter, self._state.client, dry_run)
|
|
335
375
|
print("You can inspect the details with the .inspect.outcome.data_model(...) method.")
|
|
336
376
|
return result
|
|
377
|
+
|
|
378
|
+
|
|
379
|
+
@session_class_wrapper
|
|
380
|
+
class ToPythonAPI:
|
|
381
|
+
"""API used to write the contents of a NeatSession to Python objects"""
|
|
382
|
+
|
|
383
|
+
def __init__(self, state: SessionState, verbose: bool) -> None:
|
|
384
|
+
self._state = state
|
|
385
|
+
self._verbose = verbose
|
|
386
|
+
|
|
387
|
+
def instances(
|
|
388
|
+
self,
|
|
389
|
+
instance_space: str | None = None,
|
|
390
|
+
space_from_property: str | None = None,
|
|
391
|
+
use_source_space: bool = False,
|
|
392
|
+
) -> tuple[list[dm.InstanceApply], IssueList]:
|
|
393
|
+
"""Export the verified DMS instances to Python objects.
|
|
394
|
+
|
|
395
|
+
Args:
|
|
396
|
+
instance_space: The name of the instance space to use. Defaults to None.
|
|
397
|
+
space_from_property: This is an alternative to the 'instance_space' argument. If provided,
|
|
398
|
+
the space will be set to the value of the property with the given name for each instance.
|
|
399
|
+
If the property is not found, the 'instance_space' argument will be used. Defaults to None.
|
|
400
|
+
use_source_space: If True, the instance space will be set to the source space of the instance.
|
|
401
|
+
This is only relevant if the instances were extracted from CDF data models. Defaults to False.
|
|
402
|
+
|
|
403
|
+
Returns:
|
|
404
|
+
tuple[list[dm.InstanceApply], IssueList]: The instances as Python objects and the issues collected while loading them.
|
|
405
|
+
|
|
406
|
+
Example:
|
|
407
|
+
Export instances to Python objects
|
|
408
|
+
```python
|
|
409
|
+
instances, issues = neat.to._python.instances()
|
|
410
|
+
```
|
|
411
|
+
|
|
412
|
+
Export instances to Python objects using the `dataSetId` property as the space
|
|
413
|
+
```python
|
|
414
|
+
instances, issues = neat.to._python.instances(space_from_property="dataSetId")
|
|
415
|
+
```
|
|
416
|
+
"""
|
|
417
|
+
dms_rules = self._state.rule_store.last_verified_dms_rules
|
|
418
|
+
instance_space = instance_space or f"{dms_rules.metadata.space}_instances"
|
|
419
|
+
|
|
420
|
+
if instance_space and instance_space == dms_rules.metadata.space:
|
|
421
|
+
raise NeatSessionError("Space for instances must be different from the data model space.")
|
|
422
|
+
elif not PATTERNS.space_compliance.match(str(instance_space)):
|
|
423
|
+
raise NeatSessionError(f"Please provide a valid space name. {PATTERNS.space_compliance.pattern}")
|
|
424
|
+
|
|
425
|
+
loader = loaders.DMSLoader(
|
|
426
|
+
self._state.rule_store.last_verified_dms_rules,
|
|
427
|
+
self._state.rule_store.last_verified_information_rules,
|
|
428
|
+
self._state.instances.store,
|
|
429
|
+
instance_space=instance_space,
|
|
430
|
+
space_property=space_from_property,
|
|
431
|
+
use_source_space=use_source_space,
|
|
432
|
+
unquote_external_ids=True,
|
|
433
|
+
neat_prefix_by_predicate_uri=self._state.instances.neat_prefix_by_predicate_uri,
|
|
434
|
+
neat_prefix_by_type_uri=self._state.instances.neat_prefix_by_type_uri,
|
|
435
|
+
)
|
|
436
|
+
issue_list = IssueList()
|
|
437
|
+
instances: list[dm.InstanceApply] = []
|
|
438
|
+
for item in loader.load(stop_on_exception=False):
|
|
439
|
+
if isinstance(item, dm.InstanceApply):
|
|
440
|
+
instances.append(item)
|
|
441
|
+
elif isinstance(item, NeatIssue):
|
|
442
|
+
issue_list.append(item)
|
|
443
|
+
return instances, issue_list
|
|
@@ -3,7 +3,7 @@ import warnings
|
|
|
3
3
|
from collections.abc import Iterable
|
|
4
4
|
from datetime import datetime, timezone
|
|
5
5
|
from pathlib import Path
|
|
6
|
-
from typing import cast, overload
|
|
6
|
+
from typing import Any, cast, overload
|
|
7
7
|
from zipfile import ZipExtFile
|
|
8
8
|
|
|
9
9
|
import pandas as pd
|
|
@@ -239,14 +239,18 @@ class NeatGraphStore:
|
|
|
239
239
|
class_uri: URIRef,
|
|
240
240
|
named_graph: URIRef | None = None,
|
|
241
241
|
property_renaming_config: dict[URIRef, str] | None = None,
|
|
242
|
-
|
|
242
|
+
remove_uri_namespace: bool = True,
|
|
243
|
+
) -> Iterable[tuple[URIRef, dict[str | InstanceType, list[Any]]]]:
|
|
243
244
|
named_graph = named_graph or self.default_named_graph
|
|
244
245
|
|
|
245
|
-
instance_ids = self.queries.
|
|
246
|
+
instance_ids = self.queries.list_instances_ids(class_uri, named_graph=named_graph)
|
|
246
247
|
|
|
247
248
|
for instance_id in instance_ids:
|
|
248
249
|
if res := self.queries.describe(
|
|
249
|
-
instance_id=instance_id,
|
|
250
|
+
instance_id=instance_id,
|
|
251
|
+
instance_type=class_uri,
|
|
252
|
+
property_renaming_config=property_renaming_config,
|
|
253
|
+
remove_uri_namespace=remove_uri_namespace,
|
|
250
254
|
):
|
|
251
255
|
yield res
|
|
252
256
|
|
cognite/neat/_utils/rdf_.py
CHANGED
|
@@ -7,6 +7,8 @@ from pydantic import HttpUrl, TypeAdapter, ValidationError
|
|
|
7
7
|
from rdflib import Graph, Namespace, URIRef
|
|
8
8
|
from rdflib import Literal as RdfLiteral
|
|
9
9
|
|
|
10
|
+
from cognite.neat._constants import SPACE_URI_PATTERN
|
|
11
|
+
|
|
10
12
|
Triple: TypeAlias = tuple[URIRef, URIRef, RdfLiteral | URIRef]
|
|
11
13
|
|
|
12
14
|
|
|
@@ -100,12 +102,41 @@ def get_namespace(URI: URIRef, special_separator: str = "#_") -> str:
|
|
|
100
102
|
str
|
|
101
103
|
Entity id without namespace
|
|
102
104
|
"""
|
|
105
|
+
return split_uri(URI, special_separator)[0]
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def namespace_as_space(namespace: str) -> str | None:
|
|
109
|
+
if match := SPACE_URI_PATTERN.match(namespace):
|
|
110
|
+
return match.group("space")
|
|
111
|
+
return None
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def split_uri(URI: URIRef, special_separator: str = "#_") -> tuple[str, str]:
|
|
115
|
+
"""Splits URI into namespace and entity name
|
|
116
|
+
|
|
117
|
+
Parameters
|
|
118
|
+
----------
|
|
119
|
+
URI : URIRef
|
|
120
|
+
URI of an entity
|
|
121
|
+
special_separator : str
|
|
122
|
+
Special separator to use instead of # or / if present in URI
|
|
123
|
+
Set by default to "#_" which covers special client use case
|
|
124
|
+
|
|
125
|
+
Returns
|
|
126
|
+
-------
|
|
127
|
+
tuple[str, str]
|
|
128
|
+
Tuple of namespace and entity name
|
|
129
|
+
"""
|
|
103
130
|
if special_separator in URI:
|
|
104
|
-
|
|
131
|
+
namespace, rest = URI.split(special_separator, maxsplit=1)
|
|
132
|
+
namespace += special_separator
|
|
105
133
|
elif "#" in URI:
|
|
106
|
-
|
|
134
|
+
namespace, rest = URI.split("#", maxsplit=1)
|
|
135
|
+
namespace += "#"
|
|
107
136
|
else:
|
|
108
|
-
|
|
137
|
+
namespace, rest = URI.rsplit("/", maxsplit=1)
|
|
138
|
+
namespace += "/"
|
|
139
|
+
return namespace, rest
|
|
109
140
|
|
|
110
141
|
|
|
111
142
|
def as_neat_compliant_uri(uri: URIRef) -> URIRef:
|
cognite/neat/_utils/text.py
CHANGED
|
@@ -1,7 +1,44 @@
|
|
|
1
1
|
import re
|
|
2
|
-
|
|
2
|
+
import urllib.parse
|
|
3
|
+
from collections.abc import Collection, Set
|
|
4
|
+
from re import Pattern
|
|
3
5
|
from typing import Any
|
|
4
6
|
|
|
7
|
+
from cognite.neat._rules._constants import get_reserved_words
|
|
8
|
+
|
|
9
|
+
PREPOSITIONS = frozenset(
|
|
10
|
+
{
|
|
11
|
+
"in",
|
|
12
|
+
"on",
|
|
13
|
+
"at",
|
|
14
|
+
"by",
|
|
15
|
+
"for",
|
|
16
|
+
"with",
|
|
17
|
+
"about",
|
|
18
|
+
"against",
|
|
19
|
+
"between",
|
|
20
|
+
"into",
|
|
21
|
+
"through",
|
|
22
|
+
"during",
|
|
23
|
+
"before",
|
|
24
|
+
"after",
|
|
25
|
+
"above",
|
|
26
|
+
"below",
|
|
27
|
+
"to",
|
|
28
|
+
"from",
|
|
29
|
+
"up",
|
|
30
|
+
"down",
|
|
31
|
+
"out",
|
|
32
|
+
"off",
|
|
33
|
+
"over",
|
|
34
|
+
"under",
|
|
35
|
+
"again",
|
|
36
|
+
"further",
|
|
37
|
+
"then",
|
|
38
|
+
"once",
|
|
39
|
+
}
|
|
40
|
+
)
|
|
41
|
+
|
|
5
42
|
|
|
6
43
|
def to_camel_case(string: str) -> str:
|
|
7
44
|
"""Convert snake_case_name to camelCaseName.
|
|
@@ -127,6 +164,18 @@ def to_snake_case(string: str) -> str:
|
|
|
127
164
|
return "_".join(map(str.lower, words))
|
|
128
165
|
|
|
129
166
|
|
|
167
|
+
def to_words(string: str) -> str:
|
|
168
|
+
"""Converts snake_case, camelCase or PascalCase to words."""
|
|
169
|
+
return to_snake_case(string).replace("_", " ")
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
def title(text: str, skip_words: Set[str] = PREPOSITIONS) -> str:
|
|
173
|
+
"""Converts text to title case, skipping prepositions."""
|
|
174
|
+
words = (word.lower() for word in text.split())
|
|
175
|
+
titled_words = (word.capitalize() if word not in skip_words else word for word in words)
|
|
176
|
+
return " ".join(titled_words)
|
|
177
|
+
|
|
178
|
+
|
|
130
179
|
def sentence_or_string_to_camel(string: str) -> str:
|
|
131
180
|
# Could be a combination of kebab and pascal/camel case
|
|
132
181
|
if " " in string:
|
|
@@ -159,7 +208,8 @@ def humanize_collection(collection: Collection[Any], /, *, sort: bool = True) ->
|
|
|
159
208
|
|
|
160
209
|
|
|
161
210
|
class NamingStandardization:
|
|
162
|
-
|
|
211
|
+
_letter_number_underscore = re.compile(r"[^a-zA-Z0-9_]+")
|
|
212
|
+
_letter_number_underscore_hyphen = re.compile(r"[^a-zA-Z0-9_-]+")
|
|
163
213
|
_multi_underscore_pattern = re.compile(r"_+")
|
|
164
214
|
_start_letter_pattern = re.compile(r"^[a-zA-Z]")
|
|
165
215
|
|
|
@@ -182,6 +232,24 @@ class NamingStandardization:
|
|
|
182
232
|
return to_camel_case(clean)
|
|
183
233
|
|
|
184
234
|
@classmethod
|
|
185
|
-
def
|
|
186
|
-
|
|
235
|
+
def standardize_space_str(cls, raw: str) -> str:
|
|
236
|
+
clean = cls._clean_string(raw, cls._letter_number_underscore_hyphen)
|
|
237
|
+
if not cls._start_letter_pattern.match(clean):
|
|
238
|
+
clean = f"sp_{clean}"
|
|
239
|
+
if clean in set(get_reserved_words("space")):
|
|
240
|
+
clean = f"my_{clean}"
|
|
241
|
+
if len(clean) > 43:
|
|
242
|
+
clean = clean[:43]
|
|
243
|
+
if not (clean[-1].isalnum()) and len(clean) == 43:
|
|
244
|
+
clean = f"{clean[:-1]}x"
|
|
245
|
+
elif not clean[-1].isalnum():
|
|
246
|
+
clean = f"{clean}x"
|
|
247
|
+
if not clean:
|
|
248
|
+
raise ValueError("Space name must contain at least one letter.")
|
|
249
|
+
return to_snake_case(clean)
|
|
250
|
+
|
|
251
|
+
@classmethod
|
|
252
|
+
def _clean_string(cls, raw: str, clean_pattern: Pattern[str] = _letter_number_underscore) -> str:
|
|
253
|
+
raw = urllib.parse.unquote(raw)
|
|
254
|
+
raw = clean_pattern.sub("_", raw)
|
|
187
255
|
return cls._multi_underscore_pattern.sub("_", raw)
|
cognite/neat/_utils/upload.py
CHANGED
|
@@ -55,6 +55,7 @@ class UploadResult(UploadResultCore, Generic[T_ID]):
|
|
|
55
55
|
failed_upserted: set[T_ID] = field(default_factory=set)
|
|
56
56
|
failed_changed: set[T_ID] = field(default_factory=set)
|
|
57
57
|
failed_deleted: set[T_ID] = field(default_factory=set)
|
|
58
|
+
failed_items: list = field(default_factory=list)
|
|
58
59
|
|
|
59
60
|
@property
|
|
60
61
|
def failed(self) -> int:
|
|
@@ -129,4 +130,5 @@ class UploadResult(UploadResultCore, Generic[T_ID]):
|
|
|
129
130
|
failed_upserted=self.failed_upserted.union(other.failed_upserted),
|
|
130
131
|
failed_changed=self.failed_changed.union(other.failed_changed),
|
|
131
132
|
failed_deleted=self.failed_deleted.union(other.failed_deleted),
|
|
133
|
+
failed_items=self.failed_items + other.failed_items,
|
|
132
134
|
)
|
cognite/neat/_version.py
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
__version__ = "0.
|
|
2
|
-
__engine__ = "^2.0.
|
|
1
|
+
__version__ = "0.111.1"
|
|
2
|
+
__engine__ = "^2.0.4"
|