cognite-neat 0.107.0__py3-none-any.whl → 0.108.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cognite-neat might be problematic. Click here for more details.
- cognite/neat/_constants.py +35 -1
- cognite/neat/_graph/_shared.py +4 -0
- cognite/neat/_graph/extractors/_classic_cdf/_base.py +115 -14
- cognite/neat/_graph/extractors/_classic_cdf/_classic.py +83 -6
- cognite/neat/_graph/extractors/_classic_cdf/_relationships.py +48 -12
- cognite/neat/_graph/extractors/_classic_cdf/_sequences.py +19 -1
- cognite/neat/_graph/extractors/_dms.py +162 -47
- cognite/neat/_graph/extractors/_dms_graph.py +54 -4
- cognite/neat/_graph/extractors/_mock_graph_generator.py +1 -1
- cognite/neat/_graph/extractors/_rdf_file.py +3 -2
- cognite/neat/_graph/loaders/__init__.py +1 -3
- cognite/neat/_graph/loaders/_rdf2dms.py +20 -10
- cognite/neat/_graph/queries/_base.py +140 -84
- cognite/neat/_graph/queries/_construct.py +1 -1
- cognite/neat/_graph/transformers/__init__.py +3 -1
- cognite/neat/_graph/transformers/_value_type.py +54 -3
- cognite/neat/_issues/errors/_resources.py +1 -1
- cognite/neat/_issues/warnings/__init__.py +0 -2
- cognite/neat/_issues/warnings/_models.py +1 -1
- cognite/neat/_issues/warnings/_properties.py +0 -8
- cognite/neat/_rules/catalog/classic_model.xlsx +0 -0
- cognite/neat/_rules/exporters/_rules2instance_template.py +3 -3
- cognite/neat/_rules/importers/__init__.py +3 -1
- cognite/neat/_rules/importers/_dtdl2rules/spec.py +1 -2
- cognite/neat/_rules/importers/_rdf/__init__.py +2 -2
- cognite/neat/_rules/importers/_rdf/_base.py +2 -2
- cognite/neat/_rules/importers/_rdf/_inference2rules.py +241 -18
- cognite/neat/_rules/models/_base_rules.py +13 -3
- cognite/neat/_rules/models/dms/_rules.py +1 -8
- cognite/neat/_rules/models/dms/_rules_input.py +4 -0
- cognite/neat/_rules/models/information/_rules_input.py +5 -0
- cognite/neat/_rules/transformers/__init__.py +6 -0
- cognite/neat/_rules/transformers/_converters.py +98 -7
- cognite/neat/_session/_base.py +55 -4
- cognite/neat/_session/_drop.py +5 -1
- cognite/neat/_session/_inspect.py +3 -2
- cognite/neat/_session/_read.py +61 -14
- cognite/neat/_session/_set.py +27 -0
- cognite/neat/_session/_show.py +4 -4
- cognite/neat/_session/_state.py +8 -4
- cognite/neat/_session/_to.py +4 -1
- cognite/neat/_session/_wizard.py +1 -1
- cognite/neat/_session/exceptions.py +2 -1
- cognite/neat/_store/_graph_store.py +287 -133
- cognite/neat/_store/_rules_store.py +108 -1
- cognite/neat/_utils/auth.py +1 -1
- cognite/neat/_version.py +1 -1
- {cognite_neat-0.107.0.dist-info → cognite_neat-0.108.0.dist-info}/METADATA +1 -1
- {cognite_neat-0.107.0.dist-info → cognite_neat-0.108.0.dist-info}/RECORD +52 -52
- {cognite_neat-0.107.0.dist-info → cognite_neat-0.108.0.dist-info}/LICENSE +0 -0
- {cognite_neat-0.107.0.dist-info → cognite_neat-0.108.0.dist-info}/WHEEL +0 -0
- {cognite_neat-0.107.0.dist-info → cognite_neat-0.108.0.dist-info}/entry_points.txt +0 -0
|
@@ -302,8 +302,7 @@ class Interface(DTDLBase):
|
|
|
302
302
|
spec_version = frozenset(["2", "3"])
|
|
303
303
|
default_context: ClassVar[IRI] = Field(
|
|
304
304
|
"dtmi:dtdl:context;3",
|
|
305
|
-
description="This can be set directly on the class to change the "
|
|
306
|
-
"default context used when parsing a document.",
|
|
305
|
+
description="This can be set directly on the class to change the default context used when parsing a document.",
|
|
307
306
|
)
|
|
308
307
|
id_: DTMI = Field(alias="@id") # type: ignore[assignment]
|
|
309
308
|
context: IRI | None = Field(alias="@context")
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
from ._imf2rules import IMFImporter
|
|
2
|
-
from ._inference2rules import InferenceImporter
|
|
2
|
+
from ._inference2rules import InferenceImporter, SubclassInferenceImporter
|
|
3
3
|
from ._owl2rules import OWLImporter
|
|
4
4
|
|
|
5
|
-
__all__ = ["IMFImporter", "InferenceImporter", "OWLImporter"]
|
|
5
|
+
__all__ = ["IMFImporter", "InferenceImporter", "OWLImporter", "SubclassInferenceImporter"]
|
|
@@ -72,7 +72,7 @@ class BaseRDFImporter(BaseImporter[InformationInputRules]):
|
|
|
72
72
|
):
|
|
73
73
|
return cls(
|
|
74
74
|
IssueList(title=f"{cls.__name__} issues"),
|
|
75
|
-
store.
|
|
75
|
+
store.dataset,
|
|
76
76
|
data_model_id=data_model_id,
|
|
77
77
|
max_number_of_instance=max_number_of_instance,
|
|
78
78
|
non_existing_node_type=non_existing_node_type,
|
|
@@ -140,7 +140,7 @@ class BaseRDFImporter(BaseImporter[InformationInputRules]):
|
|
|
140
140
|
prefixes: Dict of prefixes and namespaces
|
|
141
141
|
"""
|
|
142
142
|
if Namespace(get_namespace(URI)) not in prefixes.values():
|
|
143
|
-
prefixes[f"prefix_{len(prefixes)+1}"] = Namespace(get_namespace(URI))
|
|
143
|
+
prefixes[f"prefix_{len(prefixes) + 1}"] = Namespace(get_namespace(URI))
|
|
144
144
|
|
|
145
145
|
@property
|
|
146
146
|
def _metadata(self) -> dict:
|
|
@@ -1,18 +1,25 @@
|
|
|
1
|
+
import itertools
|
|
1
2
|
from collections import Counter, defaultdict
|
|
2
|
-
from collections.abc import Mapping
|
|
3
|
+
from collections.abc import Iterable, Mapping
|
|
4
|
+
from dataclasses import dataclass
|
|
3
5
|
from datetime import datetime, timezone
|
|
4
6
|
from pathlib import Path
|
|
5
|
-
from typing import ClassVar, cast
|
|
7
|
+
from typing import Any, ClassVar, cast
|
|
6
8
|
|
|
7
9
|
from cognite.client import data_modeling as dm
|
|
8
|
-
from rdflib import RDF, Namespace, URIRef
|
|
10
|
+
from rdflib import RDF, Graph, Namespace, URIRef
|
|
9
11
|
from rdflib import Literal as RdfLiteral
|
|
10
12
|
|
|
11
|
-
from cognite.neat.
|
|
12
|
-
from cognite.neat.
|
|
13
|
+
from cognite.neat._constants import NEAT
|
|
14
|
+
from cognite.neat._issues import IssueList
|
|
15
|
+
from cognite.neat._issues.warnings import PropertyValueTypeUndefinedWarning
|
|
16
|
+
from cognite.neat._rules.analysis import InformationAnalysis
|
|
17
|
+
from cognite.neat._rules.models import InformationRules, data_types
|
|
13
18
|
from cognite.neat._rules.models.data_types import AnyURI
|
|
14
19
|
from cognite.neat._rules.models.entities._single_value import UnknownEntity
|
|
15
20
|
from cognite.neat._rules.models.information import (
|
|
21
|
+
InformationInputClass,
|
|
22
|
+
InformationInputProperty,
|
|
16
23
|
InformationMetadata,
|
|
17
24
|
)
|
|
18
25
|
from cognite.neat._store import NeatGraphStore
|
|
@@ -157,7 +164,7 @@ class InferenceImporter(BaseRDFImporter):
|
|
|
157
164
|
for class_uri, no_instances in self.graph.query(ORDERED_CLASSES_QUERY): # type: ignore[misc]
|
|
158
165
|
if (class_id := remove_namespace_from_uri(cast(URIRef, class_uri))) in classes:
|
|
159
166
|
# handles cases when class id is already present in classes
|
|
160
|
-
class_id = f"{class_id}_{len(classes)+1}"
|
|
167
|
+
class_id = f"{class_id}_{len(classes) + 1}"
|
|
161
168
|
|
|
162
169
|
classes[class_id] = {
|
|
163
170
|
"class_": class_id,
|
|
@@ -190,18 +197,6 @@ class InferenceImporter(BaseRDFImporter):
|
|
|
190
197
|
if property_uri == RDF.type:
|
|
191
198
|
continue
|
|
192
199
|
property_id = remove_namespace_from_uri(property_uri)
|
|
193
|
-
if property_id in {"external_id", "externalId"}:
|
|
194
|
-
skip_issue = PropertySkippedWarning(
|
|
195
|
-
resource_type="Property",
|
|
196
|
-
identifier=f"{class_id}:{property_id}",
|
|
197
|
-
property_name=property_id,
|
|
198
|
-
reason="External ID is assumed to be the unique identifier of the instance "
|
|
199
|
-
"and is not part of the data model schema.",
|
|
200
|
-
)
|
|
201
|
-
if skip_issue not in self.issue_list:
|
|
202
|
-
self.issue_list.append(skip_issue)
|
|
203
|
-
continue
|
|
204
|
-
|
|
205
200
|
self._add_uri_namespace_to_prefixes(cast(URIRef, property_uri), prefixes)
|
|
206
201
|
|
|
207
202
|
if isinstance(data_type_uri, URIRef):
|
|
@@ -292,3 +287,231 @@ class InferenceImporter(BaseRDFImporter):
|
|
|
292
287
|
@property
|
|
293
288
|
def source_uri(self) -> URIRef:
|
|
294
289
|
return INSTANCES_ENTITY.id_
|
|
290
|
+
|
|
291
|
+
|
|
292
|
+
# Internal helper class
|
|
293
|
+
@dataclass
|
|
294
|
+
class _ReadProperties:
|
|
295
|
+
class_uri: URIRef
|
|
296
|
+
subclass_uri: URIRef
|
|
297
|
+
property_uri: URIRef
|
|
298
|
+
data_type: URIRef | None
|
|
299
|
+
object_type: URIRef | None
|
|
300
|
+
max_occurrence: int
|
|
301
|
+
instance_count: int
|
|
302
|
+
|
|
303
|
+
|
|
304
|
+
class SubclassInferenceImporter(BaseRDFImporter):
|
|
305
|
+
"""Infer subclasses from a triple store.
|
|
306
|
+
|
|
307
|
+
Assumes that the graph already is connected to a schema. The classes should
|
|
308
|
+
match the RDF.type of the instances in the graph, while the subclasses should
|
|
309
|
+
match the NEAT.type of the instances in the graph.
|
|
310
|
+
|
|
311
|
+
ClassVars:
|
|
312
|
+
overwrite_data_types: Mapping of data types to be overwritten. The InferenceImporter will overwrite
|
|
313
|
+
32-bit integer and 32-bit float data types to 64-bit integer and 64-bit float data types
|
|
314
|
+
|
|
315
|
+
Args:
|
|
316
|
+
issue_list: Issue list to store issues
|
|
317
|
+
graph: Knowledge graph
|
|
318
|
+
max_number_of_instance: Maximum number of instances to be used in inference
|
|
319
|
+
"""
|
|
320
|
+
|
|
321
|
+
overwrite_data_types: ClassVar[Mapping[URIRef, URIRef]] = {
|
|
322
|
+
data_types.Integer.as_xml_uri_ref(): data_types.Long.as_xml_uri_ref(),
|
|
323
|
+
data_types.Float.as_xml_uri_ref(): data_types.Double.as_xml_uri_ref(),
|
|
324
|
+
}
|
|
325
|
+
|
|
326
|
+
def __init__(
|
|
327
|
+
self,
|
|
328
|
+
issue_list: IssueList,
|
|
329
|
+
graph: Graph,
|
|
330
|
+
rules: InformationRules,
|
|
331
|
+
max_number_of_instance: int,
|
|
332
|
+
non_existing_node_type: UnknownEntity | AnyURI = DEFAULT_NON_EXISTING_NODE_TYPE,
|
|
333
|
+
) -> None:
|
|
334
|
+
super().__init__(
|
|
335
|
+
issue_list,
|
|
336
|
+
graph,
|
|
337
|
+
rules.metadata.as_data_model_id().as_tuple(), # type: ignore[arg-type]
|
|
338
|
+
max_number_of_instance,
|
|
339
|
+
non_existing_node_type,
|
|
340
|
+
language="en",
|
|
341
|
+
)
|
|
342
|
+
self._rules = rules
|
|
343
|
+
|
|
344
|
+
_ordered_subclass_query = f"""SELECT DISTINCT ?class ?subclass (count(?s) as ?instances )
|
|
345
|
+
WHERE {{ ?s a ?class . ?s <{NEAT.type}> ?subclass }}
|
|
346
|
+
group by ?class ?subclass order by DESC(?instances)"""
|
|
347
|
+
|
|
348
|
+
_properties_query = """SELECT DISTINCT ?property ?dataType ?objectType
|
|
349
|
+
WHERE {{
|
|
350
|
+
?s a <{type}> .
|
|
351
|
+
?s <{neat_type}> <{subtype}> .
|
|
352
|
+
?s ?property ?value .
|
|
353
|
+
BIND(datatype(?value) AS ?dataType) .
|
|
354
|
+
OPTIONAL {{?value rdf:type ?objectType}}
|
|
355
|
+
}}"""
|
|
356
|
+
|
|
357
|
+
_max_occurrence_query = """SELECT (MAX(?count) AS ?maxCount)
|
|
358
|
+
WHERE {{
|
|
359
|
+
{{
|
|
360
|
+
SELECT ?subject (COUNT(?object) AS ?count)
|
|
361
|
+
WHERE {{
|
|
362
|
+
?subject a <{type}> .
|
|
363
|
+
?subject <{neat_type}> <{subtype}> .
|
|
364
|
+
?subject <{property}> ?object .
|
|
365
|
+
}}
|
|
366
|
+
GROUP BY ?subject
|
|
367
|
+
}}
|
|
368
|
+
}}"""
|
|
369
|
+
|
|
370
|
+
def _to_rules_components(
|
|
371
|
+
self,
|
|
372
|
+
) -> dict:
|
|
373
|
+
properties_by_class_subclass_pair = self._read_class_properties_from_graph()
|
|
374
|
+
existing_classes = {class_.class_.suffix: class_ for class_ in self._rules.classes}
|
|
375
|
+
prefixes = self._rules.prefixes.copy()
|
|
376
|
+
|
|
377
|
+
classes: list[InformationInputClass] = []
|
|
378
|
+
properties: list[InformationInputProperty] = []
|
|
379
|
+
# Help for IDE
|
|
380
|
+
subclass_uri: URIRef
|
|
381
|
+
for class_uri, class_properties_iterable in itertools.groupby(
|
|
382
|
+
properties_by_class_subclass_pair, key=lambda x: x.class_uri
|
|
383
|
+
):
|
|
384
|
+
properties_by_subclass_by_property = self._get_properties_by_subclass_by_property(class_properties_iterable)
|
|
385
|
+
|
|
386
|
+
shared_property_uris = set.intersection(
|
|
387
|
+
*[
|
|
388
|
+
set(properties_by_property.keys())
|
|
389
|
+
for properties_by_property in properties_by_subclass_by_property.values()
|
|
390
|
+
]
|
|
391
|
+
)
|
|
392
|
+
class_suffix = remove_namespace_from_uri(class_uri)
|
|
393
|
+
self._add_uri_namespace_to_prefixes(class_uri, prefixes)
|
|
394
|
+
if class_suffix not in existing_classes:
|
|
395
|
+
classes.append(InformationInputClass(class_=class_suffix))
|
|
396
|
+
else:
|
|
397
|
+
classes.append(InformationInputClass.load(existing_classes[class_suffix].model_dump()))
|
|
398
|
+
shared_properties: dict[URIRef, list[_ReadProperties]] = defaultdict(list)
|
|
399
|
+
for subclass_uri, properties_by_property_uri in properties_by_subclass_by_property.items():
|
|
400
|
+
subclass_suffix = remove_namespace_from_uri(subclass_uri)
|
|
401
|
+
self._add_uri_namespace_to_prefixes(subclass_uri, prefixes)
|
|
402
|
+
if subclass_suffix not in existing_classes:
|
|
403
|
+
classes.append(InformationInputClass(class_=subclass_suffix, implements=class_suffix))
|
|
404
|
+
else:
|
|
405
|
+
classes.append(InformationInputClass.load(existing_classes[subclass_suffix].model_dump()))
|
|
406
|
+
for property_uri, read_properties in properties_by_property_uri.items():
|
|
407
|
+
if property_uri in shared_property_uris:
|
|
408
|
+
shared_properties[property_uri].extend(read_properties)
|
|
409
|
+
continue
|
|
410
|
+
properties.append(
|
|
411
|
+
self._create_property(read_properties, subclass_suffix, class_uri, property_uri, prefixes)
|
|
412
|
+
)
|
|
413
|
+
for property_uri, read_properties in shared_properties.items():
|
|
414
|
+
properties.append(
|
|
415
|
+
self._create_property(read_properties, class_suffix, class_uri, property_uri, prefixes)
|
|
416
|
+
)
|
|
417
|
+
|
|
418
|
+
return {
|
|
419
|
+
"metadata": self._rules.metadata.model_dump(),
|
|
420
|
+
"classes": [cls.dump(self._rules.metadata.prefix) for cls in classes],
|
|
421
|
+
"properties": [prop.dump(self._rules.metadata.prefix) for prop in properties],
|
|
422
|
+
"prefixes": self._rules.prefixes,
|
|
423
|
+
}
|
|
424
|
+
|
|
425
|
+
@staticmethod
|
|
426
|
+
def _get_properties_by_subclass_by_property(
|
|
427
|
+
class_properties_iterable: Iterable[_ReadProperties],
|
|
428
|
+
) -> dict[URIRef, dict[URIRef, list[_ReadProperties]]]:
|
|
429
|
+
properties_by_subclass_by_property: dict[URIRef, dict[URIRef, list[_ReadProperties]]] = {}
|
|
430
|
+
for subclass_uri, subclass_properties_iterable in itertools.groupby(
|
|
431
|
+
class_properties_iterable, key=lambda x: x.subclass_uri
|
|
432
|
+
):
|
|
433
|
+
properties_by_subclass_by_property[subclass_uri] = defaultdict(list)
|
|
434
|
+
for read_prop in subclass_properties_iterable:
|
|
435
|
+
properties_by_subclass_by_property[subclass_uri][read_prop.property_uri].append(read_prop)
|
|
436
|
+
return properties_by_subclass_by_property
|
|
437
|
+
|
|
438
|
+
def _read_class_properties_from_graph(self) -> list[_ReadProperties]:
|
|
439
|
+
count_by_class_subclass_pair: dict[tuple[URIRef, URIRef], int] = {}
|
|
440
|
+
# Infers all the classes w in the graph
|
|
441
|
+
for result_row in self.graph.query(self._ordered_subclass_query):
|
|
442
|
+
class_uri, subclass_uri, instance_count_literal = cast(tuple[URIRef, URIRef, RdfLiteral], result_row)
|
|
443
|
+
count_by_class_subclass_pair[(class_uri, subclass_uri)] = instance_count_literal.toPython()
|
|
444
|
+
analysis = InformationAnalysis(self._rules)
|
|
445
|
+
existing_class_properties = {
|
|
446
|
+
(class_entity.suffix, property)
|
|
447
|
+
for class_entity, properties in analysis.class_property_pairs(consider_inheritance=True).items()
|
|
448
|
+
for property in properties.keys()
|
|
449
|
+
}
|
|
450
|
+
properties_by_class_by_subclass: list[_ReadProperties] = []
|
|
451
|
+
for (class_uri, subclass_uri), instance_count in count_by_class_subclass_pair.items():
|
|
452
|
+
property_query = self._properties_query.format(type=class_uri, subtype=subclass_uri, neat_type=NEAT.type)
|
|
453
|
+
class_suffix = remove_namespace_from_uri(class_uri)
|
|
454
|
+
for result_row in self.graph.query(property_query):
|
|
455
|
+
property_uri, data_type_uri, object_type_uri = cast(tuple[URIRef, URIRef, URIRef], result_row)
|
|
456
|
+
if property_uri == RDF.type or property_uri == NEAT.type:
|
|
457
|
+
continue
|
|
458
|
+
property_str = remove_namespace_from_uri(property_uri)
|
|
459
|
+
if (class_suffix, property_str) in existing_class_properties:
|
|
460
|
+
continue
|
|
461
|
+
occurrence_query = self._max_occurrence_query.format(
|
|
462
|
+
type=class_uri, subtype=subclass_uri, property=property_uri, neat_type=NEAT.type
|
|
463
|
+
)
|
|
464
|
+
max_occurrence = 1 # default value
|
|
465
|
+
result_row, *_ = list(self.graph.query(occurrence_query))
|
|
466
|
+
if result_row:
|
|
467
|
+
max_occurrence_literal, *__ = cast(tuple[RdfLiteral, Any], result_row)
|
|
468
|
+
max_occurrence = int(max_occurrence_literal.toPython())
|
|
469
|
+
properties_by_class_by_subclass.append(
|
|
470
|
+
_ReadProperties(
|
|
471
|
+
class_uri=class_uri,
|
|
472
|
+
subclass_uri=subclass_uri,
|
|
473
|
+
property_uri=property_uri,
|
|
474
|
+
data_type=data_type_uri,
|
|
475
|
+
object_type=object_type_uri,
|
|
476
|
+
max_occurrence=max_occurrence,
|
|
477
|
+
instance_count=instance_count,
|
|
478
|
+
)
|
|
479
|
+
)
|
|
480
|
+
return properties_by_class_by_subclass
|
|
481
|
+
|
|
482
|
+
def _create_property(
|
|
483
|
+
self,
|
|
484
|
+
read_properties: list[_ReadProperties],
|
|
485
|
+
class_suffix: str,
|
|
486
|
+
class_uri: URIRef,
|
|
487
|
+
property_uri: URIRef,
|
|
488
|
+
prefixes: dict[str, Namespace],
|
|
489
|
+
) -> InformationInputProperty:
|
|
490
|
+
first = read_properties[0]
|
|
491
|
+
value_type = self._get_value_type(read_properties, prefixes)
|
|
492
|
+
property_name = remove_namespace_from_uri(property_uri)
|
|
493
|
+
self._add_uri_namespace_to_prefixes(property_uri, prefixes)
|
|
494
|
+
|
|
495
|
+
return InformationInputProperty(
|
|
496
|
+
class_=class_suffix,
|
|
497
|
+
property_=property_name,
|
|
498
|
+
max_count=first.max_occurrence,
|
|
499
|
+
value_type=value_type,
|
|
500
|
+
instance_source=(f"{uri_to_short_form(class_uri, prefixes)}({uri_to_short_form(property_uri, prefixes)})"),
|
|
501
|
+
)
|
|
502
|
+
|
|
503
|
+
def _get_value_type(
|
|
504
|
+
self, read_properties: list[_ReadProperties], prefixes: dict[str, Namespace]
|
|
505
|
+
) -> str | UnknownEntity:
|
|
506
|
+
value_types = {prop.data_type for prop in read_properties if prop.data_type} | {
|
|
507
|
+
prop.object_type for prop in read_properties if prop.object_type
|
|
508
|
+
}
|
|
509
|
+
if len(value_types) == 1:
|
|
510
|
+
uri_ref = value_types.pop()
|
|
511
|
+
self._add_uri_namespace_to_prefixes(uri_ref, prefixes)
|
|
512
|
+
return remove_namespace_from_uri(uri_ref)
|
|
513
|
+
elif len(value_types) == 0:
|
|
514
|
+
return UnknownEntity()
|
|
515
|
+
for uri_ref in value_types:
|
|
516
|
+
self._add_uri_namespace_to_prefixes(uri_ref, prefixes)
|
|
517
|
+
return " | ".join(remove_namespace_from_uri(uri_ref) for uri_ref in value_types)
|
|
@@ -21,6 +21,7 @@ from typing import (
|
|
|
21
21
|
)
|
|
22
22
|
|
|
23
23
|
import pandas as pd
|
|
24
|
+
from cognite.client import data_modeling as dm
|
|
24
25
|
from pydantic import (
|
|
25
26
|
BaseModel,
|
|
26
27
|
BeforeValidator,
|
|
@@ -180,6 +181,12 @@ class BaseMetadata(SchemaModel):
|
|
|
180
181
|
description="Date of the data model update",
|
|
181
182
|
)
|
|
182
183
|
|
|
184
|
+
source_id: URIRefType | None = Field(
|
|
185
|
+
None,
|
|
186
|
+
description="Id of source that produced this rules",
|
|
187
|
+
alias="sourceId",
|
|
188
|
+
)
|
|
189
|
+
|
|
183
190
|
@field_validator("*", mode="before")
|
|
184
191
|
def strip_string(cls, value: Any) -> Any:
|
|
185
192
|
if isinstance(value, str):
|
|
@@ -213,9 +220,6 @@ class BaseMetadata(SchemaModel):
|
|
|
213
220
|
def prefix(self) -> str:
|
|
214
221
|
return self.space
|
|
215
222
|
|
|
216
|
-
def as_identifier(self) -> str:
|
|
217
|
-
return f"{self.prefix}:{self.external_id}"
|
|
218
|
-
|
|
219
223
|
def get_prefix(self) -> str:
|
|
220
224
|
return self.prefix
|
|
221
225
|
|
|
@@ -234,6 +238,12 @@ class BaseMetadata(SchemaModel):
|
|
|
234
238
|
"""Namespace for the data model used for the entities in the data model."""
|
|
235
239
|
return Namespace(f"{self.identifier}/")
|
|
236
240
|
|
|
241
|
+
def as_data_model_id(self) -> dm.DataModelId:
|
|
242
|
+
return dm.DataModelId(space=self.space, external_id=self.external_id, version=self.version)
|
|
243
|
+
|
|
244
|
+
def as_identifier(self) -> str:
|
|
245
|
+
return repr(self.as_data_model_id())
|
|
246
|
+
|
|
237
247
|
|
|
238
248
|
class BaseRules(SchemaModel, ABC):
|
|
239
249
|
"""
|
|
@@ -64,9 +64,6 @@ class DMSMetadata(BaseMetadata):
|
|
|
64
64
|
space=self.space,
|
|
65
65
|
)
|
|
66
66
|
|
|
67
|
-
def as_data_model_id(self) -> dm.DataModelId:
|
|
68
|
-
return dm.DataModelId(space=self.space, external_id=self.external_id, version=self.version)
|
|
69
|
-
|
|
70
67
|
def as_data_model(self) -> dm.DataModelApply:
|
|
71
68
|
suffix = f"Creator: {', '.join(self.creator)}"
|
|
72
69
|
if self.description:
|
|
@@ -83,9 +80,6 @@ class DMSMetadata(BaseMetadata):
|
|
|
83
80
|
views=[],
|
|
84
81
|
)
|
|
85
82
|
|
|
86
|
-
def as_identifier(self) -> str:
|
|
87
|
-
return repr(self.as_data_model_id())
|
|
88
|
-
|
|
89
83
|
def get_prefix(self) -> str:
|
|
90
84
|
return self.space
|
|
91
85
|
|
|
@@ -109,8 +103,7 @@ class DMSProperty(SheetRow):
|
|
|
109
103
|
)
|
|
110
104
|
value_type: DataType | ViewEntity | DMSUnknownEntity = Field(
|
|
111
105
|
alias="Value Type",
|
|
112
|
-
description="Value type that the property can hold. "
|
|
113
|
-
"It takes either subset of CDF primitive types or a View id",
|
|
106
|
+
description="Value type that the property can hold. It takes either subset of CDF primitive types or a View id",
|
|
114
107
|
)
|
|
115
108
|
nullable: bool | None = Field(
|
|
116
109
|
default=None,
|
|
@@ -37,6 +37,7 @@ class DMSInputMetadata(InputComponent[DMSMetadata]):
|
|
|
37
37
|
created: datetime | str | None = None
|
|
38
38
|
updated: datetime | str | None = None
|
|
39
39
|
logical: str | URIRef | None = None
|
|
40
|
+
source_id: str | URIRef | None = None
|
|
40
41
|
|
|
41
42
|
@classmethod
|
|
42
43
|
def _get_verified_cls(cls) -> type[DMSMetadata]:
|
|
@@ -77,6 +78,9 @@ class DMSInputMetadata(InputComponent[DMSMetadata]):
|
|
|
77
78
|
description = None
|
|
78
79
|
return description, creator
|
|
79
80
|
|
|
81
|
+
def as_data_model_id(self) -> dm.DataModelId:
|
|
82
|
+
return dm.DataModelId(space=self.space, external_id=self.external_id, version=self.version)
|
|
83
|
+
|
|
80
84
|
@property
|
|
81
85
|
def identifier(self) -> URIRef:
|
|
82
86
|
"""Globally unique identifier for the data model.
|
|
@@ -3,6 +3,7 @@ from datetime import datetime
|
|
|
3
3
|
from typing import Any
|
|
4
4
|
|
|
5
5
|
import pandas as pd
|
|
6
|
+
from cognite.client import data_modeling as dm
|
|
6
7
|
from rdflib import Namespace, URIRef
|
|
7
8
|
|
|
8
9
|
from cognite.neat._constants import DEFAULT_NAMESPACE
|
|
@@ -36,6 +37,7 @@ class InformationInputMetadata(InputComponent[InformationMetadata]):
|
|
|
36
37
|
updated: datetime | str | None = None
|
|
37
38
|
physical: str | URIRef | None = None
|
|
38
39
|
conceptual: str | URIRef | None = None
|
|
40
|
+
source_id: str | URIRef | None = None
|
|
39
41
|
|
|
40
42
|
@classmethod
|
|
41
43
|
def _get_verified_cls(cls) -> type[InformationMetadata]:
|
|
@@ -49,6 +51,9 @@ class InformationInputMetadata(InputComponent[InformationMetadata]):
|
|
|
49
51
|
output["updated"] = datetime.now()
|
|
50
52
|
return output
|
|
51
53
|
|
|
54
|
+
def as_data_model_id(self) -> dm.DataModelId:
|
|
55
|
+
return dm.DataModelId(space=self.space, external_id=self.external_id, version=self.version)
|
|
56
|
+
|
|
52
57
|
@property
|
|
53
58
|
def prefix(self) -> str:
|
|
54
59
|
return self.space
|
|
@@ -3,10 +3,13 @@ from ._converters import (
|
|
|
3
3
|
AddClassImplements,
|
|
4
4
|
ChangeViewPrefix,
|
|
5
5
|
ClassicPrepareCore,
|
|
6
|
+
ConversionTransformer,
|
|
6
7
|
ConvertToRules,
|
|
7
8
|
DMSToInformation,
|
|
8
9
|
IncludeReferenced,
|
|
9
10
|
InformationToDMS,
|
|
11
|
+
MergeDMSRules,
|
|
12
|
+
MergeInformationRules,
|
|
10
13
|
PrefixEntities,
|
|
11
14
|
ReduceCogniteModel,
|
|
12
15
|
SetIDDMSModel,
|
|
@@ -24,11 +27,14 @@ __all__ = [
|
|
|
24
27
|
"AsParentPropertyId",
|
|
25
28
|
"ChangeViewPrefix",
|
|
26
29
|
"ClassicPrepareCore",
|
|
30
|
+
"ConversionTransformer",
|
|
27
31
|
"ConvertToRules",
|
|
28
32
|
"DMSToInformation",
|
|
29
33
|
"IncludeReferenced",
|
|
30
34
|
"InformationToDMS",
|
|
31
35
|
"MapOneToOne",
|
|
36
|
+
"MergeDMSRules",
|
|
37
|
+
"MergeInformationRules",
|
|
32
38
|
"PrefixEntities",
|
|
33
39
|
"ReduceCogniteModel",
|
|
34
40
|
"RuleMapper",
|
|
@@ -16,6 +16,7 @@ from cognite.neat._client.data_classes.data_modeling import ContainerApplyDict,
|
|
|
16
16
|
from cognite.neat._constants import (
|
|
17
17
|
COGNITE_MODELS,
|
|
18
18
|
DMS_CONTAINER_PROPERTY_SIZE_LIMIT,
|
|
19
|
+
DMS_RESERVED_PROPERTIES,
|
|
19
20
|
get_default_prefixes_and_namespaces,
|
|
20
21
|
)
|
|
21
22
|
from cognite.neat._issues.errors import NeatValueError
|
|
@@ -41,9 +42,9 @@ from cognite.neat._rules.models import (
|
|
|
41
42
|
)
|
|
42
43
|
from cognite.neat._rules.models._rdfpath import Entity as RDFPathEntity
|
|
43
44
|
from cognite.neat._rules.models._rdfpath import RDFPath, SingleProperty
|
|
44
|
-
from cognite.neat._rules.models.data_types import AnyURI, DataType, String
|
|
45
|
+
from cognite.neat._rules.models.data_types import AnyURI, DataType, Enum, File, String, Timeseries
|
|
45
46
|
from cognite.neat._rules.models.dms import DMSMetadata, DMSProperty, DMSValidation, DMSView
|
|
46
|
-
from cognite.neat._rules.models.dms._rules import DMSContainer
|
|
47
|
+
from cognite.neat._rules.models.dms._rules import DMSContainer, DMSEnum, DMSNode
|
|
47
48
|
from cognite.neat._rules.models.entities import (
|
|
48
49
|
ClassEntity,
|
|
49
50
|
ContainerEntity,
|
|
@@ -252,11 +253,16 @@ class PrefixEntities(RulesTransformer[ReadRules[T_InputRules], ReadRules[T_Input
|
|
|
252
253
|
class InformationToDMS(ConversionTransformer[InformationRules, DMSRules]):
|
|
253
254
|
"""Converts InformationRules to DMSRules."""
|
|
254
255
|
|
|
255
|
-
def __init__(
|
|
256
|
+
def __init__(
|
|
257
|
+
self, ignore_undefined_value_types: bool = False, reserved_properties: Literal["error", "skip"] = "error"
|
|
258
|
+
):
|
|
256
259
|
self.ignore_undefined_value_types = ignore_undefined_value_types
|
|
260
|
+
self.reserved_properties = reserved_properties
|
|
257
261
|
|
|
258
262
|
def transform(self, rules: InformationRules) -> DMSRules:
|
|
259
|
-
return _InformationRulesConverter(rules).as_dms_rules(
|
|
263
|
+
return _InformationRulesConverter(rules).as_dms_rules(
|
|
264
|
+
self.ignore_undefined_value_types, self.reserved_properties
|
|
265
|
+
)
|
|
260
266
|
|
|
261
267
|
|
|
262
268
|
class DMSToInformation(ConversionTransformer[DMSRules, InformationRules]):
|
|
@@ -823,11 +829,21 @@ class ClassicPrepareCore(RulesTransformer[InformationRules, InformationRules]):
|
|
|
823
829
|
- ClassicTimeseries.isString from boolean to string
|
|
824
830
|
- Add class ClassicSourceSystem, and update all source properties from string to ClassicSourceSystem.
|
|
825
831
|
- Rename externalId properties to classicExternalId
|
|
826
|
-
- Renames the Relationship.
|
|
832
|
+
- Renames the Relationship.sourceExternalId and Relationship.targetExternalId to startNode and endNode
|
|
833
|
+
- If reference_timeseries is True, the classicExternalId property of the TimeSeries class will change type
|
|
834
|
+
from string to timeseries.
|
|
835
|
+
- If reference_files is True, the classicExternalId property of the File class will change type from string to file.
|
|
827
836
|
"""
|
|
828
837
|
|
|
829
|
-
def __init__(
|
|
838
|
+
def __init__(
|
|
839
|
+
self,
|
|
840
|
+
instance_namespace: Namespace,
|
|
841
|
+
reference_timeseries: bool = False,
|
|
842
|
+
reference_files: bool = False,
|
|
843
|
+
) -> None:
|
|
830
844
|
self.instance_namespace = instance_namespace
|
|
845
|
+
self.reference_timeseries = reference_timeseries
|
|
846
|
+
self.reference_files = reference_files
|
|
831
847
|
|
|
832
848
|
@property
|
|
833
849
|
def description(self) -> str:
|
|
@@ -851,6 +867,10 @@ class ClassicPrepareCore(RulesTransformer[InformationRules, InformationRules]):
|
|
|
851
867
|
prop.value_type = ClassEntity(prefix=prefix, suffix="ClassicSourceSystem")
|
|
852
868
|
elif prop.property_ == "externalId":
|
|
853
869
|
prop.property_ = "classicExternalId"
|
|
870
|
+
if self.reference_timeseries and prop.class_.suffix == "ClassicTimeSeries":
|
|
871
|
+
prop.value_type = Timeseries()
|
|
872
|
+
elif self.reference_files and prop.class_.suffix == "ClassicFile":
|
|
873
|
+
prop.value_type = File()
|
|
854
874
|
elif prop.property_ == "sourceExternalId" and prop.class_.suffix == "ClassicRelationship":
|
|
855
875
|
prop.property_ = "startNode"
|
|
856
876
|
elif prop.property_ == "targetExternalId" and prop.class_.suffix == "ClassicRelationship":
|
|
@@ -907,6 +927,68 @@ class ChangeViewPrefix(RulesTransformer[DMSRules, DMSRules]):
|
|
|
907
927
|
return output
|
|
908
928
|
|
|
909
929
|
|
|
930
|
+
class MergeDMSRules(RulesTransformer[DMSRules, DMSRules]):
|
|
931
|
+
def __init__(self, extra: DMSRules) -> None:
|
|
932
|
+
self.extra = extra
|
|
933
|
+
|
|
934
|
+
def transform(self, rules: DMSRules) -> DMSRules:
|
|
935
|
+
output = rules.model_copy(deep=True)
|
|
936
|
+
existing_views = {view.view for view in output.views}
|
|
937
|
+
for view in self.extra.views:
|
|
938
|
+
if view.view not in existing_views:
|
|
939
|
+
output.views.append(view)
|
|
940
|
+
existing_properties = {(prop.view, prop.view_property) for prop in output.properties}
|
|
941
|
+
existing_containers = {container.container for container in output.containers or []}
|
|
942
|
+
existing_enum_collections = {collection.collection for collection in output.enum or []}
|
|
943
|
+
new_containers_by_entity = {container.container: container for container in self.extra.containers or []}
|
|
944
|
+
new_enum_collections_by_entity = {collection.collection: collection for collection in self.extra.enum or []}
|
|
945
|
+
for prop in self.extra.properties:
|
|
946
|
+
if (prop.view, prop.view_property) in existing_properties:
|
|
947
|
+
continue
|
|
948
|
+
output.properties.append(prop)
|
|
949
|
+
if prop.container and prop.container not in existing_containers:
|
|
950
|
+
if output.containers is None:
|
|
951
|
+
output.containers = SheetList[DMSContainer]()
|
|
952
|
+
output.containers.append(new_containers_by_entity[prop.container])
|
|
953
|
+
if isinstance(prop.value_type, Enum) and prop.value_type.collection not in existing_enum_collections:
|
|
954
|
+
if output.enum is None:
|
|
955
|
+
output.enum = SheetList[DMSEnum]()
|
|
956
|
+
output.enum.append(new_enum_collections_by_entity[prop.value_type.collection])
|
|
957
|
+
|
|
958
|
+
existing_nodes = {node.node for node in output.nodes or []}
|
|
959
|
+
for node in self.extra.nodes or []:
|
|
960
|
+
if node.node not in existing_nodes:
|
|
961
|
+
if output.nodes is None:
|
|
962
|
+
output.nodes = SheetList[DMSNode]()
|
|
963
|
+
output.nodes.append(node)
|
|
964
|
+
|
|
965
|
+
return output
|
|
966
|
+
|
|
967
|
+
@property
|
|
968
|
+
def description(self) -> str:
|
|
969
|
+
return f"Merged with {self.extra.metadata.as_data_model_id()}"
|
|
970
|
+
|
|
971
|
+
|
|
972
|
+
class MergeInformationRules(RulesTransformer[InformationRules, InformationRules]):
|
|
973
|
+
def __init__(self, extra: InformationRules) -> None:
|
|
974
|
+
self.extra = extra
|
|
975
|
+
|
|
976
|
+
def transform(self, rules: InformationRules) -> InformationRules:
|
|
977
|
+
output = rules.model_copy(deep=True)
|
|
978
|
+
existing_classes = {cls.class_ for cls in output.classes}
|
|
979
|
+
for cls in self.extra.classes:
|
|
980
|
+
if cls.class_ not in existing_classes:
|
|
981
|
+
output.classes.append(cls)
|
|
982
|
+
existing_properties = {(prop.class_, prop.property_) for prop in output.properties}
|
|
983
|
+
for prop in self.extra.properties:
|
|
984
|
+
if (prop.class_, prop.property_) not in existing_properties:
|
|
985
|
+
output.properties.append(prop)
|
|
986
|
+
for prefix, namespace in self.extra.prefixes.items():
|
|
987
|
+
if prefix not in output.prefixes:
|
|
988
|
+
output.prefixes[prefix] = namespace
|
|
989
|
+
return output
|
|
990
|
+
|
|
991
|
+
|
|
910
992
|
class _InformationRulesConverter:
|
|
911
993
|
_start_or_end_node: ClassVar[frozenset[str]] = frozenset({"endNode", "end_node", "startNode", "start_node"})
|
|
912
994
|
|
|
@@ -914,7 +996,9 @@ class _InformationRulesConverter:
|
|
|
914
996
|
self.rules = information
|
|
915
997
|
self.property_count_by_container: dict[ContainerEntity, int] = defaultdict(int)
|
|
916
998
|
|
|
917
|
-
def as_dms_rules(
|
|
999
|
+
def as_dms_rules(
|
|
1000
|
+
self, ignore_undefined_value_types: bool = False, reserved_properties: Literal["error", "skip"] = "error"
|
|
1001
|
+
) -> "DMSRules":
|
|
918
1002
|
from cognite.neat._rules.models.dms._rules import (
|
|
919
1003
|
DMSContainer,
|
|
920
1004
|
DMSProperty,
|
|
@@ -957,6 +1041,13 @@ class _InformationRulesConverter:
|
|
|
957
1041
|
continue
|
|
958
1042
|
if prop.class_ in edge_classes and prop.property_ in self._start_or_end_node:
|
|
959
1043
|
continue
|
|
1044
|
+
if prop.property_ in DMS_RESERVED_PROPERTIES:
|
|
1045
|
+
msg = f"Property {prop.property_} is a reserved property in DMS."
|
|
1046
|
+
if reserved_properties == "error":
|
|
1047
|
+
raise NeatValueError(msg)
|
|
1048
|
+
warnings.warn(NeatValueWarning(f"{msg} Skipping..."), stacklevel=2)
|
|
1049
|
+
continue
|
|
1050
|
+
|
|
960
1051
|
dms_property = self._as_dms_property(
|
|
961
1052
|
prop,
|
|
962
1053
|
default_space,
|