cognite-neat 0.106.0__py3-none-any.whl → 0.108.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cognite-neat might be problematic. Click here for more details.
- cognite/neat/_constants.py +35 -1
- cognite/neat/_graph/_shared.py +4 -0
- cognite/neat/_graph/extractors/__init__.py +5 -1
- cognite/neat/_graph/extractors/_base.py +32 -0
- cognite/neat/_graph/extractors/_classic_cdf/_base.py +128 -14
- cognite/neat/_graph/extractors/_classic_cdf/_classic.py +156 -12
- cognite/neat/_graph/extractors/_classic_cdf/_relationships.py +50 -12
- cognite/neat/_graph/extractors/_classic_cdf/_sequences.py +26 -1
- cognite/neat/_graph/extractors/_dms.py +196 -47
- cognite/neat/_graph/extractors/_dms_graph.py +199 -0
- cognite/neat/_graph/extractors/_mock_graph_generator.py +1 -1
- cognite/neat/_graph/extractors/_rdf_file.py +33 -5
- cognite/neat/_graph/loaders/__init__.py +1 -3
- cognite/neat/_graph/loaders/_rdf2dms.py +123 -19
- cognite/neat/_graph/queries/_base.py +140 -84
- cognite/neat/_graph/queries/_construct.py +2 -2
- cognite/neat/_graph/transformers/__init__.py +8 -1
- cognite/neat/_graph/transformers/_base.py +9 -1
- cognite/neat/_graph/transformers/_classic_cdf.py +90 -3
- cognite/neat/_graph/transformers/_rdfpath.py +3 -3
- cognite/neat/_graph/transformers/_value_type.py +106 -45
- cognite/neat/_issues/errors/_resources.py +1 -1
- cognite/neat/_issues/warnings/__init__.py +0 -2
- cognite/neat/_issues/warnings/_models.py +1 -1
- cognite/neat/_issues/warnings/_properties.py +0 -8
- cognite/neat/_rules/analysis/_base.py +1 -1
- cognite/neat/_rules/analysis/_information.py +14 -13
- cognite/neat/_rules/catalog/__init__.py +1 -0
- cognite/neat/_rules/catalog/classic_model.xlsx +0 -0
- cognite/neat/_rules/catalog/info-rules-imf.xlsx +0 -0
- cognite/neat/_rules/exporters/_rules2instance_template.py +3 -3
- cognite/neat/_rules/importers/__init__.py +3 -1
- cognite/neat/_rules/importers/_dms2rules.py +7 -5
- cognite/neat/_rules/importers/_dtdl2rules/spec.py +1 -2
- cognite/neat/_rules/importers/_rdf/__init__.py +2 -2
- cognite/neat/_rules/importers/_rdf/_base.py +2 -2
- cognite/neat/_rules/importers/_rdf/_inference2rules.py +242 -19
- cognite/neat/_rules/models/_base_rules.py +13 -15
- cognite/neat/_rules/models/_types.py +5 -0
- cognite/neat/_rules/models/dms/_rules.py +51 -10
- cognite/neat/_rules/models/dms/_rules_input.py +4 -0
- cognite/neat/_rules/models/information/_rules.py +48 -5
- cognite/neat/_rules/models/information/_rules_input.py +6 -1
- cognite/neat/_rules/models/mapping/_classic2core.py +4 -5
- cognite/neat/_rules/transformers/__init__.py +10 -0
- cognite/neat/_rules/transformers/_converters.py +300 -62
- cognite/neat/_session/_base.py +57 -10
- cognite/neat/_session/_drop.py +5 -1
- cognite/neat/_session/_inspect.py +3 -2
- cognite/neat/_session/_mapping.py +17 -6
- cognite/neat/_session/_prepare.py +0 -47
- cognite/neat/_session/_read.py +115 -10
- cognite/neat/_session/_set.py +27 -0
- cognite/neat/_session/_show.py +4 -4
- cognite/neat/_session/_state.py +12 -1
- cognite/neat/_session/_to.py +43 -2
- cognite/neat/_session/_wizard.py +1 -1
- cognite/neat/_session/exceptions.py +8 -3
- cognite/neat/_store/_graph_store.py +331 -136
- cognite/neat/_store/_rules_store.py +130 -1
- cognite/neat/_utils/auth.py +3 -1
- cognite/neat/_version.py +1 -1
- {cognite_neat-0.106.0.dist-info → cognite_neat-0.108.0.dist-info}/METADATA +2 -2
- {cognite_neat-0.106.0.dist-info → cognite_neat-0.108.0.dist-info}/RECORD +67 -65
- {cognite_neat-0.106.0.dist-info → cognite_neat-0.108.0.dist-info}/WHEEL +1 -1
- {cognite_neat-0.106.0.dist-info → cognite_neat-0.108.0.dist-info}/LICENSE +0 -0
- {cognite_neat-0.106.0.dist-info → cognite_neat-0.108.0.dist-info}/entry_points.txt +0 -0
|
@@ -1,18 +1,25 @@
|
|
|
1
|
+
import itertools
|
|
1
2
|
from collections import Counter, defaultdict
|
|
2
|
-
from collections.abc import Mapping
|
|
3
|
+
from collections.abc import Iterable, Mapping
|
|
4
|
+
from dataclasses import dataclass
|
|
3
5
|
from datetime import datetime, timezone
|
|
4
6
|
from pathlib import Path
|
|
5
|
-
from typing import ClassVar, cast
|
|
7
|
+
from typing import Any, ClassVar, cast
|
|
6
8
|
|
|
7
9
|
from cognite.client import data_modeling as dm
|
|
8
|
-
from rdflib import RDF, Namespace, URIRef
|
|
10
|
+
from rdflib import RDF, Graph, Namespace, URIRef
|
|
9
11
|
from rdflib import Literal as RdfLiteral
|
|
10
12
|
|
|
11
|
-
from cognite.neat.
|
|
12
|
-
from cognite.neat.
|
|
13
|
+
from cognite.neat._constants import NEAT
|
|
14
|
+
from cognite.neat._issues import IssueList
|
|
15
|
+
from cognite.neat._issues.warnings import PropertyValueTypeUndefinedWarning
|
|
16
|
+
from cognite.neat._rules.analysis import InformationAnalysis
|
|
17
|
+
from cognite.neat._rules.models import InformationRules, data_types
|
|
13
18
|
from cognite.neat._rules.models.data_types import AnyURI
|
|
14
19
|
from cognite.neat._rules.models.entities._single_value import UnknownEntity
|
|
15
20
|
from cognite.neat._rules.models.information import (
|
|
21
|
+
InformationInputClass,
|
|
22
|
+
InformationInputProperty,
|
|
16
23
|
InformationMetadata,
|
|
17
24
|
)
|
|
18
25
|
from cognite.neat._store import NeatGraphStore
|
|
@@ -157,7 +164,7 @@ class InferenceImporter(BaseRDFImporter):
|
|
|
157
164
|
for class_uri, no_instances in self.graph.query(ORDERED_CLASSES_QUERY): # type: ignore[misc]
|
|
158
165
|
if (class_id := remove_namespace_from_uri(cast(URIRef, class_uri))) in classes:
|
|
159
166
|
# handles cases when class id is already present in classes
|
|
160
|
-
class_id = f"{class_id}_{len(classes)+1}"
|
|
167
|
+
class_id = f"{class_id}_{len(classes) + 1}"
|
|
161
168
|
|
|
162
169
|
classes[class_id] = {
|
|
163
170
|
"class_": class_id,
|
|
@@ -190,18 +197,6 @@ class InferenceImporter(BaseRDFImporter):
|
|
|
190
197
|
if property_uri == RDF.type:
|
|
191
198
|
continue
|
|
192
199
|
property_id = remove_namespace_from_uri(property_uri)
|
|
193
|
-
if property_id in {"external_id", "externalId"}:
|
|
194
|
-
skip_issue = PropertySkippedWarning(
|
|
195
|
-
resource_type="Property",
|
|
196
|
-
identifier=f"{class_id}:{property_id}",
|
|
197
|
-
property_name=property_id,
|
|
198
|
-
reason="External ID is assumed to be the unique identifier of the instance "
|
|
199
|
-
"and is not part of the data model schema.",
|
|
200
|
-
)
|
|
201
|
-
if skip_issue not in self.issue_list:
|
|
202
|
-
self.issue_list.append(skip_issue)
|
|
203
|
-
continue
|
|
204
|
-
|
|
205
200
|
self._add_uri_namespace_to_prefixes(cast(URIRef, property_uri), prefixes)
|
|
206
201
|
|
|
207
202
|
if isinstance(data_type_uri, URIRef):
|
|
@@ -233,7 +228,7 @@ class InferenceImporter(BaseRDFImporter):
|
|
|
233
228
|
"property_": property_id,
|
|
234
229
|
"max_count": cast(RdfLiteral, occurrence).value,
|
|
235
230
|
"value_type": value_type_id,
|
|
236
|
-
"
|
|
231
|
+
"instance_source": (
|
|
237
232
|
f"{uri_to_short_form(class_definition['uri'], prefixes)}"
|
|
238
233
|
f"({uri_to_short_form(cast(URIRef, property_uri), prefixes)})"
|
|
239
234
|
),
|
|
@@ -292,3 +287,231 @@ class InferenceImporter(BaseRDFImporter):
|
|
|
292
287
|
@property
|
|
293
288
|
def source_uri(self) -> URIRef:
|
|
294
289
|
return INSTANCES_ENTITY.id_
|
|
290
|
+
|
|
291
|
+
|
|
292
|
+
# Internal helper class
|
|
293
|
+
@dataclass
|
|
294
|
+
class _ReadProperties:
|
|
295
|
+
class_uri: URIRef
|
|
296
|
+
subclass_uri: URIRef
|
|
297
|
+
property_uri: URIRef
|
|
298
|
+
data_type: URIRef | None
|
|
299
|
+
object_type: URIRef | None
|
|
300
|
+
max_occurrence: int
|
|
301
|
+
instance_count: int
|
|
302
|
+
|
|
303
|
+
|
|
304
|
+
class SubclassInferenceImporter(BaseRDFImporter):
|
|
305
|
+
"""Infer subclasses from a triple store.
|
|
306
|
+
|
|
307
|
+
Assumes that the graph already is connected to a schema. The classes should
|
|
308
|
+
match the RDF.type of the instances in the graph, while the subclasses should
|
|
309
|
+
match the NEAT.type of the instances in the graph.
|
|
310
|
+
|
|
311
|
+
ClassVars:
|
|
312
|
+
overwrite_data_types: Mapping of data types to be overwritten. The InferenceImporter will overwrite
|
|
313
|
+
32-bit integer and 32-bit float data types to 64-bit integer and 64-bit float data types
|
|
314
|
+
|
|
315
|
+
Args:
|
|
316
|
+
issue_list: Issue list to store issues
|
|
317
|
+
graph: Knowledge graph
|
|
318
|
+
max_number_of_instance: Maximum number of instances to be used in inference
|
|
319
|
+
"""
|
|
320
|
+
|
|
321
|
+
overwrite_data_types: ClassVar[Mapping[URIRef, URIRef]] = {
|
|
322
|
+
data_types.Integer.as_xml_uri_ref(): data_types.Long.as_xml_uri_ref(),
|
|
323
|
+
data_types.Float.as_xml_uri_ref(): data_types.Double.as_xml_uri_ref(),
|
|
324
|
+
}
|
|
325
|
+
|
|
326
|
+
def __init__(
|
|
327
|
+
self,
|
|
328
|
+
issue_list: IssueList,
|
|
329
|
+
graph: Graph,
|
|
330
|
+
rules: InformationRules,
|
|
331
|
+
max_number_of_instance: int,
|
|
332
|
+
non_existing_node_type: UnknownEntity | AnyURI = DEFAULT_NON_EXISTING_NODE_TYPE,
|
|
333
|
+
) -> None:
|
|
334
|
+
super().__init__(
|
|
335
|
+
issue_list,
|
|
336
|
+
graph,
|
|
337
|
+
rules.metadata.as_data_model_id().as_tuple(), # type: ignore[arg-type]
|
|
338
|
+
max_number_of_instance,
|
|
339
|
+
non_existing_node_type,
|
|
340
|
+
language="en",
|
|
341
|
+
)
|
|
342
|
+
self._rules = rules
|
|
343
|
+
|
|
344
|
+
_ordered_subclass_query = f"""SELECT DISTINCT ?class ?subclass (count(?s) as ?instances )
|
|
345
|
+
WHERE {{ ?s a ?class . ?s <{NEAT.type}> ?subclass }}
|
|
346
|
+
group by ?class ?subclass order by DESC(?instances)"""
|
|
347
|
+
|
|
348
|
+
_properties_query = """SELECT DISTINCT ?property ?dataType ?objectType
|
|
349
|
+
WHERE {{
|
|
350
|
+
?s a <{type}> .
|
|
351
|
+
?s <{neat_type}> <{subtype}> .
|
|
352
|
+
?s ?property ?value .
|
|
353
|
+
BIND(datatype(?value) AS ?dataType) .
|
|
354
|
+
OPTIONAL {{?value rdf:type ?objectType}}
|
|
355
|
+
}}"""
|
|
356
|
+
|
|
357
|
+
_max_occurrence_query = """SELECT (MAX(?count) AS ?maxCount)
|
|
358
|
+
WHERE {{
|
|
359
|
+
{{
|
|
360
|
+
SELECT ?subject (COUNT(?object) AS ?count)
|
|
361
|
+
WHERE {{
|
|
362
|
+
?subject a <{type}> .
|
|
363
|
+
?subject <{neat_type}> <{subtype}> .
|
|
364
|
+
?subject <{property}> ?object .
|
|
365
|
+
}}
|
|
366
|
+
GROUP BY ?subject
|
|
367
|
+
}}
|
|
368
|
+
}}"""
|
|
369
|
+
|
|
370
|
+
def _to_rules_components(
|
|
371
|
+
self,
|
|
372
|
+
) -> dict:
|
|
373
|
+
properties_by_class_subclass_pair = self._read_class_properties_from_graph()
|
|
374
|
+
existing_classes = {class_.class_.suffix: class_ for class_ in self._rules.classes}
|
|
375
|
+
prefixes = self._rules.prefixes.copy()
|
|
376
|
+
|
|
377
|
+
classes: list[InformationInputClass] = []
|
|
378
|
+
properties: list[InformationInputProperty] = []
|
|
379
|
+
# Help for IDE
|
|
380
|
+
subclass_uri: URIRef
|
|
381
|
+
for class_uri, class_properties_iterable in itertools.groupby(
|
|
382
|
+
properties_by_class_subclass_pair, key=lambda x: x.class_uri
|
|
383
|
+
):
|
|
384
|
+
properties_by_subclass_by_property = self._get_properties_by_subclass_by_property(class_properties_iterable)
|
|
385
|
+
|
|
386
|
+
shared_property_uris = set.intersection(
|
|
387
|
+
*[
|
|
388
|
+
set(properties_by_property.keys())
|
|
389
|
+
for properties_by_property in properties_by_subclass_by_property.values()
|
|
390
|
+
]
|
|
391
|
+
)
|
|
392
|
+
class_suffix = remove_namespace_from_uri(class_uri)
|
|
393
|
+
self._add_uri_namespace_to_prefixes(class_uri, prefixes)
|
|
394
|
+
if class_suffix not in existing_classes:
|
|
395
|
+
classes.append(InformationInputClass(class_=class_suffix))
|
|
396
|
+
else:
|
|
397
|
+
classes.append(InformationInputClass.load(existing_classes[class_suffix].model_dump()))
|
|
398
|
+
shared_properties: dict[URIRef, list[_ReadProperties]] = defaultdict(list)
|
|
399
|
+
for subclass_uri, properties_by_property_uri in properties_by_subclass_by_property.items():
|
|
400
|
+
subclass_suffix = remove_namespace_from_uri(subclass_uri)
|
|
401
|
+
self._add_uri_namespace_to_prefixes(subclass_uri, prefixes)
|
|
402
|
+
if subclass_suffix not in existing_classes:
|
|
403
|
+
classes.append(InformationInputClass(class_=subclass_suffix, implements=class_suffix))
|
|
404
|
+
else:
|
|
405
|
+
classes.append(InformationInputClass.load(existing_classes[subclass_suffix].model_dump()))
|
|
406
|
+
for property_uri, read_properties in properties_by_property_uri.items():
|
|
407
|
+
if property_uri in shared_property_uris:
|
|
408
|
+
shared_properties[property_uri].extend(read_properties)
|
|
409
|
+
continue
|
|
410
|
+
properties.append(
|
|
411
|
+
self._create_property(read_properties, subclass_suffix, class_uri, property_uri, prefixes)
|
|
412
|
+
)
|
|
413
|
+
for property_uri, read_properties in shared_properties.items():
|
|
414
|
+
properties.append(
|
|
415
|
+
self._create_property(read_properties, class_suffix, class_uri, property_uri, prefixes)
|
|
416
|
+
)
|
|
417
|
+
|
|
418
|
+
return {
|
|
419
|
+
"metadata": self._rules.metadata.model_dump(),
|
|
420
|
+
"classes": [cls.dump(self._rules.metadata.prefix) for cls in classes],
|
|
421
|
+
"properties": [prop.dump(self._rules.metadata.prefix) for prop in properties],
|
|
422
|
+
"prefixes": self._rules.prefixes,
|
|
423
|
+
}
|
|
424
|
+
|
|
425
|
+
@staticmethod
|
|
426
|
+
def _get_properties_by_subclass_by_property(
|
|
427
|
+
class_properties_iterable: Iterable[_ReadProperties],
|
|
428
|
+
) -> dict[URIRef, dict[URIRef, list[_ReadProperties]]]:
|
|
429
|
+
properties_by_subclass_by_property: dict[URIRef, dict[URIRef, list[_ReadProperties]]] = {}
|
|
430
|
+
for subclass_uri, subclass_properties_iterable in itertools.groupby(
|
|
431
|
+
class_properties_iterable, key=lambda x: x.subclass_uri
|
|
432
|
+
):
|
|
433
|
+
properties_by_subclass_by_property[subclass_uri] = defaultdict(list)
|
|
434
|
+
for read_prop in subclass_properties_iterable:
|
|
435
|
+
properties_by_subclass_by_property[subclass_uri][read_prop.property_uri].append(read_prop)
|
|
436
|
+
return properties_by_subclass_by_property
|
|
437
|
+
|
|
438
|
+
def _read_class_properties_from_graph(self) -> list[_ReadProperties]:
|
|
439
|
+
count_by_class_subclass_pair: dict[tuple[URIRef, URIRef], int] = {}
|
|
440
|
+
# Infers all the classes w in the graph
|
|
441
|
+
for result_row in self.graph.query(self._ordered_subclass_query):
|
|
442
|
+
class_uri, subclass_uri, instance_count_literal = cast(tuple[URIRef, URIRef, RdfLiteral], result_row)
|
|
443
|
+
count_by_class_subclass_pair[(class_uri, subclass_uri)] = instance_count_literal.toPython()
|
|
444
|
+
analysis = InformationAnalysis(self._rules)
|
|
445
|
+
existing_class_properties = {
|
|
446
|
+
(class_entity.suffix, property)
|
|
447
|
+
for class_entity, properties in analysis.class_property_pairs(consider_inheritance=True).items()
|
|
448
|
+
for property in properties.keys()
|
|
449
|
+
}
|
|
450
|
+
properties_by_class_by_subclass: list[_ReadProperties] = []
|
|
451
|
+
for (class_uri, subclass_uri), instance_count in count_by_class_subclass_pair.items():
|
|
452
|
+
property_query = self._properties_query.format(type=class_uri, subtype=subclass_uri, neat_type=NEAT.type)
|
|
453
|
+
class_suffix = remove_namespace_from_uri(class_uri)
|
|
454
|
+
for result_row in self.graph.query(property_query):
|
|
455
|
+
property_uri, data_type_uri, object_type_uri = cast(tuple[URIRef, URIRef, URIRef], result_row)
|
|
456
|
+
if property_uri == RDF.type or property_uri == NEAT.type:
|
|
457
|
+
continue
|
|
458
|
+
property_str = remove_namespace_from_uri(property_uri)
|
|
459
|
+
if (class_suffix, property_str) in existing_class_properties:
|
|
460
|
+
continue
|
|
461
|
+
occurrence_query = self._max_occurrence_query.format(
|
|
462
|
+
type=class_uri, subtype=subclass_uri, property=property_uri, neat_type=NEAT.type
|
|
463
|
+
)
|
|
464
|
+
max_occurrence = 1 # default value
|
|
465
|
+
result_row, *_ = list(self.graph.query(occurrence_query))
|
|
466
|
+
if result_row:
|
|
467
|
+
max_occurrence_literal, *__ = cast(tuple[RdfLiteral, Any], result_row)
|
|
468
|
+
max_occurrence = int(max_occurrence_literal.toPython())
|
|
469
|
+
properties_by_class_by_subclass.append(
|
|
470
|
+
_ReadProperties(
|
|
471
|
+
class_uri=class_uri,
|
|
472
|
+
subclass_uri=subclass_uri,
|
|
473
|
+
property_uri=property_uri,
|
|
474
|
+
data_type=data_type_uri,
|
|
475
|
+
object_type=object_type_uri,
|
|
476
|
+
max_occurrence=max_occurrence,
|
|
477
|
+
instance_count=instance_count,
|
|
478
|
+
)
|
|
479
|
+
)
|
|
480
|
+
return properties_by_class_by_subclass
|
|
481
|
+
|
|
482
|
+
def _create_property(
|
|
483
|
+
self,
|
|
484
|
+
read_properties: list[_ReadProperties],
|
|
485
|
+
class_suffix: str,
|
|
486
|
+
class_uri: URIRef,
|
|
487
|
+
property_uri: URIRef,
|
|
488
|
+
prefixes: dict[str, Namespace],
|
|
489
|
+
) -> InformationInputProperty:
|
|
490
|
+
first = read_properties[0]
|
|
491
|
+
value_type = self._get_value_type(read_properties, prefixes)
|
|
492
|
+
property_name = remove_namespace_from_uri(property_uri)
|
|
493
|
+
self._add_uri_namespace_to_prefixes(property_uri, prefixes)
|
|
494
|
+
|
|
495
|
+
return InformationInputProperty(
|
|
496
|
+
class_=class_suffix,
|
|
497
|
+
property_=property_name,
|
|
498
|
+
max_count=first.max_occurrence,
|
|
499
|
+
value_type=value_type,
|
|
500
|
+
instance_source=(f"{uri_to_short_form(class_uri, prefixes)}({uri_to_short_form(property_uri, prefixes)})"),
|
|
501
|
+
)
|
|
502
|
+
|
|
503
|
+
def _get_value_type(
|
|
504
|
+
self, read_properties: list[_ReadProperties], prefixes: dict[str, Namespace]
|
|
505
|
+
) -> str | UnknownEntity:
|
|
506
|
+
value_types = {prop.data_type for prop in read_properties if prop.data_type} | {
|
|
507
|
+
prop.object_type for prop in read_properties if prop.object_type
|
|
508
|
+
}
|
|
509
|
+
if len(value_types) == 1:
|
|
510
|
+
uri_ref = value_types.pop()
|
|
511
|
+
self._add_uri_namespace_to_prefixes(uri_ref, prefixes)
|
|
512
|
+
return remove_namespace_from_uri(uri_ref)
|
|
513
|
+
elif len(value_types) == 0:
|
|
514
|
+
return UnknownEntity()
|
|
515
|
+
for uri_ref in value_types:
|
|
516
|
+
self._add_uri_namespace_to_prefixes(uri_ref, prefixes)
|
|
517
|
+
return " | ".join(remove_namespace_from_uri(uri_ref) for uri_ref in value_types)
|
|
@@ -5,7 +5,6 @@ its sub-models and validators.
|
|
|
5
5
|
import math
|
|
6
6
|
import sys
|
|
7
7
|
import types
|
|
8
|
-
import uuid
|
|
9
8
|
from abc import ABC, abstractmethod
|
|
10
9
|
from collections.abc import Callable, Hashable, Iterator, MutableSequence, Sequence
|
|
11
10
|
from datetime import datetime
|
|
@@ -22,6 +21,7 @@ from typing import (
|
|
|
22
21
|
)
|
|
23
22
|
|
|
24
23
|
import pandas as pd
|
|
24
|
+
from cognite.client import data_modeling as dm
|
|
25
25
|
from pydantic import (
|
|
26
26
|
BaseModel,
|
|
27
27
|
BeforeValidator,
|
|
@@ -31,7 +31,6 @@ from pydantic import (
|
|
|
31
31
|
PlainSerializer,
|
|
32
32
|
field_validator,
|
|
33
33
|
model_serializer,
|
|
34
|
-
model_validator,
|
|
35
34
|
)
|
|
36
35
|
from pydantic.main import IncEx
|
|
37
36
|
from pydantic_core import core_schema
|
|
@@ -182,6 +181,12 @@ class BaseMetadata(SchemaModel):
|
|
|
182
181
|
description="Date of the data model update",
|
|
183
182
|
)
|
|
184
183
|
|
|
184
|
+
source_id: URIRefType | None = Field(
|
|
185
|
+
None,
|
|
186
|
+
description="Id of source that produced this rules",
|
|
187
|
+
alias="sourceId",
|
|
188
|
+
)
|
|
189
|
+
|
|
185
190
|
@field_validator("*", mode="before")
|
|
186
191
|
def strip_string(cls, value: Any) -> Any:
|
|
187
192
|
if isinstance(value, str):
|
|
@@ -215,9 +220,6 @@ class BaseMetadata(SchemaModel):
|
|
|
215
220
|
def prefix(self) -> str:
|
|
216
221
|
return self.space
|
|
217
222
|
|
|
218
|
-
def as_identifier(self) -> str:
|
|
219
|
-
return f"{self.prefix}:{self.external_id}"
|
|
220
|
-
|
|
221
223
|
def get_prefix(self) -> str:
|
|
222
224
|
return self.prefix
|
|
223
225
|
|
|
@@ -236,6 +238,12 @@ class BaseMetadata(SchemaModel):
|
|
|
236
238
|
"""Namespace for the data model used for the entities in the data model."""
|
|
237
239
|
return Namespace(f"{self.identifier}/")
|
|
238
240
|
|
|
241
|
+
def as_data_model_id(self) -> dm.DataModelId:
|
|
242
|
+
return dm.DataModelId(space=self.space, external_id=self.external_id, version=self.version)
|
|
243
|
+
|
|
244
|
+
def as_identifier(self) -> str:
|
|
245
|
+
return repr(self.as_data_model_id())
|
|
246
|
+
|
|
239
247
|
|
|
240
248
|
class BaseRules(SchemaModel, ABC):
|
|
241
249
|
"""
|
|
@@ -343,10 +351,6 @@ class BaseRules(SchemaModel, ABC):
|
|
|
343
351
|
return output
|
|
344
352
|
|
|
345
353
|
|
|
346
|
-
def make_neat_id() -> URIRef:
|
|
347
|
-
return DEFAULT_NAMESPACE[f"neatId_{str(uuid.uuid4()).replace('-', '_')}"]
|
|
348
|
-
|
|
349
|
-
|
|
350
354
|
class SheetRow(SchemaModel):
|
|
351
355
|
neatId: URIRefType | None = Field(
|
|
352
356
|
alias="Neat ID",
|
|
@@ -354,12 +358,6 @@ class SheetRow(SchemaModel):
|
|
|
354
358
|
default=None,
|
|
355
359
|
)
|
|
356
360
|
|
|
357
|
-
@model_validator(mode="after")
|
|
358
|
-
def set_neat_id(self) -> "SheetRow":
|
|
359
|
-
if self.neatId is None:
|
|
360
|
-
self.neatId = DEFAULT_NAMESPACE[f"neatId_{str(uuid.uuid4()).replace('-', '_')}"]
|
|
361
|
-
return self
|
|
362
|
-
|
|
363
361
|
@abstractmethod
|
|
364
362
|
def _identifier(self) -> tuple[Hashable, ...]:
|
|
365
363
|
raise NotImplementedError()
|
|
@@ -76,6 +76,11 @@ NamespaceType = Annotated[
|
|
|
76
76
|
URIRefType = Annotated[
|
|
77
77
|
rdflib.URIRef,
|
|
78
78
|
BeforeValidator(lambda value: rdflib.URIRef(value)),
|
|
79
|
+
PlainSerializer(
|
|
80
|
+
lambda value: str(value),
|
|
81
|
+
return_type=str,
|
|
82
|
+
when_used="unless-none",
|
|
83
|
+
),
|
|
79
84
|
]
|
|
80
85
|
|
|
81
86
|
PrefixType = Annotated[
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
import warnings
|
|
2
2
|
from collections.abc import Hashable
|
|
3
|
-
from typing import Any, ClassVar, Literal
|
|
3
|
+
from typing import TYPE_CHECKING, Any, ClassVar, Literal
|
|
4
4
|
|
|
5
5
|
import pandas as pd
|
|
6
6
|
from cognite.client import data_modeling as dm
|
|
7
|
-
from pydantic import Field, field_serializer, field_validator
|
|
7
|
+
from pydantic import Field, field_serializer, field_validator, model_validator
|
|
8
8
|
from pydantic_core.core_schema import SerializationInfo, ValidationInfo
|
|
9
9
|
|
|
10
10
|
from cognite.neat._client.data_classes.schema import DMSSchema
|
|
@@ -48,6 +48,9 @@ from cognite.neat._rules.models.entities import (
|
|
|
48
48
|
ViewEntityList,
|
|
49
49
|
)
|
|
50
50
|
|
|
51
|
+
if TYPE_CHECKING:
|
|
52
|
+
from cognite.neat._rules.models import InformationRules
|
|
53
|
+
|
|
51
54
|
_DEFAULT_VERSION = "1"
|
|
52
55
|
|
|
53
56
|
|
|
@@ -61,9 +64,6 @@ class DMSMetadata(BaseMetadata):
|
|
|
61
64
|
space=self.space,
|
|
62
65
|
)
|
|
63
66
|
|
|
64
|
-
def as_data_model_id(self) -> dm.DataModelId:
|
|
65
|
-
return dm.DataModelId(space=self.space, external_id=self.external_id, version=self.version)
|
|
66
|
-
|
|
67
67
|
def as_data_model(self) -> dm.DataModelApply:
|
|
68
68
|
suffix = f"Creator: {', '.join(self.creator)}"
|
|
69
69
|
if self.description:
|
|
@@ -80,9 +80,6 @@ class DMSMetadata(BaseMetadata):
|
|
|
80
80
|
views=[],
|
|
81
81
|
)
|
|
82
82
|
|
|
83
|
-
def as_identifier(self) -> str:
|
|
84
|
-
return repr(self.as_data_model_id())
|
|
85
|
-
|
|
86
83
|
def get_prefix(self) -> str:
|
|
87
84
|
return self.space
|
|
88
85
|
|
|
@@ -106,8 +103,7 @@ class DMSProperty(SheetRow):
|
|
|
106
103
|
)
|
|
107
104
|
value_type: DataType | ViewEntity | DMSUnknownEntity = Field(
|
|
108
105
|
alias="Value Type",
|
|
109
|
-
description="Value type that the property can hold. "
|
|
110
|
-
"It takes either subset of CDF primitive types or a View id",
|
|
106
|
+
description="Value type that the property can hold. It takes either subset of CDF primitive types or a View id",
|
|
111
107
|
)
|
|
112
108
|
nullable: bool | None = Field(
|
|
113
109
|
default=None,
|
|
@@ -442,6 +438,51 @@ class DMSRules(BaseRules):
|
|
|
442
438
|
)
|
|
443
439
|
return value
|
|
444
440
|
|
|
441
|
+
@model_validator(mode="after")
|
|
442
|
+
def set_neat_id(self) -> "DMSRules":
|
|
443
|
+
namespace = self.metadata.namespace
|
|
444
|
+
|
|
445
|
+
for view in self.views:
|
|
446
|
+
if not view.neatId:
|
|
447
|
+
view.neatId = namespace[view.view.suffix]
|
|
448
|
+
|
|
449
|
+
for property_ in self.properties:
|
|
450
|
+
if not property_.neatId:
|
|
451
|
+
property_.neatId = namespace[f"{property_.view.suffix}/{property_.view_property}"]
|
|
452
|
+
|
|
453
|
+
return self
|
|
454
|
+
|
|
455
|
+
def update_neat_id(self) -> None:
|
|
456
|
+
"""Update neat ids"""
|
|
457
|
+
|
|
458
|
+
namespace = self.metadata.namespace
|
|
459
|
+
|
|
460
|
+
for view in self.views:
|
|
461
|
+
view.neatId = namespace[view.view.suffix]
|
|
462
|
+
|
|
463
|
+
for property_ in self.properties:
|
|
464
|
+
property_.neatId = namespace[f"{property_.view.suffix}/{property_.view_property}"]
|
|
465
|
+
|
|
466
|
+
def sync_with_info_rules(self, info_rules: "InformationRules") -> None:
|
|
467
|
+
# Sync at the metadata level
|
|
468
|
+
if info_rules.metadata.physical == self.metadata.identifier:
|
|
469
|
+
self.metadata.logical = info_rules.metadata.identifier
|
|
470
|
+
else:
|
|
471
|
+
# if models are not linked to start with, we skip
|
|
472
|
+
return None
|
|
473
|
+
|
|
474
|
+
info_properties_by_neat_id = {prop.neatId: prop for prop in info_rules.properties}
|
|
475
|
+
dms_properties_by_neat_id = {prop.neatId: prop for prop in self.properties}
|
|
476
|
+
for neat_id, prop in info_properties_by_neat_id.items():
|
|
477
|
+
if prop.physical in dms_properties_by_neat_id:
|
|
478
|
+
dms_properties_by_neat_id[prop.physical].logical = neat_id
|
|
479
|
+
|
|
480
|
+
info_classes_by_neat_id = {cls.neatId: cls for cls in info_rules.classes}
|
|
481
|
+
dms_views_by_neat_id = {view.neatId: view for view in self.views}
|
|
482
|
+
for neat_id, class_ in info_classes_by_neat_id.items():
|
|
483
|
+
if class_.physical in dms_views_by_neat_id:
|
|
484
|
+
dms_views_by_neat_id[class_.physical].logical = neat_id
|
|
485
|
+
|
|
445
486
|
def as_schema(self, instance_space: str | None = None, remove_cdf_spaces: bool = False) -> DMSSchema:
|
|
446
487
|
from ._exporter import _DMSExporter
|
|
447
488
|
|
|
@@ -37,6 +37,7 @@ class DMSInputMetadata(InputComponent[DMSMetadata]):
|
|
|
37
37
|
created: datetime | str | None = None
|
|
38
38
|
updated: datetime | str | None = None
|
|
39
39
|
logical: str | URIRef | None = None
|
|
40
|
+
source_id: str | URIRef | None = None
|
|
40
41
|
|
|
41
42
|
@classmethod
|
|
42
43
|
def _get_verified_cls(cls) -> type[DMSMetadata]:
|
|
@@ -77,6 +78,9 @@ class DMSInputMetadata(InputComponent[DMSMetadata]):
|
|
|
77
78
|
description = None
|
|
78
79
|
return description, creator
|
|
79
80
|
|
|
81
|
+
def as_data_model_id(self) -> dm.DataModelId:
|
|
82
|
+
return dm.DataModelId(space=self.space, external_id=self.external_id, version=self.version)
|
|
83
|
+
|
|
80
84
|
@property
|
|
81
85
|
def identifier(self) -> URIRef:
|
|
82
86
|
"""Globally unique identifier for the data model.
|
|
@@ -121,7 +121,7 @@ class InformationProperty(SheetRow):
|
|
|
121
121
|
min_count: Minimum count of the property values. Defaults to 0
|
|
122
122
|
max_count: Maximum count of the property values. Defaults to None
|
|
123
123
|
default: Default value of the property
|
|
124
|
-
|
|
124
|
+
instance_source: Actual rule for the transformation from source to target representation of
|
|
125
125
|
knowledge graph. Defaults to None (no transformation)
|
|
126
126
|
"""
|
|
127
127
|
|
|
@@ -153,10 +153,10 @@ class InformationProperty(SheetRow):
|
|
|
153
153
|
"which means that the property can hold any number of values (listable).",
|
|
154
154
|
)
|
|
155
155
|
default: Any | None = Field(alias="Default", default=None, description="Default value of the property.")
|
|
156
|
-
|
|
157
|
-
alias="
|
|
156
|
+
instance_source: RDFPath | None = Field(
|
|
157
|
+
alias="Instance Source",
|
|
158
158
|
default=None,
|
|
159
|
-
description="The
|
|
159
|
+
description="The link to to the instance property for the model. "
|
|
160
160
|
"The rule is provided in a RDFPath query syntax which is converted to downstream solution query (e.g. SPARQL).",
|
|
161
161
|
)
|
|
162
162
|
inherited: bool = Field(
|
|
@@ -181,7 +181,7 @@ class InformationProperty(SheetRow):
|
|
|
181
181
|
return float("inf")
|
|
182
182
|
return value
|
|
183
183
|
|
|
184
|
-
@field_validator("
|
|
184
|
+
@field_validator("instance_source", mode="before")
|
|
185
185
|
def generate_rdfpath(cls, value: str | RDFPath | None) -> RDFPath | None:
|
|
186
186
|
if value is None or isinstance(value, RDFPath):
|
|
187
187
|
return value
|
|
@@ -267,6 +267,49 @@ class InformationRules(BaseRules):
|
|
|
267
267
|
values = get_default_prefixes_and_namespaces()
|
|
268
268
|
return values
|
|
269
269
|
|
|
270
|
+
@model_validator(mode="after")
|
|
271
|
+
def set_neat_id(self) -> "InformationRules":
|
|
272
|
+
namespace = self.metadata.namespace
|
|
273
|
+
|
|
274
|
+
for class_ in self.classes:
|
|
275
|
+
if not class_.neatId:
|
|
276
|
+
class_.neatId = namespace[class_.class_.suffix]
|
|
277
|
+
for property_ in self.properties:
|
|
278
|
+
if not property_.neatId:
|
|
279
|
+
property_.neatId = namespace[f"{property_.class_.suffix}/{property_.property_}"]
|
|
280
|
+
|
|
281
|
+
return self
|
|
282
|
+
|
|
283
|
+
def update_neat_id(self) -> None:
|
|
284
|
+
"""Update neat ids"""
|
|
285
|
+
|
|
286
|
+
namespace = self.metadata.namespace
|
|
287
|
+
|
|
288
|
+
for class_ in self.classes:
|
|
289
|
+
class_.neatId = namespace[class_.class_.suffix]
|
|
290
|
+
for property_ in self.properties:
|
|
291
|
+
property_.neatId = namespace[f"{property_.class_.suffix}/{property_.property_}"]
|
|
292
|
+
|
|
293
|
+
def sync_with_dms_rules(self, dms_rules: "DMSRules") -> None:
|
|
294
|
+
# Sync at the metadata level
|
|
295
|
+
if dms_rules.metadata.logical == self.metadata.identifier:
|
|
296
|
+
self.metadata.physical = dms_rules.metadata.identifier
|
|
297
|
+
else:
|
|
298
|
+
# if models are not linked to start with, we skip
|
|
299
|
+
return None
|
|
300
|
+
|
|
301
|
+
info_properties_by_neat_id = {prop.neatId: prop for prop in self.properties}
|
|
302
|
+
dms_properties_by_neat_id = {prop.neatId: prop for prop in dms_rules.properties}
|
|
303
|
+
for neat_id, prop in dms_properties_by_neat_id.items():
|
|
304
|
+
if prop.logical in info_properties_by_neat_id:
|
|
305
|
+
info_properties_by_neat_id[prop.logical].physical = neat_id
|
|
306
|
+
|
|
307
|
+
info_classes_by_neat_id = {cls.neatId: cls for cls in self.classes}
|
|
308
|
+
dms_views_by_neat_id = {view.neatId: view for view in dms_rules.views}
|
|
309
|
+
for neat_id, view in dms_views_by_neat_id.items():
|
|
310
|
+
if view.logical in info_classes_by_neat_id:
|
|
311
|
+
info_classes_by_neat_id[view.logical].physical = neat_id
|
|
312
|
+
|
|
270
313
|
def as_dms_rules(self) -> "DMSRules":
|
|
271
314
|
from cognite.neat._rules.transformers._converters import _InformationRulesConverter
|
|
272
315
|
|
|
@@ -3,6 +3,7 @@ from datetime import datetime
|
|
|
3
3
|
from typing import Any
|
|
4
4
|
|
|
5
5
|
import pandas as pd
|
|
6
|
+
from cognite.client import data_modeling as dm
|
|
6
7
|
from rdflib import Namespace, URIRef
|
|
7
8
|
|
|
8
9
|
from cognite.neat._constants import DEFAULT_NAMESPACE
|
|
@@ -36,6 +37,7 @@ class InformationInputMetadata(InputComponent[InformationMetadata]):
|
|
|
36
37
|
updated: datetime | str | None = None
|
|
37
38
|
physical: str | URIRef | None = None
|
|
38
39
|
conceptual: str | URIRef | None = None
|
|
40
|
+
source_id: str | URIRef | None = None
|
|
39
41
|
|
|
40
42
|
@classmethod
|
|
41
43
|
def _get_verified_cls(cls) -> type[InformationMetadata]:
|
|
@@ -49,6 +51,9 @@ class InformationInputMetadata(InputComponent[InformationMetadata]):
|
|
|
49
51
|
output["updated"] = datetime.now()
|
|
50
52
|
return output
|
|
51
53
|
|
|
54
|
+
def as_data_model_id(self) -> dm.DataModelId:
|
|
55
|
+
return dm.DataModelId(space=self.space, external_id=self.external_id, version=self.version)
|
|
56
|
+
|
|
52
57
|
@property
|
|
53
58
|
def prefix(self) -> str:
|
|
54
59
|
return self.space
|
|
@@ -79,7 +84,7 @@ class InformationInputProperty(InputComponent[InformationProperty]):
|
|
|
79
84
|
min_count: int | None = None
|
|
80
85
|
max_count: int | float | None = None
|
|
81
86
|
default: Any | None = None
|
|
82
|
-
|
|
87
|
+
instance_source: str | None = None
|
|
83
88
|
# Only used internally
|
|
84
89
|
inherited: bool = False
|
|
85
90
|
neatId: str | URIRef | None = None
|
|
@@ -15,14 +15,13 @@ def _read_source_file() -> str:
|
|
|
15
15
|
return _CLASSIC_TO_CORE_MAPPING.read_text()
|
|
16
16
|
|
|
17
17
|
|
|
18
|
-
def load_classic_to_core_mapping(org_name: str, source_space: str, source_version: str) -> DMSRules:
|
|
19
|
-
if not org_name:
|
|
20
|
-
raise NeatValueError("Organization name must be provided.")
|
|
21
|
-
|
|
18
|
+
def load_classic_to_core_mapping(org_name: str | None, source_space: str, source_version: str) -> DMSRules:
|
|
22
19
|
from cognite.neat._rules.importers import YAMLImporter
|
|
23
20
|
from cognite.neat._rules.transformers import VerifyDMSRules
|
|
24
21
|
|
|
25
|
-
raw_str = _read_source_file()
|
|
22
|
+
raw_str = _read_source_file()
|
|
23
|
+
if org_name is not None:
|
|
24
|
+
raw_str = raw_str.replace("Classic", org_name)
|
|
26
25
|
|
|
27
26
|
loaded = yaml.safe_load(raw_str)
|
|
28
27
|
loaded["metadata"]["space"] = source_space
|
|
@@ -1,10 +1,15 @@
|
|
|
1
1
|
from ._base import RulesTransformer
|
|
2
2
|
from ._converters import (
|
|
3
3
|
AddClassImplements,
|
|
4
|
+
ChangeViewPrefix,
|
|
5
|
+
ClassicPrepareCore,
|
|
6
|
+
ConversionTransformer,
|
|
4
7
|
ConvertToRules,
|
|
5
8
|
DMSToInformation,
|
|
6
9
|
IncludeReferenced,
|
|
7
10
|
InformationToDMS,
|
|
11
|
+
MergeDMSRules,
|
|
12
|
+
MergeInformationRules,
|
|
8
13
|
PrefixEntities,
|
|
9
14
|
ReduceCogniteModel,
|
|
10
15
|
SetIDDMSModel,
|
|
@@ -20,11 +25,16 @@ from ._verification import VerifyAnyRules, VerifyDMSRules, VerifyInformationRule
|
|
|
20
25
|
__all__ = [
|
|
21
26
|
"AddClassImplements",
|
|
22
27
|
"AsParentPropertyId",
|
|
28
|
+
"ChangeViewPrefix",
|
|
29
|
+
"ClassicPrepareCore",
|
|
30
|
+
"ConversionTransformer",
|
|
23
31
|
"ConvertToRules",
|
|
24
32
|
"DMSToInformation",
|
|
25
33
|
"IncludeReferenced",
|
|
26
34
|
"InformationToDMS",
|
|
27
35
|
"MapOneToOne",
|
|
36
|
+
"MergeDMSRules",
|
|
37
|
+
"MergeInformationRules",
|
|
28
38
|
"PrefixEntities",
|
|
29
39
|
"ReduceCogniteModel",
|
|
30
40
|
"RuleMapper",
|