cognite-neat 0.107.0__py3-none-any.whl → 0.109.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cognite-neat might be problematic. Click here for more details.
- cognite/neat/_constants.py +35 -1
- cognite/neat/_graph/_shared.py +4 -0
- cognite/neat/_graph/extractors/_classic_cdf/_base.py +115 -14
- cognite/neat/_graph/extractors/_classic_cdf/_classic.py +87 -6
- cognite/neat/_graph/extractors/_classic_cdf/_relationships.py +48 -12
- cognite/neat/_graph/extractors/_classic_cdf/_sequences.py +19 -1
- cognite/neat/_graph/extractors/_dms.py +162 -47
- cognite/neat/_graph/extractors/_dms_graph.py +54 -4
- cognite/neat/_graph/extractors/_mock_graph_generator.py +1 -1
- cognite/neat/_graph/extractors/_rdf_file.py +3 -2
- cognite/neat/_graph/loaders/__init__.py +1 -3
- cognite/neat/_graph/loaders/_rdf2dms.py +20 -10
- cognite/neat/_graph/queries/_base.py +144 -84
- cognite/neat/_graph/queries/_construct.py +1 -1
- cognite/neat/_graph/transformers/__init__.py +3 -1
- cognite/neat/_graph/transformers/_base.py +4 -4
- cognite/neat/_graph/transformers/_classic_cdf.py +13 -13
- cognite/neat/_graph/transformers/_prune_graph.py +3 -3
- cognite/neat/_graph/transformers/_rdfpath.py +3 -4
- cognite/neat/_graph/transformers/_value_type.py +71 -13
- cognite/neat/_issues/errors/__init__.py +2 -0
- cognite/neat/_issues/errors/_external.py +8 -0
- cognite/neat/_issues/errors/_resources.py +1 -1
- cognite/neat/_issues/warnings/__init__.py +0 -2
- cognite/neat/_issues/warnings/_models.py +1 -1
- cognite/neat/_issues/warnings/_properties.py +0 -8
- cognite/neat/_issues/warnings/_resources.py +1 -1
- cognite/neat/_rules/catalog/classic_model.xlsx +0 -0
- cognite/neat/_rules/exporters/_rules2instance_template.py +3 -3
- cognite/neat/_rules/exporters/_rules2yaml.py +1 -1
- cognite/neat/_rules/importers/__init__.py +3 -1
- cognite/neat/_rules/importers/_dtdl2rules/spec.py +1 -2
- cognite/neat/_rules/importers/_rdf/__init__.py +2 -2
- cognite/neat/_rules/importers/_rdf/_base.py +2 -2
- cognite/neat/_rules/importers/_rdf/_inference2rules.py +310 -26
- cognite/neat/_rules/models/_base_rules.py +22 -11
- cognite/neat/_rules/models/dms/_exporter.py +5 -4
- cognite/neat/_rules/models/dms/_rules.py +1 -8
- cognite/neat/_rules/models/dms/_rules_input.py +4 -0
- cognite/neat/_rules/models/information/_rules_input.py +5 -0
- cognite/neat/_rules/transformers/__init__.py +10 -3
- cognite/neat/_rules/transformers/_base.py +6 -1
- cognite/neat/_rules/transformers/_converters.py +530 -364
- cognite/neat/_rules/transformers/_mapping.py +4 -4
- cognite/neat/_session/_base.py +100 -47
- cognite/neat/_session/_create.py +133 -0
- cognite/neat/_session/_drop.py +60 -2
- cognite/neat/_session/_fix.py +28 -0
- cognite/neat/_session/_inspect.py +22 -7
- cognite/neat/_session/_mapping.py +8 -8
- cognite/neat/_session/_prepare.py +3 -247
- cognite/neat/_session/_read.py +138 -17
- cognite/neat/_session/_set.py +50 -1
- cognite/neat/_session/_show.py +16 -43
- cognite/neat/_session/_state.py +53 -52
- cognite/neat/_session/_to.py +11 -4
- cognite/neat/_session/_wizard.py +1 -1
- cognite/neat/_session/exceptions.py +8 -1
- cognite/neat/_store/_graph_store.py +301 -146
- cognite/neat/_store/_provenance.py +36 -20
- cognite/neat/_store/_rules_store.py +253 -267
- cognite/neat/_store/exceptions.py +40 -4
- cognite/neat/_utils/auth.py +5 -3
- cognite/neat/_version.py +1 -1
- {cognite_neat-0.107.0.dist-info → cognite_neat-0.109.0.dist-info}/METADATA +1 -1
- {cognite_neat-0.107.0.dist-info → cognite_neat-0.109.0.dist-info}/RECORD +69 -67
- {cognite_neat-0.107.0.dist-info → cognite_neat-0.109.0.dist-info}/LICENSE +0 -0
- {cognite_neat-0.107.0.dist-info → cognite_neat-0.109.0.dist-info}/WHEEL +0 -0
- {cognite_neat-0.107.0.dist-info → cognite_neat-0.109.0.dist-info}/entry_points.txt +0 -0
|
@@ -1,18 +1,25 @@
|
|
|
1
|
+
import itertools
|
|
1
2
|
from collections import Counter, defaultdict
|
|
2
|
-
from collections.abc import Mapping
|
|
3
|
+
from collections.abc import Iterable, Mapping
|
|
4
|
+
from dataclasses import dataclass
|
|
3
5
|
from datetime import datetime, timezone
|
|
4
6
|
from pathlib import Path
|
|
5
|
-
from typing import ClassVar, cast
|
|
7
|
+
from typing import Any, ClassVar, cast
|
|
6
8
|
|
|
7
9
|
from cognite.client import data_modeling as dm
|
|
8
|
-
from rdflib import RDF, Namespace, URIRef
|
|
10
|
+
from rdflib import RDF, RDFS, Graph, Namespace, URIRef
|
|
9
11
|
from rdflib import Literal as RdfLiteral
|
|
10
12
|
|
|
11
|
-
from cognite.neat.
|
|
12
|
-
from cognite.neat.
|
|
13
|
+
from cognite.neat._constants import NEAT, get_default_prefixes_and_namespaces
|
|
14
|
+
from cognite.neat._issues import IssueList
|
|
15
|
+
from cognite.neat._issues.warnings import PropertyValueTypeUndefinedWarning
|
|
16
|
+
from cognite.neat._rules.analysis import InformationAnalysis
|
|
17
|
+
from cognite.neat._rules.models import InformationRules, data_types
|
|
13
18
|
from cognite.neat._rules.models.data_types import AnyURI
|
|
14
19
|
from cognite.neat._rules.models.entities._single_value import UnknownEntity
|
|
15
20
|
from cognite.neat._rules.models.information import (
|
|
21
|
+
InformationInputClass,
|
|
22
|
+
InformationInputProperty,
|
|
16
23
|
InformationMetadata,
|
|
17
24
|
)
|
|
18
25
|
from cognite.neat._store import NeatGraphStore
|
|
@@ -74,7 +81,7 @@ class InferenceImporter(BaseRDFImporter):
|
|
|
74
81
|
def from_graph_store(
|
|
75
82
|
cls,
|
|
76
83
|
store: NeatGraphStore,
|
|
77
|
-
data_model_id:
|
|
84
|
+
data_model_id: dm.DataModelId | tuple[str, str, str] = DEFAULT_INFERENCE_DATA_MODEL_ID,
|
|
78
85
|
max_number_of_instance: int = -1,
|
|
79
86
|
non_existing_node_type: UnknownEntity | AnyURI = DEFAULT_NON_EXISTING_NODE_TYPE,
|
|
80
87
|
language: str = "en",
|
|
@@ -157,7 +164,7 @@ class InferenceImporter(BaseRDFImporter):
|
|
|
157
164
|
for class_uri, no_instances in self.graph.query(ORDERED_CLASSES_QUERY): # type: ignore[misc]
|
|
158
165
|
if (class_id := remove_namespace_from_uri(cast(URIRef, class_uri))) in classes:
|
|
159
166
|
# handles cases when class id is already present in classes
|
|
160
|
-
class_id = f"{class_id}_{len(classes)+1}"
|
|
167
|
+
class_id = f"{class_id}_{len(classes) + 1}"
|
|
161
168
|
|
|
162
169
|
classes[class_id] = {
|
|
163
170
|
"class_": class_id,
|
|
@@ -187,21 +194,10 @@ class InferenceImporter(BaseRDFImporter):
|
|
|
187
194
|
INSTANCE_PROPERTIES_DEFINITION.replace("instance_id", instance)
|
|
188
195
|
): # type: ignore[misc]
|
|
189
196
|
# this is to skip rdf:type property
|
|
197
|
+
|
|
190
198
|
if property_uri == RDF.type:
|
|
191
199
|
continue
|
|
192
200
|
property_id = remove_namespace_from_uri(property_uri)
|
|
193
|
-
if property_id in {"external_id", "externalId"}:
|
|
194
|
-
skip_issue = PropertySkippedWarning(
|
|
195
|
-
resource_type="Property",
|
|
196
|
-
identifier=f"{class_id}:{property_id}",
|
|
197
|
-
property_name=property_id,
|
|
198
|
-
reason="External ID is assumed to be the unique identifier of the instance "
|
|
199
|
-
"and is not part of the data model schema.",
|
|
200
|
-
)
|
|
201
|
-
if skip_issue not in self.issue_list:
|
|
202
|
-
self.issue_list.append(skip_issue)
|
|
203
|
-
continue
|
|
204
|
-
|
|
205
201
|
self._add_uri_namespace_to_prefixes(cast(URIRef, property_uri), prefixes)
|
|
206
202
|
|
|
207
203
|
if isinstance(data_type_uri, URIRef):
|
|
@@ -250,13 +246,8 @@ class InferenceImporter(BaseRDFImporter):
|
|
|
250
246
|
elif id_ in properties and definition["value_type"] not in properties[id_]["value_type"]:
|
|
251
247
|
properties[id_]["value_type"].add(definition["value_type"])
|
|
252
248
|
|
|
253
|
-
#
|
|
254
|
-
|
|
255
|
-
id_ in properties
|
|
256
|
-
and definition["value_type"] in properties[id_]["value_type"]
|
|
257
|
-
and properties[id_]["max_count"] != definition["max_count"]
|
|
258
|
-
):
|
|
259
|
-
properties[id_]["max_count"] = max(properties[id_]["max_count"], definition["max_count"])
|
|
249
|
+
# always keep the highest max_count observed so far for this property
|
|
250
|
+
properties[id_]["max_count"] = max(properties[id_]["max_count"], definition["max_count"])
|
|
260
251
|
|
|
261
252
|
# Create multi-value properties otherwise single value
|
|
262
253
|
for property_ in properties.values():
|
|
@@ -292,3 +283,296 @@ class InferenceImporter(BaseRDFImporter):
|
|
|
292
283
|
@property
|
|
293
284
|
def source_uri(self) -> URIRef:
|
|
294
285
|
return INSTANCES_ENTITY.id_
|
|
286
|
+
|
|
287
|
+
|
|
288
|
+
# Internal helper class
|
|
289
|
+
@dataclass
class _ReadProperties:
    """A single (class, property, value type) observation read from the graph.

    Instances are produced by ``SubclassInferenceImporter`` while it scans the
    graph and are later grouped by parent class, class and property.
    """

    # RDF type (class) of the instances the property was observed on.
    type_uri: URIRef
    # Predicate connecting the instance to its value.
    property_uri: URIRef
    # Datatype of a literal value, the type of an object value, or the
    # "unknown type" placeholder when neither is available.
    value_type: URIRef
    # Parent class of ``type_uri`` when the graph declares one, else ``None``.
    parent_uri: URIRef | None
    # Highest number of values a single instance holds for this property.
    max_occurrence: int
    # Number of instances of ``type_uri`` found in the graph.
    instance_count: int
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
class SubclassInferenceImporter(BaseRDFImporter):
    """Infer classes, subclasses and their properties from a triple store.

    Assumes that the graph already is connected to a schema. The classes should
    match the RDF.type of the instances in the graph, while the subclasses should
    match the NEAT.type of the instances in the graph.

    Properties that are present on every subclass of a parent are lifted to the
    parent class; all other properties stay on the class they were observed on.

    ClassVars:
        overwrite_data_types: Mapping of data types to be overwritten. This importer overwrites
            32-bit integer and 32-bit float data types to 64-bit integer and 64-bit float data types

    Args:
        issue_list: Issue list to store issues
        graph: Knowledge graph
        rules: Existing information rules to extend. Classes and properties already
            defined there are reused instead of being inferred again.
        data_model_id: Identifier of the data model to create when no rules are given.
        non_existing_node_type: Forwarded unchanged to the base importer.

    Raises:
        ValueError: If not exactly one of ``rules`` and ``data_model_id`` is provided.
    """

    overwrite_data_types: ClassVar[Mapping[URIRef, URIRef]] = {
        data_types.Integer.as_xml_uri_ref(): data_types.Long.as_xml_uri_ref(),
        data_types.Float.as_xml_uri_ref(): data_types.Double.as_xml_uri_ref(),
    }

    # All types in the graph together with their instance count, most populated first.
    _ordered_class_query = """SELECT DISTINCT ?class (count(?s) as ?instances )
                           WHERE { ?s a ?class }
                           group by ?class order by DESC(?instances)"""

    # Parent class (rdfs:subClassOf) of every type that has at least one instance.
    _type_parent_query = f"""SELECT ?parent ?type
                            WHERE {{ ?s a ?type .
                            ?type <{RDFS.subClassOf}> ?parent }}"""

    # Properties used by instances of ``type`` with the value type of each object:
    # the literal datatype, the object's type, or ``unknown_type`` as fallback.
    # The doubled braces are escapes for ``str.format``.
    _properties_query = """SELECT DISTINCT ?property ?valueType
                         WHERE {{
                            ?s a <{type}> .
                            ?s ?property ?object .
                            OPTIONAL {{ ?object a ?objectType }}
                            BIND(
                               IF(
                                    isLiteral(?object), datatype(?object),
                                    IF(BOUND(?objectType), ?objectType, <{unknown_type}>)
                                ) AS ?valueType
                            )
                        }}"""

    # Highest number of values any single instance of ``type`` has for ``property``.
    _max_occurrence_query = """SELECT (MAX(?count) AS ?maxCount)
                            WHERE {{
                              {{
                                SELECT ?subject (COUNT(?object) AS ?count)
                                WHERE {{
                                  ?subject a <{type}> .
                                  ?subject <{property}> ?object .
                                }}
                                GROUP BY ?subject
                              }}
                            }}"""

    def __init__(
        self,
        issue_list: IssueList,
        graph: Graph,
        rules: InformationRules | None = None,
        data_model_id: dm.DataModelId | tuple[str, str, str] | None = None,
        non_existing_node_type: UnknownEntity | AnyURI = DEFAULT_NON_EXISTING_NODE_TYPE,
    ) -> None:
        # ``rules`` and ``data_model_id`` are mutually exclusive and one of them is required.
        if (rules is None) == (data_model_id is None):
            raise ValueError("Exactly one of rules or data_model_id must be provided.")
        if data_model_id is not None:
            identifier = data_model_id
        else:
            # The check above guarantees that ``rules`` is set on this branch.
            identifier = cast(InformationRules, rules).metadata.as_data_model_id().as_tuple()  # type: ignore[assignment]
        super().__init__(issue_list, graph, identifier, -1, non_existing_node_type, language="en")
        self._rules = rules

    def _to_rules_components(
        self,
    ) -> dict:
        """Read the graph and return the metadata, classes, properties and prefixes of the inferred rules."""
        if self._rules:
            prefixes = self._rules.prefixes.copy()
        else:
            prefixes = get_default_prefixes_and_namespaces()

        parent_by_child = self._read_parent_by_child_from_graph()
        read_properties = self._read_class_properties_from_graph(parent_by_child)
        classes, properties = self._create_classes_properties(read_properties, prefixes)

        if self._rules:
            metadata = self._rules.metadata.model_dump()
            default_space = self._rules.metadata.prefix
        else:
            metadata = self._default_metadata()
            default_space = metadata["space"]
        return {
            "metadata": metadata,
            "classes": [cls.dump(default_space) for cls in classes],
            "properties": [prop.dump(default_space) for prop in properties],
            "prefixes": prefixes,
        }

    def _create_classes_properties(
        self, read_properties: list[_ReadProperties], prefixes: dict[str, Namespace]
    ) -> tuple[list[InformationInputClass], list[InformationInputProperty]]:
        """Turn the raw graph observations into input classes and input properties.

        Observations are grouped by parent class. Within a group, properties found on
        every class of the group are attached to the parent; the rest stay on the class.
        ``prefixes`` is updated in place with the namespaces of all URIs encountered.
        """
        if self._rules:
            existing_classes = {class_.class_.suffix: class_ for class_ in self._rules.classes}
        else:
            existing_classes = {}
        classes: list[InformationInputClass] = []
        properties: list[InformationInputProperty] = []

        def parent_key(read_property: _ReadProperties) -> URIRef:
            # Classes without a parent are grouped under the NEAT.EmptyType placeholder.
            return read_property.parent_uri or NEAT.EmptyType

        # Help for IDE
        type_uri: URIRef
        parent_uri: URIRef
        # itertools.groupby only groups consecutive items, hence the sort on the same key.
        for parent_uri, sibling_properties in itertools.groupby(
            sorted(read_properties, key=parent_key), key=parent_key
        ):
            properties_by_class_by_property = self._get_properties_by_class_by_property(sibling_properties)

            parent_suffix: str | None = None
            shared_property_uris: set[URIRef] = set()
            if parent_uri != NEAT.EmptyType:
                # A property is shared when every class under this parent has it.
                shared_property_uris = set.intersection(
                    *(set(by_property) for by_property in properties_by_class_by_property.values())
                )
                parent_suffix = remove_namespace_from_uri(parent_uri)
                self._add_uri_namespace_to_prefixes(parent_uri, prefixes)
                if parent_suffix not in existing_classes:
                    classes.append(InformationInputClass(class_=parent_suffix))
                else:
                    classes.append(InformationInputClass.load(existing_classes[parent_suffix].model_dump()))

            shared_properties: dict[URIRef, list[_ReadProperties]] = defaultdict(list)
            for type_uri, properties_by_property_uri in properties_by_class_by_property.items():
                class_suffix = remove_namespace_from_uri(type_uri)
                self._add_uri_namespace_to_prefixes(type_uri, prefixes)

                if class_suffix not in existing_classes:
                    classes.append(
                        InformationInputClass(
                            class_=class_suffix,
                            implements=parent_suffix,
                        )
                    )
                else:
                    classes.append(InformationInputClass.load(existing_classes[class_suffix].model_dump()))

                # Distinct loop names: the original reused ``read_properties`` and shadowed the parameter.
                for property_uri, class_read_properties in properties_by_property_uri.items():
                    if property_uri in shared_property_uris:
                        # Collected here and emitted once on the parent below.
                        shared_properties[property_uri].extend(class_read_properties)
                        continue
                    properties.append(
                        self._create_property(class_read_properties, class_suffix, type_uri, property_uri, prefixes)
                    )

            if parent_suffix:
                for property_uri, shared_read_properties in shared_properties.items():
                    properties.append(
                        self._create_property(
                            shared_read_properties,
                            parent_suffix,
                            shared_read_properties[0].type_uri,
                            property_uri,
                            prefixes,
                        )
                    )
        return classes, properties

    @staticmethod
    def _get_properties_by_class_by_property(
        parent_class_properties_iterable: Iterable[_ReadProperties],
    ) -> dict[URIRef, dict[URIRef, list[_ReadProperties]]]:
        """Index observations as ``{class uri: {property uri: [observations]}}``, classes in sorted order."""
        properties_by_class_by_property: dict[URIRef, dict[URIRef, list[_ReadProperties]]] = {}
        for read_prop in sorted(parent_class_properties_iterable, key=lambda x: x.type_uri):
            if read_prop.type_uri not in properties_by_class_by_property:
                properties_by_class_by_property[read_prop.type_uri] = defaultdict(list)
            properties_by_class_by_property[read_prop.type_uri][read_prop.property_uri].append(read_prop)
        return properties_by_class_by_property

    def _read_class_properties_from_graph(self, parent_by_child: dict[URIRef, URIRef]) -> list[_ReadProperties]:
        """Query the graph for every (class, property, value type) combination.

        ``rdf:type`` is skipped, and so are properties the provided rules already
        define for the class (inherited properties included).
        """
        count_by_type: dict[URIRef, int] = {}
        # Infers all the classes in the graph
        for result_row in self.graph.query(self._ordered_class_query):
            type_uri, instance_count_literal = cast(tuple[URIRef, RdfLiteral], result_row)
            count_by_type[type_uri] = instance_count_literal.toPython()

        existing_class_properties: set[tuple[str, str]] = set()
        if self._rules:
            analysis = InformationAnalysis(self._rules)
            existing_class_properties = {
                (class_entity.suffix, prop.property_)
                for class_entity, properties in analysis.classes_with_properties(
                    consider_inheritance=True, allow_different_namespace=True
                ).items()
                for prop in properties
            }

        read_properties: list[_ReadProperties] = []
        for type_uri, instance_count in count_by_type.items():
            property_query = self._properties_query.format(type=type_uri, unknown_type=NEAT.UnknownType)
            class_suffix = remove_namespace_from_uri(type_uri)
            for result_row in self.graph.query(property_query):
                property_uri, value_type_uri = cast(tuple[URIRef, URIRef], result_row)
                if property_uri == RDF.type:
                    continue
                property_str = remove_namespace_from_uri(property_uri)
                if (class_suffix, property_str) in existing_class_properties:
                    continue
                read_properties.append(
                    _ReadProperties(
                        type_uri=type_uri,
                        property_uri=property_uri,
                        parent_uri=parent_by_child.get(type_uri),
                        value_type=value_type_uri,
                        max_occurrence=self._read_max_occurrence(type_uri, property_uri),
                        instance_count=instance_count,
                    )
                )
        return read_properties

    def _read_max_occurrence(self, type_uri: URIRef, property_uri: URIRef) -> int:
        """Return the highest number of values one instance of ``type_uri`` has for ``property_uri``.

        Falls back to 1 when the query yields no row or an unbound aggregate, instead
        of failing on the unpacking or on ``None.toPython()``.
        """
        occurrence_query = self._max_occurrence_query.format(type=type_uri, property=property_uri)
        for occurrence_row in self.graph.query(occurrence_query):
            max_occurrence_literal, *_ = cast(tuple[RdfLiteral | None, Any], occurrence_row)
            if max_occurrence_literal is not None:
                return int(max_occurrence_literal.toPython())
            # Only the first row is relevant; the query has a single aggregate.
            break
        return 1  # default value

    def _read_parent_by_child_from_graph(self) -> dict[URIRef, URIRef]:
        """Return ``{child type: parent type}`` for every instantiated type with an rdfs:subClassOf."""
        parent_by_child: dict[URIRef, URIRef] = {}
        for result_row in self.graph.query(self._type_parent_query):
            parent_uri, child_uri = cast(tuple[URIRef, URIRef], result_row)
            parent_by_child[child_uri] = parent_uri
        return parent_by_child

    def _create_property(
        self,
        read_properties: list[_ReadProperties],
        class_suffix: str,
        type_uri: URIRef,
        property_uri: URIRef,
        prefixes: dict[str, Namespace],
    ) -> InformationInputProperty:
        """Create the input property for ``class_suffix`` from all observations of ``property_uri``.

        ``read_properties`` must be non-empty. It may hold observations from several
        classes when the property is lifted to a shared parent.
        """
        value_type = self._get_value_type(read_properties, prefixes)
        property_name = remove_namespace_from_uri(property_uri)
        self._add_uri_namespace_to_prefixes(property_uri, prefixes)

        return InformationInputProperty(
            class_=class_suffix,
            property_=property_name,
            # Use the largest observed cardinality: observations merged from several
            # subclasses can differ, and taking only the first one could be too low.
            max_count=max(prop.max_occurrence for prop in read_properties),
            value_type=value_type,
            instance_source=(f"{uri_to_short_form(type_uri, prefixes)}({uri_to_short_form(property_uri, prefixes)})"),
        )

    def _get_value_type(
        self, read_properties: list[_ReadProperties], prefixes: dict[str, Namespace]
    ) -> str | UnknownEntity:
        """Resolve the value type of a property from its observations.

        Returns the single type name, a ``" | "``-separated multi-type string, or
        ``UnknownEntity`` when there is no type or only the unknown-type placeholder.
        """
        value_types = {self.overwrite_data_types.get(prop.value_type, prop.value_type) for prop in read_properties}
        if not value_types:
            return UnknownEntity()
        if len(value_types) == 1:
            (uri_ref,) = value_types
            if uri_ref == NEAT.UnknownType:
                return UnknownEntity()
            self._add_uri_namespace_to_prefixes(uri_ref, prefixes)
            return remove_namespace_from_uri(uri_ref)

        # Set iteration order is arbitrary; sort so that both the registered prefixes
        # and the multi-type string are the same on every run.
        ordered_value_types = sorted(value_types, key=str)
        for uri_ref in ordered_value_types:
            self._add_uri_namespace_to_prefixes(uri_ref, prefixes)
        return " | ".join(remove_namespace_from_uri(uri_ref) for uri_ref in ordered_value_types)

    def _default_metadata(self) -> dict[str, Any]:
        """Return dumped metadata for a freshly inferred model, timestamped with the current UTC time."""
        now = datetime.now(timezone.utc)
        return InformationMetadata(
            space=self.data_model_id.space,
            external_id=self.data_model_id.external_id,
            version=cast(str, self.data_model_id.version),
            name="Inferred Model",
            creator=["NEAT"],
            created=now,
            updated=now,
            description="Inferred model from knowledge graph",
        ).model_dump()
|
|
@@ -21,6 +21,7 @@ from typing import (
|
|
|
21
21
|
)
|
|
22
22
|
|
|
23
23
|
import pandas as pd
|
|
24
|
+
from cognite.client import data_modeling as dm
|
|
24
25
|
from pydantic import (
|
|
25
26
|
BaseModel,
|
|
26
27
|
BeforeValidator,
|
|
@@ -180,6 +181,12 @@ class BaseMetadata(SchemaModel):
|
|
|
180
181
|
description="Date of the data model update",
|
|
181
182
|
)
|
|
182
183
|
|
|
184
|
+
source_id: URIRefType | None = Field(
|
|
185
|
+
None,
|
|
186
|
+
description="Id of source that produced this rules",
|
|
187
|
+
alias="sourceId",
|
|
188
|
+
)
|
|
189
|
+
|
|
183
190
|
@field_validator("*", mode="before")
|
|
184
191
|
def strip_string(cls, value: Any) -> Any:
|
|
185
192
|
if isinstance(value, str):
|
|
@@ -213,9 +220,6 @@ class BaseMetadata(SchemaModel):
|
|
|
213
220
|
def prefix(self) -> str:
|
|
214
221
|
return self.space
|
|
215
222
|
|
|
216
|
-
def as_identifier(self) -> str:
|
|
217
|
-
return f"{self.prefix}:{self.external_id}"
|
|
218
|
-
|
|
219
223
|
def get_prefix(self) -> str:
|
|
220
224
|
return self.prefix
|
|
221
225
|
|
|
@@ -234,6 +238,12 @@ class BaseMetadata(SchemaModel):
|
|
|
234
238
|
"""Namespace for the data model used for the entities in the data model."""
|
|
235
239
|
return Namespace(f"{self.identifier}/")
|
|
236
240
|
|
|
241
|
+
def as_data_model_id(self) -> dm.DataModelId:
    """Return the data model id built from this metadata's space, external id and version."""
    return dm.DataModelId(
        space=self.space,
        external_id=self.external_id,
        version=self.version,
    )
|
|
243
|
+
|
|
244
|
+
def as_identifier(self) -> str:
    """Return the ``repr`` of this metadata's data model id, used as a string identifier."""
    data_model_id = self.as_data_model_id()
    return repr(data_model_id)
|
|
246
|
+
|
|
237
247
|
|
|
238
248
|
class BaseRules(SchemaModel, ABC):
|
|
239
249
|
"""
|
|
@@ -291,6 +301,7 @@ class BaseRules(SchemaModel, ABC):
|
|
|
291
301
|
def dump(
|
|
292
302
|
self,
|
|
293
303
|
entities_exclude_defaults: bool = True,
|
|
304
|
+
sort: bool = False,
|
|
294
305
|
mode: Literal["python", "json"] = "python",
|
|
295
306
|
by_alias: bool = False,
|
|
296
307
|
exclude: IncEx | None = None,
|
|
@@ -307,6 +318,7 @@ class BaseRules(SchemaModel, ABC):
|
|
|
307
318
|
For example, given a class that is dumped as 'my_prefix:MyClass', if the prefix for the rules
|
|
308
319
|
set in metadata.prefix = 'my_prefix', then this class will be dumped as 'MyClass' when this flag is set.
|
|
309
320
|
Defaults to True.
|
|
321
|
+
sort: Whether to sort the entities in the output.
|
|
310
322
|
mode: The mode in which `to_python` should run.
|
|
311
323
|
If mode is 'json', the output will only contain JSON serializable types.
|
|
312
324
|
If mode is 'python', the output may contain non-JSON-serializable Python objects.
|
|
@@ -316,11 +328,12 @@ class BaseRules(SchemaModel, ABC):
|
|
|
316
328
|
exclude_unset: Whether to exclude fields that have not been explicitly set.
|
|
317
329
|
exclude_defaults: Whether to exclude fields that are set to their default value.
|
|
318
330
|
"""
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
value
|
|
331
|
+
if sort:
|
|
332
|
+
for field_name in self.model_fields.keys():
|
|
333
|
+
value = getattr(self, field_name)
|
|
334
|
+
# Ensure deterministic order of properties, classes, views, and so on
|
|
335
|
+
if isinstance(value, SheetList):
|
|
336
|
+
value.sort(key=lambda x: x._identifier())
|
|
324
337
|
|
|
325
338
|
context: dict[str, Any] = {}
|
|
326
339
|
if entities_exclude_defaults:
|
|
@@ -328,7 +341,7 @@ class BaseRules(SchemaModel, ABC):
|
|
|
328
341
|
|
|
329
342
|
exclude_input: IncEx | None = exclude
|
|
330
343
|
|
|
331
|
-
|
|
344
|
+
return self.model_dump(
|
|
332
345
|
mode=mode,
|
|
333
346
|
by_alias=by_alias,
|
|
334
347
|
exclude=exclude_input,
|
|
@@ -338,8 +351,6 @@ class BaseRules(SchemaModel, ABC):
|
|
|
338
351
|
context=context,
|
|
339
352
|
)
|
|
340
353
|
|
|
341
|
-
return output
|
|
342
|
-
|
|
343
354
|
|
|
344
355
|
class SheetRow(SchemaModel):
|
|
345
356
|
neatId: URIRefType | None = Field(
|
|
@@ -292,10 +292,11 @@ class _DMSExporter:
|
|
|
292
292
|
for container in containers:
|
|
293
293
|
container_id = container.as_id()
|
|
294
294
|
if not (container_properties := container_properties_by_id.get(container_id)):
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
295
|
+
if container_id.space not in COGNITE_SPACES:
|
|
296
|
+
warnings.warn(
|
|
297
|
+
EmptyContainerWarning(container_id),
|
|
298
|
+
stacklevel=2,
|
|
299
|
+
)
|
|
299
300
|
container_to_drop.add(container_id)
|
|
300
301
|
continue
|
|
301
302
|
for prop in container_properties:
|
|
@@ -64,9 +64,6 @@ class DMSMetadata(BaseMetadata):
|
|
|
64
64
|
space=self.space,
|
|
65
65
|
)
|
|
66
66
|
|
|
67
|
-
def as_data_model_id(self) -> dm.DataModelId:
|
|
68
|
-
return dm.DataModelId(space=self.space, external_id=self.external_id, version=self.version)
|
|
69
|
-
|
|
70
67
|
def as_data_model(self) -> dm.DataModelApply:
|
|
71
68
|
suffix = f"Creator: {', '.join(self.creator)}"
|
|
72
69
|
if self.description:
|
|
@@ -83,9 +80,6 @@ class DMSMetadata(BaseMetadata):
|
|
|
83
80
|
views=[],
|
|
84
81
|
)
|
|
85
82
|
|
|
86
|
-
def as_identifier(self) -> str:
|
|
87
|
-
return repr(self.as_data_model_id())
|
|
88
|
-
|
|
89
83
|
def get_prefix(self) -> str:
|
|
90
84
|
return self.space
|
|
91
85
|
|
|
@@ -109,8 +103,7 @@ class DMSProperty(SheetRow):
|
|
|
109
103
|
)
|
|
110
104
|
value_type: DataType | ViewEntity | DMSUnknownEntity = Field(
|
|
111
105
|
alias="Value Type",
|
|
112
|
-
description="Value type that the property can hold. "
|
|
113
|
-
"It takes either subset of CDF primitive types or a View id",
|
|
106
|
+
description="Value type that the property can hold. It takes either subset of CDF primitive types or a View id",
|
|
114
107
|
)
|
|
115
108
|
nullable: bool | None = Field(
|
|
116
109
|
default=None,
|
|
@@ -37,6 +37,7 @@ class DMSInputMetadata(InputComponent[DMSMetadata]):
|
|
|
37
37
|
created: datetime | str | None = None
|
|
38
38
|
updated: datetime | str | None = None
|
|
39
39
|
logical: str | URIRef | None = None
|
|
40
|
+
source_id: str | URIRef | None = None
|
|
40
41
|
|
|
41
42
|
@classmethod
|
|
42
43
|
def _get_verified_cls(cls) -> type[DMSMetadata]:
|
|
@@ -77,6 +78,9 @@ class DMSInputMetadata(InputComponent[DMSMetadata]):
|
|
|
77
78
|
description = None
|
|
78
79
|
return description, creator
|
|
79
80
|
|
|
81
|
+
def as_data_model_id(self) -> dm.DataModelId:
    """Return the data model id built from this input metadata's space, external id and version."""
    return dm.DataModelId(
        space=self.space,
        external_id=self.external_id,
        version=self.version,
    )
|
|
83
|
+
|
|
80
84
|
@property
|
|
81
85
|
def identifier(self) -> URIRef:
|
|
82
86
|
"""Globally unique identifier for the data model.
|
|
@@ -3,6 +3,7 @@ from datetime import datetime
|
|
|
3
3
|
from typing import Any
|
|
4
4
|
|
|
5
5
|
import pandas as pd
|
|
6
|
+
from cognite.client import data_modeling as dm
|
|
6
7
|
from rdflib import Namespace, URIRef
|
|
7
8
|
|
|
8
9
|
from cognite.neat._constants import DEFAULT_NAMESPACE
|
|
@@ -36,6 +37,7 @@ class InformationInputMetadata(InputComponent[InformationMetadata]):
|
|
|
36
37
|
updated: datetime | str | None = None
|
|
37
38
|
physical: str | URIRef | None = None
|
|
38
39
|
conceptual: str | URIRef | None = None
|
|
40
|
+
source_id: str | URIRef | None = None
|
|
39
41
|
|
|
40
42
|
@classmethod
|
|
41
43
|
def _get_verified_cls(cls) -> type[InformationMetadata]:
|
|
@@ -49,6 +51,9 @@ class InformationInputMetadata(InputComponent[InformationMetadata]):
|
|
|
49
51
|
output["updated"] = datetime.now()
|
|
50
52
|
return output
|
|
51
53
|
|
|
54
|
+
def as_data_model_id(self) -> dm.DataModelId:
    """Return the data model id built from this input metadata's space, external id and version."""
    return dm.DataModelId(
        space=self.space,
        external_id=self.external_id,
        version=self.version,
    )
|
|
56
|
+
|
|
52
57
|
@property
|
|
53
58
|
def prefix(self) -> str:
|
|
54
59
|
return self.space
|
|
@@ -1,14 +1,17 @@
|
|
|
1
|
-
from ._base import RulesTransformer
|
|
1
|
+
from ._base import RulesTransformer, VerifiedRulesTransformer
|
|
2
2
|
from ._converters import (
|
|
3
3
|
AddClassImplements,
|
|
4
4
|
ChangeViewPrefix,
|
|
5
5
|
ClassicPrepareCore,
|
|
6
|
+
ConversionTransformer,
|
|
6
7
|
ConvertToRules,
|
|
7
8
|
DMSToInformation,
|
|
9
|
+
DropModelViews,
|
|
8
10
|
IncludeReferenced,
|
|
9
11
|
InformationToDMS,
|
|
12
|
+
MergeDMSRules,
|
|
13
|
+
MergeInformationRules,
|
|
10
14
|
PrefixEntities,
|
|
11
|
-
ReduceCogniteModel,
|
|
12
15
|
SetIDDMSModel,
|
|
13
16
|
ToCompliantEntities,
|
|
14
17
|
ToDataProductModel,
|
|
@@ -24,13 +27,16 @@ __all__ = [
|
|
|
24
27
|
"AsParentPropertyId",
|
|
25
28
|
"ChangeViewPrefix",
|
|
26
29
|
"ClassicPrepareCore",
|
|
30
|
+
"ConversionTransformer",
|
|
27
31
|
"ConvertToRules",
|
|
28
32
|
"DMSToInformation",
|
|
33
|
+
"DropModelViews",
|
|
29
34
|
"IncludeReferenced",
|
|
30
35
|
"InformationToDMS",
|
|
31
36
|
"MapOneToOne",
|
|
37
|
+
"MergeDMSRules",
|
|
38
|
+
"MergeInformationRules",
|
|
32
39
|
"PrefixEntities",
|
|
33
|
-
"ReduceCogniteModel",
|
|
34
40
|
"RuleMapper",
|
|
35
41
|
"RulesTransformer",
|
|
36
42
|
"SetIDDMSModel",
|
|
@@ -39,6 +45,7 @@ __all__ = [
|
|
|
39
45
|
"ToEnterpriseModel",
|
|
40
46
|
"ToExtensionModel",
|
|
41
47
|
"ToSolutionModel",
|
|
48
|
+
"VerifiedRulesTransformer",
|
|
42
49
|
"VerifyAnyRules",
|
|
43
50
|
"VerifyDMSRules",
|
|
44
51
|
"VerifyInformationRules",
|
|
@@ -5,12 +5,14 @@ from types import UnionType
|
|
|
5
5
|
from typing import Generic, TypeVar, Union, get_args, get_origin
|
|
6
6
|
|
|
7
7
|
from cognite.neat._constants import DEFAULT_NAMESPACE
|
|
8
|
-
from cognite.neat._rules._shared import ReadRules, Rules
|
|
8
|
+
from cognite.neat._rules._shared import ReadRules, Rules, VerifiedRules
|
|
9
9
|
from cognite.neat._rules.models import DMSInputRules, InformationInputRules
|
|
10
10
|
from cognite.neat._store._provenance import Agent as ProvenanceAgent
|
|
11
11
|
|
|
12
12
|
T_RulesIn = TypeVar("T_RulesIn", bound=Rules)
|
|
13
13
|
T_RulesOut = TypeVar("T_RulesOut", bound=Rules)
|
|
14
|
+
T_VerifiedIn = TypeVar("T_VerifiedIn", bound=VerifiedRules)
|
|
15
|
+
T_VerifiedOut = TypeVar("T_VerifiedOut", bound=VerifiedRules)
|
|
14
16
|
|
|
15
17
|
|
|
16
18
|
class RulesTransformer(ABC, Generic[T_RulesIn, T_RulesOut]):
|
|
@@ -62,3 +64,6 @@ class RulesTransformer(ABC, Generic[T_RulesIn, T_RulesOut]):
|
|
|
62
64
|
return ReadRules[DMSInputRules], ReadRules[InformationInputRules]
|
|
63
65
|
|
|
64
66
|
return (annotation,)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
class VerifiedRulesTransformer(RulesTransformer[T_VerifiedIn, T_VerifiedOut], ABC):
    """Abstract base for transformers whose input and output are both verified rules.

    Adds no behaviour of its own; it only narrows the generic parameters of
    ``RulesTransformer`` to type variables bound to ``VerifiedRules``.
    """
|