cognite-neat 0.87.6__py3-none-any.whl → 0.88.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cognite-neat might be problematic. Click here for more details.
- cognite/neat/_version.py +1 -1
- cognite/neat/app/api/data_classes/rest.py +0 -19
- cognite/neat/app/api/explorer.py +6 -4
- cognite/neat/app/api/routers/configuration.py +1 -1
- cognite/neat/app/api/routers/crud.py +11 -21
- cognite/neat/app/api/routers/workflows.py +24 -94
- cognite/neat/app/ui/neat-app/build/asset-manifest.json +7 -7
- cognite/neat/app/ui/neat-app/build/index.html +1 -1
- cognite/neat/app/ui/neat-app/build/static/css/{main.38a62222.css → main.72e3d92e.css} +2 -2
- cognite/neat/app/ui/neat-app/build/static/css/main.72e3d92e.css.map +1 -0
- cognite/neat/app/ui/neat-app/build/static/js/main.5a52cf09.js +3 -0
- cognite/neat/app/ui/neat-app/build/static/js/{main.ec7f72e2.js.LICENSE.txt → main.5a52cf09.js.LICENSE.txt} +0 -9
- cognite/neat/app/ui/neat-app/build/static/js/main.5a52cf09.js.map +1 -0
- cognite/neat/config.py +44 -27
- cognite/neat/exceptions.py +6 -0
- cognite/neat/graph/extractors/_classic_cdf/_assets.py +21 -73
- cognite/neat/graph/extractors/_classic_cdf/_base.py +102 -0
- cognite/neat/graph/extractors/_classic_cdf/_events.py +46 -42
- cognite/neat/graph/extractors/_classic_cdf/_files.py +41 -45
- cognite/neat/graph/extractors/_classic_cdf/_labels.py +75 -52
- cognite/neat/graph/extractors/_classic_cdf/_relationships.py +49 -27
- cognite/neat/graph/extractors/_classic_cdf/_sequences.py +47 -50
- cognite/neat/graph/extractors/_classic_cdf/_timeseries.py +47 -49
- cognite/neat/graph/queries/_base.py +22 -29
- cognite/neat/graph/queries/_shared.py +1 -1
- cognite/neat/graph/stores/_base.py +24 -11
- cognite/neat/graph/transformers/_rdfpath.py +3 -2
- cognite/neat/issues.py +8 -0
- cognite/neat/rules/exporters/_rules2ontology.py +28 -20
- cognite/neat/rules/exporters/_validation.py +15 -21
- cognite/neat/rules/importers/_inference2rules.py +31 -35
- cognite/neat/rules/importers/_owl2rules/_owl2metadata.py +3 -7
- cognite/neat/rules/importers/_spreadsheet2rules.py +30 -27
- cognite/neat/rules/issues/dms.py +20 -0
- cognite/neat/rules/issues/importing.py +15 -0
- cognite/neat/rules/issues/ontology.py +298 -0
- cognite/neat/rules/issues/spreadsheet.py +48 -0
- cognite/neat/rules/issues/tables.py +72 -0
- cognite/neat/rules/models/_rdfpath.py +4 -4
- cognite/neat/rules/models/_types/_field.py +9 -19
- cognite/neat/rules/models/information/_rules.py +5 -4
- cognite/neat/utils/rdf_.py +17 -9
- cognite/neat/utils/regex_patterns.py +52 -0
- cognite/neat/workflows/steps/data_contracts.py +17 -43
- cognite/neat/workflows/steps/lib/current/graph_extractor.py +28 -24
- cognite/neat/workflows/steps/lib/current/graph_loader.py +4 -21
- cognite/neat/workflows/steps/lib/current/graph_store.py +18 -134
- cognite/neat/workflows/steps_registry.py +5 -7
- {cognite_neat-0.87.6.dist-info → cognite_neat-0.88.1.dist-info}/METADATA +2 -6
- cognite_neat-0.88.1.dist-info/RECORD +209 -0
- cognite/neat/app/api/routers/core.py +0 -91
- cognite/neat/app/api/routers/data_exploration.py +0 -336
- cognite/neat/app/api/routers/rules.py +0 -203
- cognite/neat/app/ui/neat-app/build/static/css/main.38a62222.css.map +0 -1
- cognite/neat/app/ui/neat-app/build/static/js/main.ec7f72e2.js +0 -3
- cognite/neat/app/ui/neat-app/build/static/js/main.ec7f72e2.js.map +0 -1
- cognite/neat/graph/stores/_oxrdflib.py +0 -247
- cognite/neat/legacy/__init__.py +0 -0
- cognite/neat/legacy/graph/__init__.py +0 -3
- cognite/neat/legacy/graph/examples/Knowledge-Graph-Nordic44-dirty.xml +0 -20182
- cognite/neat/legacy/graph/examples/Knowledge-Graph-Nordic44.xml +0 -20163
- cognite/neat/legacy/graph/examples/__init__.py +0 -10
- cognite/neat/legacy/graph/examples/skos-capturing-sheet-wind-topics.xlsx +0 -0
- cognite/neat/legacy/graph/exceptions.py +0 -90
- cognite/neat/legacy/graph/extractors/__init__.py +0 -6
- cognite/neat/legacy/graph/extractors/_base.py +0 -14
- cognite/neat/legacy/graph/extractors/_dexpi.py +0 -44
- cognite/neat/legacy/graph/extractors/_graph_capturing_sheet.py +0 -403
- cognite/neat/legacy/graph/extractors/_mock_graph_generator.py +0 -361
- cognite/neat/legacy/graph/loaders/__init__.py +0 -23
- cognite/neat/legacy/graph/loaders/_asset_loader.py +0 -511
- cognite/neat/legacy/graph/loaders/_base.py +0 -67
- cognite/neat/legacy/graph/loaders/_exceptions.py +0 -85
- cognite/neat/legacy/graph/loaders/core/__init__.py +0 -0
- cognite/neat/legacy/graph/loaders/core/labels.py +0 -58
- cognite/neat/legacy/graph/loaders/core/models.py +0 -136
- cognite/neat/legacy/graph/loaders/core/rdf_to_assets.py +0 -1046
- cognite/neat/legacy/graph/loaders/core/rdf_to_relationships.py +0 -559
- cognite/neat/legacy/graph/loaders/rdf_to_dms.py +0 -309
- cognite/neat/legacy/graph/loaders/validator.py +0 -87
- cognite/neat/legacy/graph/models.py +0 -6
- cognite/neat/legacy/graph/stores/__init__.py +0 -13
- cognite/neat/legacy/graph/stores/_base.py +0 -400
- cognite/neat/legacy/graph/stores/_graphdb_store.py +0 -52
- cognite/neat/legacy/graph/stores/_memory_store.py +0 -43
- cognite/neat/legacy/graph/stores/_oxigraph_store.py +0 -151
- cognite/neat/legacy/graph/stores/_oxrdflib.py +0 -247
- cognite/neat/legacy/graph/stores/_rdf_to_graph.py +0 -42
- cognite/neat/legacy/graph/transformations/__init__.py +0 -0
- cognite/neat/legacy/graph/transformations/entity_matcher.py +0 -101
- cognite/neat/legacy/graph/transformations/query_generator/__init__.py +0 -3
- cognite/neat/legacy/graph/transformations/query_generator/sparql.py +0 -575
- cognite/neat/legacy/graph/transformations/transformer.py +0 -322
- cognite/neat/legacy/rules/__init__.py +0 -0
- cognite/neat/legacy/rules/analysis.py +0 -231
- cognite/neat/legacy/rules/examples/Rules-Nordic44-to-graphql.xlsx +0 -0
- cognite/neat/legacy/rules/examples/Rules-Nordic44.xlsx +0 -0
- cognite/neat/legacy/rules/examples/__init__.py +0 -18
- cognite/neat/legacy/rules/examples/power-grid-containers.yaml +0 -124
- cognite/neat/legacy/rules/examples/power-grid-example.xlsx +0 -0
- cognite/neat/legacy/rules/examples/power-grid-model.yaml +0 -224
- cognite/neat/legacy/rules/examples/rules-template.xlsx +0 -0
- cognite/neat/legacy/rules/examples/sheet2cdf-transformation-rules.xlsx +0 -0
- cognite/neat/legacy/rules/examples/skos-rules.xlsx +0 -0
- cognite/neat/legacy/rules/examples/source-to-solution-mapping-rules.xlsx +0 -0
- cognite/neat/legacy/rules/examples/wind-energy.owl +0 -1511
- cognite/neat/legacy/rules/exceptions.py +0 -2972
- cognite/neat/legacy/rules/exporters/__init__.py +0 -20
- cognite/neat/legacy/rules/exporters/_base.py +0 -45
- cognite/neat/legacy/rules/exporters/_core/__init__.py +0 -5
- cognite/neat/legacy/rules/exporters/_core/rules2labels.py +0 -24
- cognite/neat/legacy/rules/exporters/_rules2dms.py +0 -885
- cognite/neat/legacy/rules/exporters/_rules2excel.py +0 -213
- cognite/neat/legacy/rules/exporters/_rules2graphql.py +0 -183
- cognite/neat/legacy/rules/exporters/_rules2ontology.py +0 -524
- cognite/neat/legacy/rules/exporters/_rules2pydantic_models.py +0 -748
- cognite/neat/legacy/rules/exporters/_rules2rules.py +0 -105
- cognite/neat/legacy/rules/exporters/_rules2triples.py +0 -38
- cognite/neat/legacy/rules/exporters/_validation.py +0 -146
- cognite/neat/legacy/rules/importers/__init__.py +0 -22
- cognite/neat/legacy/rules/importers/_base.py +0 -66
- cognite/neat/legacy/rules/importers/_dict2rules.py +0 -158
- cognite/neat/legacy/rules/importers/_dms2rules.py +0 -194
- cognite/neat/legacy/rules/importers/_graph2rules.py +0 -308
- cognite/neat/legacy/rules/importers/_json2rules.py +0 -39
- cognite/neat/legacy/rules/importers/_owl2rules/__init__.py +0 -3
- cognite/neat/legacy/rules/importers/_owl2rules/_owl2classes.py +0 -239
- cognite/neat/legacy/rules/importers/_owl2rules/_owl2metadata.py +0 -260
- cognite/neat/legacy/rules/importers/_owl2rules/_owl2properties.py +0 -217
- cognite/neat/legacy/rules/importers/_owl2rules/_owl2rules.py +0 -290
- cognite/neat/legacy/rules/importers/_spreadsheet2rules.py +0 -45
- cognite/neat/legacy/rules/importers/_xsd2rules.py +0 -20
- cognite/neat/legacy/rules/importers/_yaml2rules.py +0 -39
- cognite/neat/legacy/rules/models/__init__.py +0 -5
- cognite/neat/legacy/rules/models/_base.py +0 -151
- cognite/neat/legacy/rules/models/raw_rules.py +0 -316
- cognite/neat/legacy/rules/models/rdfpath.py +0 -237
- cognite/neat/legacy/rules/models/rules.py +0 -1289
- cognite/neat/legacy/rules/models/tables.py +0 -9
- cognite/neat/legacy/rules/models/value_types.py +0 -118
- cognite/neat/legacy/workflows/examples/Export_DMS/workflow.yaml +0 -89
- cognite/neat/legacy/workflows/examples/Export_Rules_to_Ontology/workflow.yaml +0 -152
- cognite/neat/legacy/workflows/examples/Extract_DEXPI_Graph_and_Export_Rules/workflow.yaml +0 -139
- cognite/neat/legacy/workflows/examples/Extract_RDF_Graph_and_Generate_Assets/workflow.yaml +0 -270
- cognite/neat/legacy/workflows/examples/Import_DMS/workflow.yaml +0 -65
- cognite/neat/legacy/workflows/examples/Ontology_to_Data_Model/workflow.yaml +0 -116
- cognite/neat/legacy/workflows/examples/Validate_Rules/workflow.yaml +0 -67
- cognite/neat/legacy/workflows/examples/Validate_Solution_Model/workflow.yaml +0 -64
- cognite/neat/legacy/workflows/examples/Visualize_Data_Model_Using_Mock_Graph/workflow.yaml +0 -95
- cognite/neat/legacy/workflows/examples/Visualize_Semantic_Data_Model/workflow.yaml +0 -111
- cognite/neat/rules/exceptions.py +0 -2972
- cognite/neat/rules/models/_types/_base.py +0 -16
- cognite/neat/workflows/examples/Export_Rules_to_Ontology/workflow.yaml +0 -152
- cognite/neat/workflows/examples/Extract_DEXPI_Graph_and_Export_Rules/workflow.yaml +0 -139
- cognite/neat/workflows/examples/Extract_RDF_Graph_and_Generate_Assets/workflow.yaml +0 -270
- cognite/neat/workflows/examples/Ontology_to_Data_Model/workflow.yaml +0 -116
- cognite/neat/workflows/migration/__init__.py +0 -0
- cognite/neat/workflows/migration/steps.py +0 -91
- cognite/neat/workflows/migration/wf_manifests.py +0 -33
- cognite/neat/workflows/steps/lib/legacy/__init__.py +0 -7
- cognite/neat/workflows/steps/lib/legacy/graph_contextualization.py +0 -82
- cognite/neat/workflows/steps/lib/legacy/graph_extractor.py +0 -746
- cognite/neat/workflows/steps/lib/legacy/graph_loader.py +0 -606
- cognite/neat/workflows/steps/lib/legacy/graph_store.py +0 -307
- cognite/neat/workflows/steps/lib/legacy/graph_transformer.py +0 -58
- cognite/neat/workflows/steps/lib/legacy/rules_exporter.py +0 -511
- cognite/neat/workflows/steps/lib/legacy/rules_importer.py +0 -612
- cognite_neat-0.87.6.dist-info/RECORD +0 -319
- {cognite_neat-0.87.6.dist-info → cognite_neat-0.88.1.dist-info}/LICENSE +0 -0
- {cognite_neat-0.87.6.dist-info → cognite_neat-0.88.1.dist-info}/WHEEL +0 -0
- {cognite_neat-0.87.6.dist-info → cognite_neat-0.88.1.dist-info}/entry_points.txt +0 -0
|
@@ -1,33 +1,37 @@
|
|
|
1
|
-
from collections.abc import
|
|
1
|
+
from collections.abc import Callable, Set
|
|
2
2
|
from datetime import datetime, timezone
|
|
3
3
|
from pathlib import Path
|
|
4
|
-
from typing import cast
|
|
5
4
|
|
|
6
5
|
from cognite.client import CogniteClient
|
|
7
6
|
from cognite.client.data_classes import LabelDefinition, LabelDefinitionList
|
|
8
7
|
from rdflib import RDF, Literal, Namespace
|
|
9
8
|
|
|
10
|
-
from cognite.neat.constants import DEFAULT_NAMESPACE
|
|
11
|
-
from cognite.neat.graph.extractors._base import BaseExtractor
|
|
12
9
|
from cognite.neat.graph.models import Triple
|
|
13
10
|
from cognite.neat.utils.auxiliary import create_sha256_hash
|
|
14
11
|
|
|
12
|
+
from ._base import DEFAULT_SKIP_METADATA_VALUES, ClassicCDFExtractor
|
|
15
13
|
|
|
16
|
-
|
|
14
|
+
|
|
15
|
+
class LabelsExtractor(ClassicCDFExtractor[LabelDefinition]):
|
|
17
16
|
"""Extract data from Cognite Data Fusions Labels into Neat.
|
|
18
17
|
|
|
19
18
|
Args:
|
|
20
|
-
|
|
19
|
+
items (Iterable[LabelDefinition]): An iterable of items.
|
|
21
20
|
namespace (Namespace, optional): The namespace to use. Defaults to DEFAULT_NAMESPACE.
|
|
21
|
+
to_type (Callable[[LabelDefinition], str | None], optional): A function to convert an item to a type.
|
|
22
|
+
Defaults to None. If None or if the function returns None, the asset will be set to the default type.
|
|
23
|
+
total (int, optional): The total number of items to load. If passed, you will get a progress bar if rich
|
|
24
|
+
is installed. Defaults to None.
|
|
25
|
+
limit (int, optional): The maximal number of items to load. Defaults to None. This is typically used for
|
|
26
|
+
testing setup of the extractor. For example, if you are extracting 100 000 assets, you might want to
|
|
27
|
+
limit the extraction to 1000 assets to test the setup.
|
|
28
|
+
unpack_metadata (bool, optional): Whether to unpack metadata. Defaults to False, which yields the metadata as
|
|
29
|
+
a JSON string.
|
|
30
|
+
skip_metadata_values (set[str] | frozenset[str] | None, optional): If you are unpacking metadata, then
|
|
31
|
+
values in this set will be skipped.
|
|
22
32
|
"""
|
|
23
33
|
|
|
24
|
-
|
|
25
|
-
self,
|
|
26
|
-
labels: Iterable[LabelDefinition],
|
|
27
|
-
namespace: Namespace | None = None,
|
|
28
|
-
):
|
|
29
|
-
self.namespace = namespace or DEFAULT_NAMESPACE
|
|
30
|
-
self.labels = labels
|
|
34
|
+
_default_rdf_type = "Label"
|
|
31
35
|
|
|
32
36
|
@classmethod
|
|
33
37
|
def from_dataset(
|
|
@@ -35,57 +39,76 @@ class LabelsExtractor(BaseExtractor):
|
|
|
35
39
|
client: CogniteClient,
|
|
36
40
|
data_set_external_id: str,
|
|
37
41
|
namespace: Namespace | None = None,
|
|
42
|
+
to_type: Callable[[LabelDefinition], str | None] | None = None,
|
|
43
|
+
limit: int | None = None,
|
|
44
|
+
unpack_metadata: bool = True,
|
|
45
|
+
skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
|
|
38
46
|
):
|
|
39
47
|
return cls(
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
48
|
+
client.labels(data_set_external_ids=data_set_external_id),
|
|
49
|
+
namespace=namespace,
|
|
50
|
+
to_type=to_type,
|
|
51
|
+
limit=limit,
|
|
52
|
+
unpack_metadata=unpack_metadata,
|
|
53
|
+
skip_metadata_values=skip_metadata_values,
|
|
45
54
|
)
|
|
46
55
|
|
|
47
56
|
@classmethod
|
|
48
|
-
def from_file(
|
|
49
|
-
|
|
57
|
+
def from_file(
|
|
58
|
+
cls,
|
|
59
|
+
file_path: str,
|
|
60
|
+
namespace: Namespace | None = None,
|
|
61
|
+
to_type: Callable[[LabelDefinition], str | None] | None = None,
|
|
62
|
+
limit: int | None = None,
|
|
63
|
+
unpack_metadata: bool = True,
|
|
64
|
+
skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
|
|
65
|
+
):
|
|
66
|
+
labels = LabelDefinitionList.load(Path(file_path).read_text())
|
|
67
|
+
return cls(
|
|
68
|
+
labels,
|
|
69
|
+
total=len(labels),
|
|
70
|
+
namespace=namespace,
|
|
71
|
+
to_type=to_type,
|
|
72
|
+
limit=limit,
|
|
73
|
+
unpack_metadata=unpack_metadata,
|
|
74
|
+
skip_metadata_values=skip_metadata_values,
|
|
75
|
+
)
|
|
50
76
|
|
|
51
|
-
def
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
yield from self._labels2triples(label)
|
|
77
|
+
def _item2triples(self, label: LabelDefinition) -> list[Triple]:
|
|
78
|
+
if not label.external_id:
|
|
79
|
+
return []
|
|
55
80
|
|
|
56
|
-
|
|
57
|
-
if label.external_id:
|
|
58
|
-
id_ = self.namespace[f"Label_{create_sha256_hash(label.external_id)}"]
|
|
81
|
+
id_ = self.namespace[f"Label_{create_sha256_hash(label.external_id)}"]
|
|
59
82
|
|
|
60
|
-
|
|
61
|
-
|
|
83
|
+
type_ = self._get_rdf_type(label)
|
|
84
|
+
# Set rdf type
|
|
85
|
+
triples: list[Triple] = [(id_, RDF.type, self.namespace[type_])]
|
|
62
86
|
|
|
63
|
-
|
|
64
|
-
|
|
87
|
+
# Create attributes
|
|
88
|
+
triples.append((id_, self.namespace.external_id, Literal(label.external_id)))
|
|
65
89
|
|
|
66
|
-
|
|
67
|
-
|
|
90
|
+
if label.name:
|
|
91
|
+
triples.append((id_, self.namespace.name, Literal(label.name)))
|
|
68
92
|
|
|
69
|
-
|
|
70
|
-
|
|
93
|
+
if label.description:
|
|
94
|
+
triples.append((id_, self.namespace.description, Literal(label.description)))
|
|
71
95
|
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
)
|
|
96
|
+
if label.created_time:
|
|
97
|
+
triples.append(
|
|
98
|
+
(
|
|
99
|
+
id_,
|
|
100
|
+
self.namespace.created_time,
|
|
101
|
+
Literal(datetime.fromtimestamp(label.created_time / 1000, timezone.utc)),
|
|
79
102
|
)
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
103
|
+
)
|
|
104
|
+
|
|
105
|
+
if label.data_set_id:
|
|
106
|
+
triples.append(
|
|
107
|
+
(
|
|
108
|
+
id_,
|
|
109
|
+
self.namespace.data_set_id,
|
|
110
|
+
self.namespace[f"Dataset_{label.data_set_id}"],
|
|
88
111
|
)
|
|
112
|
+
)
|
|
89
113
|
|
|
90
|
-
|
|
91
|
-
return []
|
|
114
|
+
return triples
|
|
@@ -1,34 +1,38 @@
|
|
|
1
|
-
from collections.abc import
|
|
1
|
+
from collections.abc import Callable, Set
|
|
2
2
|
from datetime import datetime, timezone
|
|
3
3
|
from pathlib import Path
|
|
4
|
-
from typing import cast
|
|
5
4
|
from urllib.parse import quote
|
|
6
5
|
|
|
7
6
|
from cognite.client import CogniteClient
|
|
8
7
|
from cognite.client.data_classes import Relationship, RelationshipList
|
|
9
8
|
from rdflib import RDF, Literal, Namespace
|
|
10
9
|
|
|
11
|
-
from cognite.neat.constants import DEFAULT_NAMESPACE
|
|
12
|
-
from cognite.neat.graph.extractors._base import BaseExtractor
|
|
13
10
|
from cognite.neat.graph.models import Triple
|
|
14
11
|
from cognite.neat.utils.auxiliary import create_sha256_hash
|
|
15
12
|
|
|
13
|
+
from ._base import DEFAULT_SKIP_METADATA_VALUES, ClassicCDFExtractor
|
|
16
14
|
|
|
17
|
-
|
|
15
|
+
|
|
16
|
+
class RelationshipsExtractor(ClassicCDFExtractor[Relationship]):
|
|
18
17
|
"""Extract data from Cognite Data Fusions Relationships into Neat.
|
|
19
18
|
|
|
20
19
|
Args:
|
|
21
|
-
|
|
20
|
+
items (Iterable[Relationship]): An iterable of items.
|
|
22
21
|
namespace (Namespace, optional): The namespace to use. Defaults to DEFAULT_NAMESPACE.
|
|
22
|
+
to_type (Callable[[Relationship], str | None], optional): A function to convert an item to a type.
|
|
23
|
+
Defaults to None. If None or if the function returns None, the asset will be set to the default type.
|
|
24
|
+
total (int, optional): The total number of items to load. If passed, you will get a progress bar if rich
|
|
25
|
+
is installed. Defaults to None.
|
|
26
|
+
limit (int, optional): The maximal number of items to load. Defaults to None. This is typically used for
|
|
27
|
+
testing setup of the extractor. For example, if you are extracting 100 000 assets, you might want to
|
|
28
|
+
limit the extraction to 1000 assets to test the setup.
|
|
29
|
+
unpack_metadata (bool, optional): Whether to unpack metadata. Defaults to False, which yields the metadata as
|
|
30
|
+
a JSON string.
|
|
31
|
+
skip_metadata_values (set[str] | frozenset[str] | None, optional): If you are unpacking metadata, then
|
|
32
|
+
values in this set will be skipped.
|
|
23
33
|
"""
|
|
24
34
|
|
|
25
|
-
|
|
26
|
-
self,
|
|
27
|
-
relationships: Iterable[Relationship],
|
|
28
|
-
namespace: Namespace | None = None,
|
|
29
|
-
):
|
|
30
|
-
self.namespace = namespace or DEFAULT_NAMESPACE
|
|
31
|
-
self.relationships = relationships
|
|
35
|
+
_default_rdf_type = "Relationship"
|
|
32
36
|
|
|
33
37
|
@classmethod
|
|
34
38
|
def from_dataset(
|
|
@@ -36,33 +40,51 @@ class RelationshipsExtractor(BaseExtractor):
|
|
|
36
40
|
client: CogniteClient,
|
|
37
41
|
data_set_external_id: str,
|
|
38
42
|
namespace: Namespace | None = None,
|
|
43
|
+
to_type: Callable[[Relationship], str | None] | None = None,
|
|
44
|
+
limit: int | None = None,
|
|
45
|
+
unpack_metadata: bool = True,
|
|
46
|
+
skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
|
|
39
47
|
):
|
|
40
48
|
return cls(
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
49
|
+
client.relationships(data_set_external_ids=data_set_external_id),
|
|
50
|
+
namespace=namespace,
|
|
51
|
+
to_type=to_type,
|
|
52
|
+
limit=limit,
|
|
53
|
+
unpack_metadata=unpack_metadata,
|
|
54
|
+
skip_metadata_values=skip_metadata_values,
|
|
46
55
|
)
|
|
47
56
|
|
|
48
57
|
@classmethod
|
|
49
|
-
def from_file(
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
58
|
+
def from_file(
|
|
59
|
+
cls,
|
|
60
|
+
file_path: str,
|
|
61
|
+
namespace: Namespace | None = None,
|
|
62
|
+
to_type: Callable[[Relationship], str | None] | None = None,
|
|
63
|
+
limit: int | None = None,
|
|
64
|
+
unpack_metadata: bool = True,
|
|
65
|
+
skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
|
|
66
|
+
):
|
|
67
|
+
relationships = RelationshipList.load(Path(file_path).read_text())
|
|
68
|
+
return cls(
|
|
69
|
+
relationships,
|
|
70
|
+
namespace=namespace,
|
|
71
|
+
total=len(relationships),
|
|
72
|
+
to_type=to_type,
|
|
73
|
+
limit=limit,
|
|
74
|
+
unpack_metadata=unpack_metadata,
|
|
75
|
+
skip_metadata_values=skip_metadata_values,
|
|
76
|
+
)
|
|
56
77
|
|
|
57
|
-
def
|
|
78
|
+
def _item2triples(self, relationship: Relationship) -> list[Triple]:
|
|
58
79
|
"""Converts an asset to triples."""
|
|
59
80
|
|
|
60
81
|
if relationship.external_id and relationship.source_external_id and relationship.target_external_id:
|
|
61
82
|
# relationships do not have an internal id, so we generate one
|
|
62
83
|
id_ = self.namespace[f"Relationship_{create_sha256_hash(relationship.external_id)}"]
|
|
63
84
|
|
|
85
|
+
type_ = self._get_rdf_type(relationship)
|
|
64
86
|
# Set rdf type
|
|
65
|
-
triples: list[Triple] = [(id_, RDF.type, self.namespace[
|
|
87
|
+
triples: list[Triple] = [(id_, RDF.type, self.namespace[type_])]
|
|
66
88
|
|
|
67
89
|
# Set source and target types
|
|
68
90
|
if source_type := relationship.source_type:
|
|
@@ -1,39 +1,36 @@
|
|
|
1
|
-
import
|
|
2
|
-
from collections.abc import Iterable
|
|
1
|
+
from collections.abc import Callable, Set
|
|
3
2
|
from datetime import datetime, timezone
|
|
4
3
|
from pathlib import Path
|
|
5
|
-
from typing import cast
|
|
6
4
|
|
|
7
5
|
from cognite.client import CogniteClient
|
|
8
|
-
from cognite.client.data_classes import Sequence, SequenceList
|
|
9
|
-
from
|
|
10
|
-
from rdflib import RDF, Literal, Namespace, URIRef
|
|
6
|
+
from cognite.client.data_classes import Sequence, SequenceFilter, SequenceList
|
|
7
|
+
from rdflib import RDF, Literal, Namespace
|
|
11
8
|
|
|
12
|
-
from cognite.neat.constants import DEFAULT_NAMESPACE
|
|
13
|
-
from cognite.neat.graph.extractors._base import BaseExtractor
|
|
14
9
|
from cognite.neat.graph.models import Triple
|
|
15
|
-
from cognite.neat.utils.auxiliary import string_to_ideal_type
|
|
16
10
|
|
|
11
|
+
from ._base import DEFAULT_SKIP_METADATA_VALUES, ClassicCDFExtractor
|
|
17
12
|
|
|
18
|
-
|
|
13
|
+
|
|
14
|
+
class SequencesExtractor(ClassicCDFExtractor[Sequence]):
|
|
19
15
|
"""Extract data from Cognite Data Fusions Sequences into Neat.
|
|
20
16
|
|
|
21
17
|
Args:
|
|
22
|
-
|
|
18
|
+
items (Iterable[Sequence]): An iterable of items.
|
|
23
19
|
namespace (Namespace, optional): The namespace to use. Defaults to DEFAULT_NAMESPACE.
|
|
20
|
+
to_type (Callable[[Sequence], str | None], optional): A function to convert an item to a type.
|
|
21
|
+
Defaults to None. If None or if the function returns None, the asset will be set to the default type.
|
|
22
|
+
total (int, optional): The total number of items to load. If passed, you will get a progress bar if rich
|
|
23
|
+
is installed. Defaults to None.
|
|
24
|
+
limit (int, optional): The maximal number of items to load. Defaults to None. This is typically used for
|
|
25
|
+
testing setup of the extractor. For example, if you are extracting 100 000 assets, you might want to
|
|
26
|
+
limit the extraction to 1000 assets to test the setup.
|
|
24
27
|
unpack_metadata (bool, optional): Whether to unpack metadata. Defaults to False, which yields the metadata as
|
|
25
28
|
a JSON string.
|
|
29
|
+
skip_metadata_values (set[str] | frozenset[str] | None, optional): If you are unpacking metadata, then
|
|
30
|
+
values in this set will be skipped.
|
|
26
31
|
"""
|
|
27
32
|
|
|
28
|
-
|
|
29
|
-
self,
|
|
30
|
-
sequence: Iterable[Sequence],
|
|
31
|
-
namespace: Namespace | None = None,
|
|
32
|
-
unpack_metadata: bool = True,
|
|
33
|
-
):
|
|
34
|
-
self.namespace = namespace or DEFAULT_NAMESPACE
|
|
35
|
-
self.sequence = sequence
|
|
36
|
-
self.unpack_metadata = unpack_metadata
|
|
33
|
+
_default_rdf_type = "Sequence"
|
|
37
34
|
|
|
38
35
|
@classmethod
|
|
39
36
|
def from_dataset(
|
|
@@ -41,15 +38,22 @@ class SequencesExtractor(BaseExtractor):
|
|
|
41
38
|
client: CogniteClient,
|
|
42
39
|
data_set_external_id: str,
|
|
43
40
|
namespace: Namespace | None = None,
|
|
41
|
+
to_type: Callable[[Sequence], str | None] | None = None,
|
|
42
|
+
limit: int | None = None,
|
|
44
43
|
unpack_metadata: bool = True,
|
|
44
|
+
skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
|
|
45
45
|
):
|
|
46
|
+
total = client.sequences.aggregate_count(
|
|
47
|
+
filter=SequenceFilter(data_set_ids=[{"externalId": data_set_external_id}])
|
|
48
|
+
)
|
|
46
49
|
return cls(
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
unpack_metadata,
|
|
50
|
+
client.sequences(data_set_external_ids=data_set_external_id),
|
|
51
|
+
total=total,
|
|
52
|
+
namespace=namespace,
|
|
53
|
+
to_type=to_type,
|
|
54
|
+
limit=limit,
|
|
55
|
+
unpack_metadata=unpack_metadata,
|
|
56
|
+
skip_metadata_values=skip_metadata_values,
|
|
53
57
|
)
|
|
54
58
|
|
|
55
59
|
@classmethod
|
|
@@ -57,20 +61,28 @@ class SequencesExtractor(BaseExtractor):
|
|
|
57
61
|
cls,
|
|
58
62
|
file_path: str,
|
|
59
63
|
namespace: Namespace | None = None,
|
|
64
|
+
to_type: Callable[[Sequence], str | None] | None = None,
|
|
65
|
+
limit: int | None = None,
|
|
60
66
|
unpack_metadata: bool = True,
|
|
67
|
+
skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
|
|
61
68
|
):
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
69
|
+
sequences = SequenceList.load(Path(file_path).read_text())
|
|
70
|
+
return cls(
|
|
71
|
+
sequences,
|
|
72
|
+
total=len(sequences),
|
|
73
|
+
namespace=namespace,
|
|
74
|
+
to_type=to_type,
|
|
75
|
+
limit=limit,
|
|
76
|
+
unpack_metadata=unpack_metadata,
|
|
77
|
+
skip_metadata_values=skip_metadata_values,
|
|
78
|
+
)
|
|
68
79
|
|
|
69
|
-
def
|
|
80
|
+
def _item2triples(self, sequence: Sequence) -> list[Triple]:
|
|
70
81
|
id_ = self.namespace[f"Sequence_{sequence.id}"]
|
|
71
82
|
|
|
83
|
+
type_ = self._get_rdf_type(sequence)
|
|
72
84
|
# Set rdf type
|
|
73
|
-
triples: list[Triple] = [(id_, RDF.type, self.namespace
|
|
85
|
+
triples: list[Triple] = [(id_, RDF.type, self.namespace[type_])]
|
|
74
86
|
|
|
75
87
|
# Create attributes
|
|
76
88
|
|
|
@@ -81,22 +93,7 @@ class SequencesExtractor(BaseExtractor):
|
|
|
81
93
|
triples.append((id_, self.namespace.name, Literal(sequence.name)))
|
|
82
94
|
|
|
83
95
|
if sequence.metadata:
|
|
84
|
-
|
|
85
|
-
for key, value in sequence.metadata.items():
|
|
86
|
-
if value:
|
|
87
|
-
type_aware_value = string_to_ideal_type(value)
|
|
88
|
-
try:
|
|
89
|
-
triples.append((id_, self.namespace[key], URIRef(str(AnyHttpUrl(type_aware_value))))) # type: ignore
|
|
90
|
-
except ValidationError:
|
|
91
|
-
triples.append((id_, self.namespace[key], Literal(type_aware_value)))
|
|
92
|
-
else:
|
|
93
|
-
triples.append(
|
|
94
|
-
(
|
|
95
|
-
id_,
|
|
96
|
-
self.namespace.metadata,
|
|
97
|
-
Literal(json.dumps(sequence.metadata)),
|
|
98
|
-
)
|
|
99
|
-
)
|
|
96
|
+
triples.extend(self._metadata_to_triples(id_, sequence.metadata))
|
|
100
97
|
|
|
101
98
|
if sequence.description:
|
|
102
99
|
triples.append((id_, self.namespace.description, Literal(sequence.description)))
|
|
@@ -1,39 +1,37 @@
|
|
|
1
|
-
import
|
|
2
|
-
from collections.abc import Iterable
|
|
1
|
+
from collections.abc import Callable, Set
|
|
3
2
|
from datetime import datetime, timezone
|
|
4
3
|
from pathlib import Path
|
|
5
|
-
from typing import cast
|
|
6
4
|
|
|
7
5
|
from cognite.client import CogniteClient
|
|
8
|
-
from cognite.client.data_classes import TimeSeries, TimeSeriesList
|
|
6
|
+
from cognite.client.data_classes import TimeSeries, TimeSeriesFilter, TimeSeriesList
|
|
9
7
|
from pydantic import AnyHttpUrl, ValidationError
|
|
10
8
|
from rdflib import RDF, Literal, Namespace, URIRef
|
|
11
9
|
|
|
12
|
-
from cognite.neat.constants import DEFAULT_NAMESPACE
|
|
13
|
-
from cognite.neat.graph.extractors._base import BaseExtractor
|
|
14
10
|
from cognite.neat.graph.models import Triple
|
|
15
|
-
from cognite.neat.utils.auxiliary import string_to_ideal_type
|
|
16
11
|
|
|
12
|
+
from ._base import DEFAULT_SKIP_METADATA_VALUES, ClassicCDFExtractor
|
|
17
13
|
|
|
18
|
-
|
|
14
|
+
|
|
15
|
+
class TimeSeriesExtractor(ClassicCDFExtractor[TimeSeries]):
|
|
19
16
|
"""Extract data from Cognite Data Fusions TimeSeries into Neat.
|
|
20
17
|
|
|
21
18
|
Args:
|
|
22
|
-
|
|
19
|
+
items (Iterable[TimeSeries]): An iterable of items.
|
|
23
20
|
namespace (Namespace, optional): The namespace to use. Defaults to DEFAULT_NAMESPACE.
|
|
21
|
+
to_type (Callable[[TimeSeries], str | None], optional): A function to convert an item to a type.
|
|
22
|
+
Defaults to None. If None or if the function returns None, the asset will be set to the default type.
|
|
23
|
+
total (int, optional): The total number of items to load. If passed, you will get a progress bar if rich
|
|
24
|
+
is installed. Defaults to None.
|
|
25
|
+
limit (int, optional): The maximal number of items to load. Defaults to None. This is typically used for
|
|
26
|
+
testing setup of the extractor. For example, if you are extracting 100 000 assets, you might want to
|
|
27
|
+
limit the extraction to 1000 assets to test the setup.
|
|
24
28
|
unpack_metadata (bool, optional): Whether to unpack metadata. Defaults to False, which yields the metadata as
|
|
25
29
|
a JSON string.
|
|
30
|
+
skip_metadata_values (set[str] | frozenset[str] | None, optional): If you are unpacking metadata, then
|
|
31
|
+
values in this set will be skipped.
|
|
26
32
|
"""
|
|
27
33
|
|
|
28
|
-
|
|
29
|
-
self,
|
|
30
|
-
timeseries: Iterable[TimeSeries],
|
|
31
|
-
namespace: Namespace | None = None,
|
|
32
|
-
unpack_metadata: bool = True,
|
|
33
|
-
):
|
|
34
|
-
self.namespace = namespace or DEFAULT_NAMESPACE
|
|
35
|
-
self.timeseries = timeseries
|
|
36
|
-
self.unpack_metadata = unpack_metadata
|
|
34
|
+
_default_rdf_type = "TimeSeries"
|
|
37
35
|
|
|
38
36
|
@classmethod
|
|
39
37
|
def from_dataset(
|
|
@@ -41,15 +39,23 @@ class TimeSeriesExtractor(BaseExtractor):
|
|
|
41
39
|
client: CogniteClient,
|
|
42
40
|
data_set_external_id: str,
|
|
43
41
|
namespace: Namespace | None = None,
|
|
42
|
+
to_type: Callable[[TimeSeries], str | None] | None = None,
|
|
43
|
+
limit: int | None = None,
|
|
44
44
|
unpack_metadata: bool = True,
|
|
45
|
+
skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
|
|
45
46
|
):
|
|
47
|
+
total = client.time_series.aggregate_count(
|
|
48
|
+
filter=TimeSeriesFilter(data_set_ids=[{"externalId": data_set_external_id}])
|
|
49
|
+
)
|
|
50
|
+
|
|
46
51
|
return cls(
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
unpack_metadata,
|
|
52
|
+
client.time_series(data_set_external_ids=data_set_external_id),
|
|
53
|
+
total=total,
|
|
54
|
+
namespace=namespace,
|
|
55
|
+
to_type=to_type,
|
|
56
|
+
limit=limit,
|
|
57
|
+
unpack_metadata=unpack_metadata,
|
|
58
|
+
skip_metadata_values=skip_metadata_values,
|
|
53
59
|
)
|
|
54
60
|
|
|
55
61
|
@classmethod
|
|
@@ -57,23 +63,30 @@ class TimeSeriesExtractor(BaseExtractor):
|
|
|
57
63
|
cls,
|
|
58
64
|
file_path: str,
|
|
59
65
|
namespace: Namespace | None = None,
|
|
66
|
+
to_type: Callable[[TimeSeries], str | None] | None = None,
|
|
67
|
+
limit: int | None = None,
|
|
60
68
|
unpack_metadata: bool = True,
|
|
69
|
+
skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
|
|
61
70
|
):
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
71
|
+
timeseries = TimeSeriesList.load(Path(file_path).read_text())
|
|
72
|
+
return cls(
|
|
73
|
+
timeseries,
|
|
74
|
+
total=len(timeseries),
|
|
75
|
+
namespace=namespace,
|
|
76
|
+
to_type=to_type,
|
|
77
|
+
limit=limit,
|
|
78
|
+
unpack_metadata=unpack_metadata,
|
|
79
|
+
skip_metadata_values=skip_metadata_values,
|
|
80
|
+
)
|
|
68
81
|
|
|
69
|
-
def
|
|
82
|
+
def _item2triples(self, timeseries: TimeSeries) -> list[Triple]:
|
|
70
83
|
id_ = self.namespace[f"TimeSeries_{timeseries.id}"]
|
|
71
84
|
|
|
72
85
|
# Set rdf type
|
|
73
|
-
|
|
86
|
+
type_ = self._get_rdf_type(timeseries)
|
|
87
|
+
triples: list[Triple] = [(id_, RDF.type, self.namespace[type_])]
|
|
74
88
|
|
|
75
89
|
# Create attributes
|
|
76
|
-
|
|
77
90
|
if timeseries.external_id:
|
|
78
91
|
triples.append((id_, self.namespace.external_id, Literal(timeseries.external_id)))
|
|
79
92
|
|
|
@@ -84,22 +97,7 @@ class TimeSeriesExtractor(BaseExtractor):
|
|
|
84
97
|
triples.append((id_, self.namespace.is_string, Literal(timeseries.is_string)))
|
|
85
98
|
|
|
86
99
|
if timeseries.metadata:
|
|
87
|
-
|
|
88
|
-
for key, value in timeseries.metadata.items():
|
|
89
|
-
if value:
|
|
90
|
-
type_aware_value = string_to_ideal_type(value)
|
|
91
|
-
try:
|
|
92
|
-
triples.append((id_, self.namespace[key], URIRef(str(AnyHttpUrl(type_aware_value))))) # type: ignore
|
|
93
|
-
except ValidationError:
|
|
94
|
-
triples.append((id_, self.namespace[key], Literal(type_aware_value)))
|
|
95
|
-
else:
|
|
96
|
-
triples.append(
|
|
97
|
-
(
|
|
98
|
-
id_,
|
|
99
|
-
self.namespace.metadata,
|
|
100
|
-
Literal(json.dumps(timeseries.metadata)),
|
|
101
|
-
)
|
|
102
|
-
)
|
|
100
|
+
triples.extend(self._metadata_to_triples(id_, timeseries.metadata))
|
|
103
101
|
|
|
104
102
|
if timeseries.unit:
|
|
105
103
|
triples.append((id_, self.namespace.unit, Literal(timeseries.unit)))
|
|
@@ -98,47 +98,40 @@ class Queries:
|
|
|
98
98
|
self,
|
|
99
99
|
instance_id: URIRef,
|
|
100
100
|
property_renaming_config: dict | None = None,
|
|
101
|
-
) -> tuple[str, dict[str, list[str]]]:
|
|
101
|
+
) -> tuple[str, dict[str, list[str]]] | None:
|
|
102
102
|
"""DESCRIBE instance for a given class from the graph store
|
|
103
103
|
|
|
104
104
|
Args:
|
|
105
105
|
instance_id: Instance id for which we want to generate query
|
|
106
|
-
|
|
106
|
+
property_renaming_config: Dictionary to rename properties, default None
|
|
107
107
|
|
|
108
108
|
Returns:
|
|
109
109
|
Dictionary of instance properties
|
|
110
110
|
"""
|
|
111
|
-
|
|
112
111
|
property_values: dict[str, list[str]] = defaultdict(list)
|
|
113
|
-
|
|
114
|
-
for
|
|
115
|
-
if object_.lower()
|
|
112
|
+
identifier = remove_namespace_from_uri(instance_id, validation="prefix")
|
|
113
|
+
for _, predicate, object_ in cast(list[ResultRow], self.graph.query(f"DESCRIBE <{instance_id}>")):
|
|
114
|
+
if object_.lower() in [
|
|
116
115
|
"",
|
|
117
116
|
"none",
|
|
118
117
|
"nan",
|
|
119
118
|
"null",
|
|
120
119
|
]:
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
elif property_renaming_config and (property_ := property_renaming_config.get(predicate, None)):
|
|
137
|
-
property_values[property_].append(value)
|
|
138
|
-
|
|
139
|
-
# use-case: skip the property if it is not defined in property_renaming_config
|
|
140
|
-
else:
|
|
141
|
-
continue
|
|
120
|
+
continue
|
|
121
|
+
# we are skipping deep validation with Pydantic to remove namespace here
|
|
122
|
+
# as it reduces time to process triples by 10-15x
|
|
123
|
+
value = remove_namespace_from_uri(object_, validation="prefix")
|
|
124
|
+
|
|
125
|
+
# use-case: calling describe without renaming properties
|
|
126
|
+
# losing the namespace from the predicate!
|
|
127
|
+
if not property_renaming_config and predicate != RDF.type:
|
|
128
|
+
property_values[remove_namespace_from_uri(predicate, validation="prefix")].append(value)
|
|
129
|
+
|
|
130
|
+
# use-case: calling describe with renaming properties
|
|
131
|
+
# renaming the property to the new name, if the property is defined
|
|
132
|
+
# in the RULES sheet
|
|
133
|
+
elif property_renaming_config and (property_ := property_renaming_config.get(predicate, None)):
|
|
134
|
+
property_values[property_].append(value)
|
|
142
135
|
|
|
143
136
|
if property_values:
|
|
144
137
|
return (
|
|
@@ -146,7 +139,7 @@ class Queries:
|
|
|
146
139
|
property_values,
|
|
147
140
|
)
|
|
148
141
|
else:
|
|
149
|
-
return
|
|
142
|
+
return None
|
|
150
143
|
|
|
151
144
|
def construct_instances_of_class(
|
|
152
145
|
self,
|
|
@@ -177,7 +170,7 @@ class Queries:
|
|
|
177
170
|
result = self.graph.query(query)
|
|
178
171
|
|
|
179
172
|
# We cannot include the RDF.type in case there is a neat:type property
|
|
180
|
-
return [remove_namespace_from_uri(
|
|
173
|
+
return [remove_namespace_from_uri(cast(ResultRow, triple)) for triple in result if triple[1] != RDF.type] # type: ignore[misc, index]
|
|
181
174
|
else:
|
|
182
175
|
warnings.warn(
|
|
183
176
|
"No rules found for the graph store, returning empty list.",
|