cognite-neat 0.87.4__py3-none-any.whl → 0.88.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of cognite-neat might be problematic.
- cognite/neat/_version.py +1 -1
- cognite/neat/app/api/data_classes/rest.py +0 -19
- cognite/neat/app/api/explorer.py +6 -4
- cognite/neat/app/api/routers/crud.py +11 -21
- cognite/neat/app/api/routers/workflows.py +24 -94
- cognite/neat/graph/extractors/_classic_cdf/_assets.py +8 -2
- cognite/neat/graph/extractors/_mock_graph_generator.py +2 -2
- cognite/neat/graph/loaders/_base.py +17 -12
- cognite/neat/graph/loaders/_rdf2asset.py +223 -58
- cognite/neat/graph/loaders/_rdf2dms.py +1 -1
- cognite/neat/graph/stores/_base.py +5 -0
- cognite/neat/rules/analysis/_asset.py +31 -1
- cognite/neat/rules/importers/_inference2rules.py +31 -35
- cognite/neat/rules/models/information/_rules.py +1 -1
- cognite/neat/workflows/steps/data_contracts.py +17 -43
- cognite/neat/workflows/steps/lib/current/graph_extractor.py +28 -24
- cognite/neat/workflows/steps/lib/current/graph_loader.py +4 -21
- cognite/neat/workflows/steps/lib/current/graph_store.py +18 -134
- cognite/neat/workflows/steps_registry.py +5 -7
- {cognite_neat-0.87.4.dist-info → cognite_neat-0.88.0.dist-info}/METADATA +1 -1
- {cognite_neat-0.87.4.dist-info → cognite_neat-0.88.0.dist-info}/RECORD +24 -132
- cognite/neat/app/api/routers/core.py +0 -91
- cognite/neat/app/api/routers/data_exploration.py +0 -336
- cognite/neat/app/api/routers/rules.py +0 -203
- cognite/neat/legacy/__init__.py +0 -0
- cognite/neat/legacy/graph/__init__.py +0 -3
- cognite/neat/legacy/graph/examples/Knowledge-Graph-Nordic44-dirty.xml +0 -20182
- cognite/neat/legacy/graph/examples/Knowledge-Graph-Nordic44.xml +0 -20163
- cognite/neat/legacy/graph/examples/__init__.py +0 -10
- cognite/neat/legacy/graph/examples/skos-capturing-sheet-wind-topics.xlsx +0 -0
- cognite/neat/legacy/graph/exceptions.py +0 -90
- cognite/neat/legacy/graph/extractors/__init__.py +0 -6
- cognite/neat/legacy/graph/extractors/_base.py +0 -14
- cognite/neat/legacy/graph/extractors/_dexpi.py +0 -44
- cognite/neat/legacy/graph/extractors/_graph_capturing_sheet.py +0 -403
- cognite/neat/legacy/graph/extractors/_mock_graph_generator.py +0 -361
- cognite/neat/legacy/graph/loaders/__init__.py +0 -23
- cognite/neat/legacy/graph/loaders/_asset_loader.py +0 -511
- cognite/neat/legacy/graph/loaders/_base.py +0 -67
- cognite/neat/legacy/graph/loaders/_exceptions.py +0 -85
- cognite/neat/legacy/graph/loaders/core/__init__.py +0 -0
- cognite/neat/legacy/graph/loaders/core/labels.py +0 -58
- cognite/neat/legacy/graph/loaders/core/models.py +0 -136
- cognite/neat/legacy/graph/loaders/core/rdf_to_assets.py +0 -1046
- cognite/neat/legacy/graph/loaders/core/rdf_to_relationships.py +0 -559
- cognite/neat/legacy/graph/loaders/rdf_to_dms.py +0 -309
- cognite/neat/legacy/graph/loaders/validator.py +0 -87
- cognite/neat/legacy/graph/models.py +0 -6
- cognite/neat/legacy/graph/stores/__init__.py +0 -13
- cognite/neat/legacy/graph/stores/_base.py +0 -400
- cognite/neat/legacy/graph/stores/_graphdb_store.py +0 -52
- cognite/neat/legacy/graph/stores/_memory_store.py +0 -43
- cognite/neat/legacy/graph/stores/_oxigraph_store.py +0 -151
- cognite/neat/legacy/graph/stores/_oxrdflib.py +0 -247
- cognite/neat/legacy/graph/stores/_rdf_to_graph.py +0 -42
- cognite/neat/legacy/graph/transformations/__init__.py +0 -0
- cognite/neat/legacy/graph/transformations/entity_matcher.py +0 -101
- cognite/neat/legacy/graph/transformations/query_generator/__init__.py +0 -3
- cognite/neat/legacy/graph/transformations/query_generator/sparql.py +0 -575
- cognite/neat/legacy/graph/transformations/transformer.py +0 -322
- cognite/neat/legacy/rules/__init__.py +0 -0
- cognite/neat/legacy/rules/analysis.py +0 -231
- cognite/neat/legacy/rules/examples/Rules-Nordic44-to-graphql.xlsx +0 -0
- cognite/neat/legacy/rules/examples/Rules-Nordic44.xlsx +0 -0
- cognite/neat/legacy/rules/examples/__init__.py +0 -18
- cognite/neat/legacy/rules/examples/power-grid-containers.yaml +0 -124
- cognite/neat/legacy/rules/examples/power-grid-example.xlsx +0 -0
- cognite/neat/legacy/rules/examples/power-grid-model.yaml +0 -224
- cognite/neat/legacy/rules/examples/rules-template.xlsx +0 -0
- cognite/neat/legacy/rules/examples/sheet2cdf-transformation-rules.xlsx +0 -0
- cognite/neat/legacy/rules/examples/skos-rules.xlsx +0 -0
- cognite/neat/legacy/rules/examples/source-to-solution-mapping-rules.xlsx +0 -0
- cognite/neat/legacy/rules/examples/wind-energy.owl +0 -1511
- cognite/neat/legacy/rules/exceptions.py +0 -2972
- cognite/neat/legacy/rules/exporters/__init__.py +0 -20
- cognite/neat/legacy/rules/exporters/_base.py +0 -45
- cognite/neat/legacy/rules/exporters/_core/__init__.py +0 -5
- cognite/neat/legacy/rules/exporters/_core/rules2labels.py +0 -24
- cognite/neat/legacy/rules/exporters/_rules2dms.py +0 -885
- cognite/neat/legacy/rules/exporters/_rules2excel.py +0 -213
- cognite/neat/legacy/rules/exporters/_rules2graphql.py +0 -183
- cognite/neat/legacy/rules/exporters/_rules2ontology.py +0 -524
- cognite/neat/legacy/rules/exporters/_rules2pydantic_models.py +0 -748
- cognite/neat/legacy/rules/exporters/_rules2rules.py +0 -105
- cognite/neat/legacy/rules/exporters/_rules2triples.py +0 -38
- cognite/neat/legacy/rules/exporters/_validation.py +0 -146
- cognite/neat/legacy/rules/importers/__init__.py +0 -22
- cognite/neat/legacy/rules/importers/_base.py +0 -66
- cognite/neat/legacy/rules/importers/_dict2rules.py +0 -158
- cognite/neat/legacy/rules/importers/_dms2rules.py +0 -194
- cognite/neat/legacy/rules/importers/_graph2rules.py +0 -308
- cognite/neat/legacy/rules/importers/_json2rules.py +0 -39
- cognite/neat/legacy/rules/importers/_owl2rules/__init__.py +0 -3
- cognite/neat/legacy/rules/importers/_owl2rules/_owl2classes.py +0 -239
- cognite/neat/legacy/rules/importers/_owl2rules/_owl2metadata.py +0 -260
- cognite/neat/legacy/rules/importers/_owl2rules/_owl2properties.py +0 -217
- cognite/neat/legacy/rules/importers/_owl2rules/_owl2rules.py +0 -290
- cognite/neat/legacy/rules/importers/_spreadsheet2rules.py +0 -45
- cognite/neat/legacy/rules/importers/_xsd2rules.py +0 -20
- cognite/neat/legacy/rules/importers/_yaml2rules.py +0 -39
- cognite/neat/legacy/rules/models/__init__.py +0 -5
- cognite/neat/legacy/rules/models/_base.py +0 -151
- cognite/neat/legacy/rules/models/raw_rules.py +0 -316
- cognite/neat/legacy/rules/models/rdfpath.py +0 -237
- cognite/neat/legacy/rules/models/rules.py +0 -1289
- cognite/neat/legacy/rules/models/tables.py +0 -9
- cognite/neat/legacy/rules/models/value_types.py +0 -118
- cognite/neat/legacy/workflows/examples/Export_DMS/workflow.yaml +0 -89
- cognite/neat/legacy/workflows/examples/Export_Rules_to_Ontology/workflow.yaml +0 -152
- cognite/neat/legacy/workflows/examples/Extract_DEXPI_Graph_and_Export_Rules/workflow.yaml +0 -139
- cognite/neat/legacy/workflows/examples/Extract_RDF_Graph_and_Generate_Assets/workflow.yaml +0 -270
- cognite/neat/legacy/workflows/examples/Import_DMS/workflow.yaml +0 -65
- cognite/neat/legacy/workflows/examples/Ontology_to_Data_Model/workflow.yaml +0 -116
- cognite/neat/legacy/workflows/examples/Validate_Rules/workflow.yaml +0 -67
- cognite/neat/legacy/workflows/examples/Validate_Solution_Model/workflow.yaml +0 -64
- cognite/neat/legacy/workflows/examples/Visualize_Data_Model_Using_Mock_Graph/workflow.yaml +0 -95
- cognite/neat/legacy/workflows/examples/Visualize_Semantic_Data_Model/workflow.yaml +0 -111
- cognite/neat/workflows/examples/Extract_RDF_Graph_and_Generate_Assets/workflow.yaml +0 -270
- cognite/neat/workflows/migration/__init__.py +0 -0
- cognite/neat/workflows/migration/steps.py +0 -91
- cognite/neat/workflows/migration/wf_manifests.py +0 -33
- cognite/neat/workflows/steps/lib/legacy/__init__.py +0 -7
- cognite/neat/workflows/steps/lib/legacy/graph_contextualization.py +0 -82
- cognite/neat/workflows/steps/lib/legacy/graph_extractor.py +0 -746
- cognite/neat/workflows/steps/lib/legacy/graph_loader.py +0 -606
- cognite/neat/workflows/steps/lib/legacy/graph_store.py +0 -307
- cognite/neat/workflows/steps/lib/legacy/graph_transformer.py +0 -58
- cognite/neat/workflows/steps/lib/legacy/rules_exporter.py +0 -511
- cognite/neat/workflows/steps/lib/legacy/rules_importer.py +0 -612
- {cognite_neat-0.87.4.dist-info → cognite_neat-0.88.0.dist-info}/LICENSE +0 -0
- {cognite_neat-0.87.4.dist-info → cognite_neat-0.88.0.dist-info}/WHEEL +0 -0
- {cognite_neat-0.87.4.dist-info → cognite_neat-0.88.0.dist-info}/entry_points.txt +0 -0
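Every cognite/neat/legacy module in the list above is deleted in this release (each legacy file shows +0 added lines). As a minimal, hypothetical sketch of what that implies for downstream code (the import is taken from the deleted sources themselves, not from any upgrade guide), a legacy import that resolves on 0.87.4 stops resolving on 0.88.0:

    # Hypothetical sketch: resolves against cognite-neat 0.87.4, but raises
    # ModuleNotFoundError on 0.88.0 because the cognite.neat.legacy package is removed.
    from cognite.neat.legacy.rules.models.rules import Rules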
cognite/neat/legacy/graph/transformations/transformer.py
@@ -1,322 +0,0 @@
-"""Methods to transform Domain Knowledge Graph to App Knowledge Graph"""
-
-import logging
-import time
-import traceback
-from typing import Any
-
-import pandas as pd
-from cognite.client import CogniteClient
-from prometheus_client import Gauge
-from pydantic import BaseModel
-from rdflib import RDF, Graph
-from rdflib.term import Literal, Node
-
-from cognite.neat.legacy.graph.exceptions import NamespaceRequired
-from cognite.neat.legacy.graph.transformations.query_generator.sparql import build_sparql_query
-from cognite.neat.legacy.rules.models._base import EntityTypes
-from cognite.neat.legacy.rules.models.rdfpath import (
-    AllProperties,
-    AllReferences,
-    Query,
-    RawLookup,
-    Traversal,
-    parse_rule,
-)
-from cognite.neat.legacy.rules.models.rules import Rules
-from cognite.neat.utils.rdf_ import remove_namespace_from_uri
-
-prom_total_proc_rules_g = Gauge("neat_total_processed_rules", "Number of processed rules", ["state"])
-rules_processing_timing_metric = Gauge(
-    "neat_rules_processing_timing", "Transformation rules processing timing metrics", ["aggregation"]
-)
-
-
-COMMIT_BATCH_SIZE = 10000
-
-
-class RuleProcessingReportRec(BaseModel):
-    """Report record for rule processing"""
-
-    row_id: str | None = None
-    rule_name: str | None = None
-    rule_type: str | None = None
-    rule_expression: Any | None = None
-    status: str | None = None
-    error_message: str | None = None
-    elapsed_time: float = 0
-    rows_in_response: int = 0
-
-
-class RuleProcessingReport(BaseModel):
-    """Report for rule processing"""
-
-    total_rules: int = 0
-    total_success: int = 0
-    total_success_no_results: int = 0
-    total_failed: int = 0
-    records: list[RuleProcessingReportRec] = []
-    elapsed_time: float = 0
-
-
-def source2solution_graph(
-    source_knowledge_graph: Graph,
-    transformation_rules: Rules,
-    solution_knowledge_graph: Graph | None = None,
-    client: CogniteClient | None = None,
-    cdf_lookup_database: str | None = None,
-    extra_triples: list[tuple[Node, Node, Node]] | None = None,
-    stop_on_exception: bool = False,
-    missing_raw_lookup_value: str = "NaN",
-    processing_report: RuleProcessingReport | None = None,
-) -> Graph:
-    """Transforms solution knowledge graph based on Domain Knowledge Graph
-
-    Args:
-        source_knowledge_graph: Domain Knowledge Graph which represents the source graph being
-            transformed to app/solution specific graph
-        transformation_rules: Transformation rules holding data model definition and rules to
-            transform source/domain graph to app/solution specific graph
-        solution_knowledge_graph: Graph to store app/solution specific graph.
-            Defaults to None (i.e., empty graph).
-        client: CogniteClient. Defaults to None.
-        cdf_lookup_database: CDF RAW database name to use for `rawlookup` rules. Defaults to None.
-        extra_triples: Additional triples to add to app/solution knowledge graph. Defaults to None.
-        stop_on_exception: To stop on exception. Defaults to False.
-        missing_raw_lookup_value: If no value is find for `rawlookup` default value to use. Defaults to "NaN".
-        processing_report: Processing report to store results to. Defaults to None.
-
-    Returns:
-        Transformed knowledge graph based on transformation rules
-    """
-
-    # TODO: This is to be improved and slowly sunset domain2app_knowledge_graph
-
-    return domain2app_knowledge_graph(
-        domain_knowledge_graph=source_knowledge_graph,
-        transformation_rules=transformation_rules,
-        app_instance_graph=solution_knowledge_graph,
-        client=client,
-        cdf_lookup_database=cdf_lookup_database,
-        extra_triples=extra_triples,
-        stop_on_exception=stop_on_exception,
-        missing_raw_lookup_value=missing_raw_lookup_value,
-        processing_report=processing_report,
-    )
-
-
-def domain2app_knowledge_graph(
-    domain_knowledge_graph: Graph,
-    transformation_rules: Rules,
-    app_instance_graph: Graph | None = None,
-    client: CogniteClient | None = None,
-    cdf_lookup_database: str | None = None,
-    extra_triples: list[tuple[Node, Node, Node]] | None = None,
-    stop_on_exception: bool = False,
-    missing_raw_lookup_value: str = "NaN",
-    processing_report: RuleProcessingReport | None = None,
-) -> Graph:
-    """Generates application/solution specific knowledge graph based on Domain Knowledge Graph
-
-    Args:
-        domain_knowledge_graph: Domain Knowledge Graph which represent the source graph being
-            transformed to app/solution specific graph
-        transformation_rules: Transformation rules holding data model definition and rules
-            to transform source/domain graph to app/solution specific graph
-        app_instance_graph: Graph to store app/solution specific graph. Defaults to None (i.e., empty graph).
-        client: CogniteClient. Defaults to None.
-        cdf_lookup_database: CDF RAW database name to use for `rawlookup` rules. Defaults to None.
-        extra_triples: Additional triples to add to app/solution knowledge graph. Defaults to None.
-        stop_on_exception: To stop on exception. Defaults to False.
-        missing_raw_lookup_value: If no value is find for `rawlookup` default value to use. Defaults to "NaN".
-        processing_report: Processing report to store results to. Defaults to None.
-
-    Returns:
-        Transformed knowledge graph based on transformation rules
-    """
-    if transformation_rules.metadata.namespace is None:
-        raise NamespaceRequired("Transform domain to app knowledge graph")
-    rule_namespace = transformation_rules.metadata.namespace
-
-    if app_instance_graph is None:
-        app_instance_graph = Graph()
-    # Bind App namespace and prefix
-    app_instance_graph.bind(transformation_rules.metadata.prefix, rule_namespace)
-    # Bind other prefixes and namespaces
-    for prefix, namespace in transformation_rules.prefixes.items():
-        app_instance_graph.bind(prefix, namespace)
-
-    tables_by_name = {}
-    if cdf_lookup_database and client:
-        for table_name in transformation_rules.raw_tables:
-            logging.debug(f"Loading {table_name} table from database {cdf_lookup_database}")
-            table = client.raw.rows.retrieve_dataframe(cdf_lookup_database, table_name, limit=-1)
-            tables_by_name[table_name] = table
-
-    # Add references with their type first
-    types = []
-    success = 0
-    success_no_results = 0
-    failed = 0
-    commit_counter = 0
-    timing_traces = []
-    prom_total_proc_rules_g.labels(state="success_no_results").set(0)
-    prom_total_proc_rules_g.labels(state="success").set(0)
-    prom_total_proc_rules_g.labels(state="failed").set(0)
-
-    def check_commit(force_commit: bool = False):
-        """'Commit nodes to the graph if batch counter is reached or if force_commit is True"""
-
-        if force_commit:
-            logging.debug("Committing nodes")
-            app_instance_graph.commit()
-            logging.debug("Nodes committed")
-            return
-        nonlocal commit_counter
-        commit_counter += 1
-        if commit_counter >= COMMIT_BATCH_SIZE:
-            logging.info(f"Committing {COMMIT_BATCH_SIZE} nodes")
-            app_instance_graph.commit()
-            logging.info(f" {COMMIT_BATCH_SIZE} nodes committed")
-            commit_counter = 0
-
-    proc_start_time = time.perf_counter()
-    for sheet_row, rule_definition in transformation_rules.properties.items():
-        if not rule_definition.rule or rule_definition.skip_rule:
-            continue
-        msg = f"Processing {sheet_row}: class <{rule_definition.class_id}> "
-        msg += f"property <{rule_definition.property_name}> rule <{rule_definition.rule}>"
-
-        processing_report_rec = RuleProcessingReportRec(
-            row_id=sheet_row,
-            rule_name=f"{rule_definition.class_id}_{rule_definition.property_name}",
-            rule_type=rule_definition.rule_type,
-            rule_expression=rule_definition.rule,
-        )
-        logging.info(msg)
-        try:
-            start_time = time.perf_counter()
-            # Parse rule:
-            rule = parse_rule(rule_definition.rule, rule_definition.rule_type)  # type: ignore[arg-type]
-
-            # Build SPARQL if needed:
-            if isinstance(rule.traversal, Query) and rule_definition.rule_type == "sparql":
-                query = rule.traversal.query
-            elif isinstance(rule.traversal, Traversal | str):
-                query = build_sparql_query(domain_knowledge_graph, rule.traversal, transformation_rules.prefixes)
-            else:
-                raise ValueError(f"Unknown traversal type {type(rule.traversal)}")
-            logging.debug(f"Query: {query}")
-
-            if query_results := list(domain_knowledge_graph.query(query)):
-                # Generate URI for class and property in target namespace
-                class_URI = rule_namespace[rule_definition.class_id]
-                property_URI = rule_namespace[rule_definition.property_name]  # type: ignore[index]
-
-                # Turn query results into dataframe
-                instance_df = pd.DataFrame(
-                    query_results, columns=[EntityTypes.subject, EntityTypes.predicate, EntityTypes.object]
-                )
-
-                # If we are not grabbing all properties for class instances
-                # then we are able to replace source property URI with target property URI
-                # otherwise we should keep source property URI
-                if not isinstance(rule.traversal, AllProperties):
-                    instance_df[EntityTypes.predicate] = property_URI
-
-                # If we are storing object from the source graph as literal value(propety type being Datatype Property)
-                # in the target graph then we should remove namespace from the object URI and store it as literal
-                if isinstance(rule.traversal, AllReferences) and rule_definition.property_type == "DatatypeProperty":
-                    instance_df[EntityTypes.object] = instance_df[EntityTypes.object].apply(
-                        lambda x: Literal(remove_namespace_from_uri(x))
-                    )
-
-                if isinstance(rule, RawLookup):
-                    lookup_map = tables_by_name[rule.table.name].set_index(rule.table.key)[rule.table.value].to_dict()
-
-                    def lookup(
-                        literal: Literal, lookup_table=lookup_map, missing_raw_lookup_value=missing_raw_lookup_value
-                    ):
-                        if new_value := lookup_table.get(literal.value):
-                            return Literal(new_value, literal.language, literal.datatype, bool(literal.normalize))
-                        elif missing_raw_lookup_value:
-                            return Literal(
-                                missing_raw_lookup_value, literal.language, literal.datatype, bool(literal.normalize)
-                            )
-                        else:
-                            return literal
-
-                    instance_df[EntityTypes.object] = instance_df[EntityTypes.object].apply(lookup)
-
-                # Add instances
-                for _, triple in instance_df.iterrows():
-                    app_instance_graph.add(triple.values)  # type: ignore[arg-type]
-                    check_commit()
-                # Setting instances type and merging them with df containing instance - type relations
-                instance_df[EntityTypes.predicate] = RDF.type
-                instance_df[EntityTypes.object] = class_URI
-                types.append(instance_df)
-                success += 1
-                prom_total_proc_rules_g.labels(state="success").inc()
-                elapsed_time = time.perf_counter() - start_time
-                timing_traces.append(elapsed_time)
-                processing_report_rec.elapsed_time = elapsed_time
-                processing_report_rec.status = "success"
-                processing_report_rec.rows_in_response = len(instance_df)
-            else:
-                success_no_results += 1
-                prom_total_proc_rules_g.labels(state="success_no_results").inc()
-                elapsed_time = time.perf_counter() - start_time
-                timing_traces.append(elapsed_time)
-                processing_report_rec.elapsed_time = elapsed_time
-                processing_report_rec.status = "success_no_results"
-
-        except Exception as e:
-            failed += 1
-            elapsed_time = time.perf_counter() - start_time
-            processing_report_rec.elapsed_time = elapsed_time
-            processing_report_rec.status = "failed"
-            processing_report_rec.error_message = str(e)
-            prom_total_proc_rules_g.labels(state="failed").inc()
-            logging.error(
-                f" Error while processing rule {rule_definition.rule} for class {rule_definition.class_id} \
-                    and property {rule_definition.property_name}"
-            )
-            logging.error(traceback.format_exc())
-            if stop_on_exception:
-                raise e
-
-        if processing_report:
-            processing_report.records.append(processing_report_rec)
-
-    if processing_report:
-        processing_report.total_rules = len(transformation_rules.properties)
-        processing_report.total_success = success
-        processing_report.total_success_no_results = success_no_results
-        processing_report.total_failed = failed
-        processing_report.elapsed_time = time.perf_counter() - proc_start_time
-
-    if timing_traces:
-        df = pd.Series(timing_traces)
-        rules_processing_timing_metric.labels(aggregation="sum").set(df.sum())
-        rules_processing_timing_metric.labels(aggregation="std_div").set(df.std())
-        rules_processing_timing_metric.labels(aggregation="min").set(df.min())
-        rules_processing_timing_metric.labels(aggregation="max").set(df.max())
-        rules_processing_timing_metric.labels(aggregation="mean").set(df.mean())
-
-    type_df = pd.concat(types).drop_duplicates(EntityTypes.subject).reset_index(drop=True)
-
-    # Add instance - RDF Type relations
-    for _, triple in type_df.iterrows():  # type: ignore[assignment]
-        app_instance_graph.add(triple.values)  # type: ignore[arg-type]
-        check_commit()
-
-    for i, triple in enumerate(extra_triples or []):  # type: ignore[assignment]
-        try:
-            app_instance_graph.add(triple)  # type: ignore[arg-type]
-            check_commit()
-        except ValueError as e:
-            raise ValueError(f"Triple {i} in extra_triples is not correct and cannot be added!") from e
-
-    check_commit(force_commit=True)
-    return app_instance_graph
File without changes
cognite/neat/legacy/rules/analysis.py
@@ -1,231 +0,0 @@
-import warnings
-from collections import defaultdict
-
-import pandas as pd
-
-from cognite.neat.legacy.rules.models.rdfpath import TransformationRuleType
-from cognite.neat.legacy.rules.models.rules import Property, Rules
-
-
-def get_defined_classes(transformation_rules: Rules) -> set[str]:
-    """Returns classes that have properties defined for them in the data model.
-
-    Args:
-        transformation_rules: Instance of TransformationRules holding the data model
-
-    Returns:
-        Set of classes that have been defined in the data model
-    """
-    return {property.class_id for property in transformation_rules.properties.values()}
-
-
-def get_classes_with_properties(transformation_rules: Rules) -> dict[str, list[Property]]:
-    """Returns classes that have been defined in the data model.
-
-    Args:
-        transformation_rules: Instance of TransformationRules holding the data model
-
-    Returns:
-        Dictionary of classes with a list of properties defined for them
-    """
-
-    class_property_pairs: dict[str, list[Property]] = {}
-
-    for property_ in transformation_rules.properties.values():
-        class_ = property_.class_id
-        if class_ in class_property_pairs:
-            class_property_pairs[class_] += [property_]
-        else:
-            class_property_pairs[class_] = [property_]
-
-    return class_property_pairs
-
-
-def to_class_property_pairs(transformation_rules: Rules, only_rdfpath: bool = False) -> dict[str, dict[str, Property]]:
-    """Returns a dictionary of classes with a dictionary of properties associated with them.
-
-    Args:
-        transformation_rules : Instance of TransformationRules holding the data model
-        only_rdfpath : To consider only properties which have rule `rdfpath` set. Defaults False
-
-    Returns:
-        Dictionary of classes with a dictionary of properties associated with them.
-
-    !!! note "only_rdfpath"
-        If only_rdfpath is True, only properties with RuleType.rdfpath will be returned as
-        a part of the dictionary of properties related to a class. Otherwise, all properties
-        will be returned.
-    """
-
-    class_property_pairs = {}
-
-    for class_, properties in get_classes_with_properties(transformation_rules).items():
-        processed_properties = {}
-        for property_ in properties:
-            if property_.property_id in processed_properties:
-                # TODO: use appropriate Warning class from _exceptions.py
-                # if missing make one !
-                warnings.warn(
-                    "Property has been defined more than once! Only first definition will be considered.", stacklevel=2
-                )
-                continue
-
-            if (only_rdfpath and property_.rule_type == TransformationRuleType.rdfpath) or not only_rdfpath:
-                processed_properties[property_.property_id] = property_
-        class_property_pairs[class_] = processed_properties
-
-    return class_property_pairs
-
-
-def get_class_linkage(transformation_rules: Rules) -> pd.DataFrame:
-    """Returns a dataframe with the class linkage of the data model.
-
-    Args:
-        transformation_rules: Instance of TransformationRules holding the data model
-
-    Returns:
-        Dataframe with the class linkage of the data model
-    """
-
-    class_linkage = pd.DataFrame(columns=["source_class", "target_class", "connecting_property", "max_occurrence"])
-    for property_ in transformation_rules.properties.values():
-        if property_.property_type == "ObjectProperty":
-            new_row = pd.Series(
-                {
-                    "source_class": property_.class_id,
-                    "target_class": property_.expected_value_type.suffix,
-                    "connecting_property": property_.property_id,
-                    "max_occurrence": property_.max_count,
-                    "linking_type": "hierarchy" if property_.resource_type_property else "relationship",
-                }
-            )
-            class_linkage = pd.concat([class_linkage, new_row.to_frame().T], ignore_index=True)
-
-    class_linkage.drop_duplicates(inplace=True)
-
-    return class_linkage
-
-
-def get_class_hierarchy_linkage(rules: Rules) -> pd.DataFrame:
-    """Remove linkage which is not creating asset hierarchy."""
-    class_linkage = get_class_linkage(rules)
-    return class_linkage[class_linkage.linking_type == "hierarchy"]
-
-
-def get_connected_classes(transformation_rules: Rules) -> set[str]:
-    """Return a set of classes that are connected to other classes.
-
-    Args:
-        transformation_rules: Instance of TransformationRules holding the data model
-
-    Returns:
-        Set of classes that are connected to other classes
-    """
-    class_linkage = get_class_linkage(transformation_rules)
-    return set(class_linkage.source_class.values).union(set(class_linkage.target_class.values))
-
-
-def get_disconnected_classes(transformation_rules: Rules) -> set[str]:
-    """Return a set of classes that are disconnected (i.e. isolated) from other classes.
-
-    Args:
-        transformation_rules: Instance of TransformationRules holding the data model
-
-    Returns:
-        Set of classes that are disconnected from other classes
-    """
-    return get_defined_classes(transformation_rules) - get_connected_classes(transformation_rules)
-
-
-def get_symmetric_pairs(transformation_rules: Rules) -> set[tuple[str, str]]:
-    """Returns a set of pairs of symmetrically linked classes.
-
-    Args:
-        transformation_rules: Instance of TransformationRules holding the data model
-
-    Returns:
-        Set of pairs of symmetrically linked classes
-    """
-
-    # TODO: Find better name for this method
-    sym_pairs: set[tuple[str, str]] = set()
-
-    class_linkage = get_class_linkage(transformation_rules)
-    if class_linkage.empty:
-        return sym_pairs
-
-    for _, row in class_linkage.iterrows():
-        source = row.source_class
-        target = row.target_class
-        target_targets = class_linkage[class_linkage.source_class == target].target_class.values
-        if source in target_targets and (source, target) not in sym_pairs:
-            sym_pairs.add((source, target))
-    return sym_pairs
-
-
-def get_entity_ids(transformation_rules: Rules) -> set[str]:
-    """Returns a set of entity ids (classes and properties) defined in the data model.
-
-    Args:
-        transformation_rules: Instance of TransformationRules holding the data model
-
-    Returns:
-        Set of entity ids (classes and properties) defined in the data model
-    """
-    return set(transformation_rules.classes.keys()).union(
-        {property_.property_id for property_ in transformation_rules.properties.values()}
-    )
-
-
-def to_property_dict(transformation_rules: Rules) -> dict[str, list[Property]]:
-    """Convert list of properties to a dictionary of lists of properties with property_id as key.
-
-    Args:
-        transformation_rules: Instance of TransformationRules holding the data model
-
-    Returns:
-        Dictionary of lists of properties with property_id as key
-    """
-    property_: dict[str, list[Property]] = defaultdict(list)
-
-    for prop in transformation_rules.properties.values():
-        if not (prop.property_id and prop.property_name == "*"):
-            property_[prop.property_id].append(prop)
-
-    return property_
-
-
-def get_asset_related_properties(properties: list[Property]) -> list[Property]:
-    """Return properties that are used to define CDF Assets
-
-    Args:
-        properties: List of properties
-
-    Returns:
-        List of properties that are used to define CDF Assets
-    """
-    return [prop for prop in properties if "Asset" in prop.cdf_resource_type]
-
-
-def define_class_asset_mapping(transformation_rules: Rules, class_: str) -> dict[str, list[str]]:
-    """Define mapping between class and asset properties
-
-    Args:
-        transformation_rules: Instance of TransformationRules holding the data model
-        class_: Class id for which mapping is to be defined
-
-    Returns:
-        Dictionary with asset properties as keys and list of class properties as values
-    """
-    mapping_dict: dict[str, list[str]] = {}
-
-    class_properties = to_class_property_pairs(transformation_rules, only_rdfpath=True)[class_]
-
-    for asset_property in get_asset_related_properties(list(class_properties.values())):
-        for resource_type_property in asset_property.resource_type_property or []:
-            if resource_type_property not in mapping_dict:
-                mapping_dict[resource_type_property] = [asset_property.property_id]
-            else:
-                mapping_dict[resource_type_property] += [asset_property.property_id]
-
-    return mapping_dict
Binary file
Binary file
cognite/neat/legacy/rules/examples/__init__.py
@@ -1,18 +0,0 @@
-from pathlib import Path
-
-# we should make this a proper package that loads examples
-# similar how they handle it in xarray:
-# https://github.com/pydata/xarray/blob/main/xarray/tutorial.py
-# Currently there are simple paths to the examples which are then easily loaded in the notebooks
-
-_EXAMPLES = Path(__file__).parent
-
-power_grid_model = _EXAMPLES / "power-grid-example.xlsx"
-power_grid_containers = _EXAMPLES / "power-grid-containers.yaml"
-power_grid_data_model = _EXAMPLES / "power-grid-model.yaml"
-simple_example = _EXAMPLES / "sheet2cdf-transformation-rules.xlsx"
-source_to_solution_mapping = _EXAMPLES / "source-to-solution-mapping-rules.xlsx"
-nordic44 = _EXAMPLES / "Rules-Nordic44.xlsx"
-nordic44_graphql = _EXAMPLES / "Rules-Nordic44-to-graphql.xlsx"
-skos = _EXAMPLES / "skos-rules.xlsx"
-wind_energy_ontology = _EXAMPLES / "wind-energy.owl"