cognite-neat 0.87.4__py3-none-any.whl → 0.88.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cognite-neat might be problematic. Click here for more details.

Files changed (132) hide show
  1. cognite/neat/_version.py +1 -1
  2. cognite/neat/app/api/data_classes/rest.py +0 -19
  3. cognite/neat/app/api/explorer.py +6 -4
  4. cognite/neat/app/api/routers/crud.py +11 -21
  5. cognite/neat/app/api/routers/workflows.py +24 -94
  6. cognite/neat/graph/extractors/_classic_cdf/_assets.py +8 -2
  7. cognite/neat/graph/extractors/_mock_graph_generator.py +2 -2
  8. cognite/neat/graph/loaders/_base.py +17 -12
  9. cognite/neat/graph/loaders/_rdf2asset.py +223 -58
  10. cognite/neat/graph/loaders/_rdf2dms.py +1 -1
  11. cognite/neat/graph/stores/_base.py +5 -0
  12. cognite/neat/rules/analysis/_asset.py +31 -1
  13. cognite/neat/rules/importers/_inference2rules.py +31 -35
  14. cognite/neat/rules/models/information/_rules.py +1 -1
  15. cognite/neat/workflows/steps/data_contracts.py +17 -43
  16. cognite/neat/workflows/steps/lib/current/graph_extractor.py +28 -24
  17. cognite/neat/workflows/steps/lib/current/graph_loader.py +4 -21
  18. cognite/neat/workflows/steps/lib/current/graph_store.py +18 -134
  19. cognite/neat/workflows/steps_registry.py +5 -7
  20. {cognite_neat-0.87.4.dist-info → cognite_neat-0.88.0.dist-info}/METADATA +1 -1
  21. {cognite_neat-0.87.4.dist-info → cognite_neat-0.88.0.dist-info}/RECORD +24 -132
  22. cognite/neat/app/api/routers/core.py +0 -91
  23. cognite/neat/app/api/routers/data_exploration.py +0 -336
  24. cognite/neat/app/api/routers/rules.py +0 -203
  25. cognite/neat/legacy/__init__.py +0 -0
  26. cognite/neat/legacy/graph/__init__.py +0 -3
  27. cognite/neat/legacy/graph/examples/Knowledge-Graph-Nordic44-dirty.xml +0 -20182
  28. cognite/neat/legacy/graph/examples/Knowledge-Graph-Nordic44.xml +0 -20163
  29. cognite/neat/legacy/graph/examples/__init__.py +0 -10
  30. cognite/neat/legacy/graph/examples/skos-capturing-sheet-wind-topics.xlsx +0 -0
  31. cognite/neat/legacy/graph/exceptions.py +0 -90
  32. cognite/neat/legacy/graph/extractors/__init__.py +0 -6
  33. cognite/neat/legacy/graph/extractors/_base.py +0 -14
  34. cognite/neat/legacy/graph/extractors/_dexpi.py +0 -44
  35. cognite/neat/legacy/graph/extractors/_graph_capturing_sheet.py +0 -403
  36. cognite/neat/legacy/graph/extractors/_mock_graph_generator.py +0 -361
  37. cognite/neat/legacy/graph/loaders/__init__.py +0 -23
  38. cognite/neat/legacy/graph/loaders/_asset_loader.py +0 -511
  39. cognite/neat/legacy/graph/loaders/_base.py +0 -67
  40. cognite/neat/legacy/graph/loaders/_exceptions.py +0 -85
  41. cognite/neat/legacy/graph/loaders/core/__init__.py +0 -0
  42. cognite/neat/legacy/graph/loaders/core/labels.py +0 -58
  43. cognite/neat/legacy/graph/loaders/core/models.py +0 -136
  44. cognite/neat/legacy/graph/loaders/core/rdf_to_assets.py +0 -1046
  45. cognite/neat/legacy/graph/loaders/core/rdf_to_relationships.py +0 -559
  46. cognite/neat/legacy/graph/loaders/rdf_to_dms.py +0 -309
  47. cognite/neat/legacy/graph/loaders/validator.py +0 -87
  48. cognite/neat/legacy/graph/models.py +0 -6
  49. cognite/neat/legacy/graph/stores/__init__.py +0 -13
  50. cognite/neat/legacy/graph/stores/_base.py +0 -400
  51. cognite/neat/legacy/graph/stores/_graphdb_store.py +0 -52
  52. cognite/neat/legacy/graph/stores/_memory_store.py +0 -43
  53. cognite/neat/legacy/graph/stores/_oxigraph_store.py +0 -151
  54. cognite/neat/legacy/graph/stores/_oxrdflib.py +0 -247
  55. cognite/neat/legacy/graph/stores/_rdf_to_graph.py +0 -42
  56. cognite/neat/legacy/graph/transformations/__init__.py +0 -0
  57. cognite/neat/legacy/graph/transformations/entity_matcher.py +0 -101
  58. cognite/neat/legacy/graph/transformations/query_generator/__init__.py +0 -3
  59. cognite/neat/legacy/graph/transformations/query_generator/sparql.py +0 -575
  60. cognite/neat/legacy/graph/transformations/transformer.py +0 -322
  61. cognite/neat/legacy/rules/__init__.py +0 -0
  62. cognite/neat/legacy/rules/analysis.py +0 -231
  63. cognite/neat/legacy/rules/examples/Rules-Nordic44-to-graphql.xlsx +0 -0
  64. cognite/neat/legacy/rules/examples/Rules-Nordic44.xlsx +0 -0
  65. cognite/neat/legacy/rules/examples/__init__.py +0 -18
  66. cognite/neat/legacy/rules/examples/power-grid-containers.yaml +0 -124
  67. cognite/neat/legacy/rules/examples/power-grid-example.xlsx +0 -0
  68. cognite/neat/legacy/rules/examples/power-grid-model.yaml +0 -224
  69. cognite/neat/legacy/rules/examples/rules-template.xlsx +0 -0
  70. cognite/neat/legacy/rules/examples/sheet2cdf-transformation-rules.xlsx +0 -0
  71. cognite/neat/legacy/rules/examples/skos-rules.xlsx +0 -0
  72. cognite/neat/legacy/rules/examples/source-to-solution-mapping-rules.xlsx +0 -0
  73. cognite/neat/legacy/rules/examples/wind-energy.owl +0 -1511
  74. cognite/neat/legacy/rules/exceptions.py +0 -2972
  75. cognite/neat/legacy/rules/exporters/__init__.py +0 -20
  76. cognite/neat/legacy/rules/exporters/_base.py +0 -45
  77. cognite/neat/legacy/rules/exporters/_core/__init__.py +0 -5
  78. cognite/neat/legacy/rules/exporters/_core/rules2labels.py +0 -24
  79. cognite/neat/legacy/rules/exporters/_rules2dms.py +0 -885
  80. cognite/neat/legacy/rules/exporters/_rules2excel.py +0 -213
  81. cognite/neat/legacy/rules/exporters/_rules2graphql.py +0 -183
  82. cognite/neat/legacy/rules/exporters/_rules2ontology.py +0 -524
  83. cognite/neat/legacy/rules/exporters/_rules2pydantic_models.py +0 -748
  84. cognite/neat/legacy/rules/exporters/_rules2rules.py +0 -105
  85. cognite/neat/legacy/rules/exporters/_rules2triples.py +0 -38
  86. cognite/neat/legacy/rules/exporters/_validation.py +0 -146
  87. cognite/neat/legacy/rules/importers/__init__.py +0 -22
  88. cognite/neat/legacy/rules/importers/_base.py +0 -66
  89. cognite/neat/legacy/rules/importers/_dict2rules.py +0 -158
  90. cognite/neat/legacy/rules/importers/_dms2rules.py +0 -194
  91. cognite/neat/legacy/rules/importers/_graph2rules.py +0 -308
  92. cognite/neat/legacy/rules/importers/_json2rules.py +0 -39
  93. cognite/neat/legacy/rules/importers/_owl2rules/__init__.py +0 -3
  94. cognite/neat/legacy/rules/importers/_owl2rules/_owl2classes.py +0 -239
  95. cognite/neat/legacy/rules/importers/_owl2rules/_owl2metadata.py +0 -260
  96. cognite/neat/legacy/rules/importers/_owl2rules/_owl2properties.py +0 -217
  97. cognite/neat/legacy/rules/importers/_owl2rules/_owl2rules.py +0 -290
  98. cognite/neat/legacy/rules/importers/_spreadsheet2rules.py +0 -45
  99. cognite/neat/legacy/rules/importers/_xsd2rules.py +0 -20
  100. cognite/neat/legacy/rules/importers/_yaml2rules.py +0 -39
  101. cognite/neat/legacy/rules/models/__init__.py +0 -5
  102. cognite/neat/legacy/rules/models/_base.py +0 -151
  103. cognite/neat/legacy/rules/models/raw_rules.py +0 -316
  104. cognite/neat/legacy/rules/models/rdfpath.py +0 -237
  105. cognite/neat/legacy/rules/models/rules.py +0 -1289
  106. cognite/neat/legacy/rules/models/tables.py +0 -9
  107. cognite/neat/legacy/rules/models/value_types.py +0 -118
  108. cognite/neat/legacy/workflows/examples/Export_DMS/workflow.yaml +0 -89
  109. cognite/neat/legacy/workflows/examples/Export_Rules_to_Ontology/workflow.yaml +0 -152
  110. cognite/neat/legacy/workflows/examples/Extract_DEXPI_Graph_and_Export_Rules/workflow.yaml +0 -139
  111. cognite/neat/legacy/workflows/examples/Extract_RDF_Graph_and_Generate_Assets/workflow.yaml +0 -270
  112. cognite/neat/legacy/workflows/examples/Import_DMS/workflow.yaml +0 -65
  113. cognite/neat/legacy/workflows/examples/Ontology_to_Data_Model/workflow.yaml +0 -116
  114. cognite/neat/legacy/workflows/examples/Validate_Rules/workflow.yaml +0 -67
  115. cognite/neat/legacy/workflows/examples/Validate_Solution_Model/workflow.yaml +0 -64
  116. cognite/neat/legacy/workflows/examples/Visualize_Data_Model_Using_Mock_Graph/workflow.yaml +0 -95
  117. cognite/neat/legacy/workflows/examples/Visualize_Semantic_Data_Model/workflow.yaml +0 -111
  118. cognite/neat/workflows/examples/Extract_RDF_Graph_and_Generate_Assets/workflow.yaml +0 -270
  119. cognite/neat/workflows/migration/__init__.py +0 -0
  120. cognite/neat/workflows/migration/steps.py +0 -91
  121. cognite/neat/workflows/migration/wf_manifests.py +0 -33
  122. cognite/neat/workflows/steps/lib/legacy/__init__.py +0 -7
  123. cognite/neat/workflows/steps/lib/legacy/graph_contextualization.py +0 -82
  124. cognite/neat/workflows/steps/lib/legacy/graph_extractor.py +0 -746
  125. cognite/neat/workflows/steps/lib/legacy/graph_loader.py +0 -606
  126. cognite/neat/workflows/steps/lib/legacy/graph_store.py +0 -307
  127. cognite/neat/workflows/steps/lib/legacy/graph_transformer.py +0 -58
  128. cognite/neat/workflows/steps/lib/legacy/rules_exporter.py +0 -511
  129. cognite/neat/workflows/steps/lib/legacy/rules_importer.py +0 -612
  130. {cognite_neat-0.87.4.dist-info → cognite_neat-0.88.0.dist-info}/LICENSE +0 -0
  131. {cognite_neat-0.87.4.dist-info → cognite_neat-0.88.0.dist-info}/WHEEL +0 -0
  132. {cognite_neat-0.87.4.dist-info → cognite_neat-0.88.0.dist-info}/entry_points.txt +0 -0
@@ -1,309 +0,0 @@
1
- import logging
2
- from collections.abc import Iterable
3
- from itertools import islice
4
- from typing import Literal, cast, overload
5
-
6
- from cognite.client import CogniteClient
7
- from cognite.client.data_classes.data_modeling import EdgeApply, InstanceApply, NodeApply
8
- from pydantic_core import ErrorDetails
9
-
10
- from cognite.neat.exceptions import NeatException
11
- from cognite.neat.legacy.graph.stores import NeatGraphStoreBase
12
- from cognite.neat.legacy.graph.transformations.query_generator.sparql import triples2dictionary
13
- from cognite.neat.legacy.rules.exporters._rules2dms import DMSSchemaComponents
14
- from cognite.neat.legacy.rules.exporters._rules2pydantic_models import add_class_prefix_to_xid, rules_to_pydantic_models
15
- from cognite.neat.legacy.rules.models.rules import Rules
16
- from cognite.neat.utils.auxiliary import retry_decorator
17
- from cognite.neat.utils.collection_ import chunker
18
- from cognite.neat.utils.time_ import datetime_utc_now
19
-
20
- from ._base import CogniteLoader
21
-
22
-
23
class DMSLoader(CogniteLoader[InstanceApply]):
    """Loads a Neat Graph into CDF as nodes and edges.

    Args:
        rules: Rules object
        graph_store: Graph store
        add_class_prefix: Add class prefix to external_id. Defaults to False.

    """

    def __init__(self, rules: Rules, graph_store: NeatGraphStoreBase, add_class_prefix: bool = False):
        super().__init__(rules, graph_store)
        self.add_class_prefix = add_class_prefix

    @overload
    def load(self, stop_on_exception: Literal[True]) -> Iterable[InstanceApply]: ...

    @overload
    def load(self, stop_on_exception: Literal[False] = False) -> Iterable[InstanceApply | ErrorDetails]: ...

    def load(self, stop_on_exception: bool = False) -> Iterable[InstanceApply | ErrorDetails]:
        """Yield the nodes and edges resolved from the graph, class by class.

        For every instance the node is yielded first, followed by its edges.
        Nodes or edges failing external-id validation are replaced by an
        ``ErrorDetails`` entry; an invalid node also skips that instance's edges.

        Args:
            stop_on_exception: If True, re-raise the first exception hit while
                resolving an instance. If False, yield it as an error entry
                and carry on with the next instance.

        Raises:
            ValueError: If the rules metadata has no namespace.
        """
        if self.rules.metadata.namespace is None:
            raise ValueError("Namespace is not defined in transformation rules metadata")

        data_model = DMSSchemaComponents.from_rules(self.rules)
        pydantic_models = rules_to_pydantic_models(self.rules)

        # Classes without a matching container in the data model are not loaded.
        exclude = {
            class_name
            for class_name in self.rules.classes
            if f"{self.rules.space}:{class_name}" not in data_model.containers
        }

        for class_name, triples in self._iterate_class_triples(exclude_classes=exclude):
            logging.info(f"<DMSLoader> Processing class : {class_name}")
            for instance_dict in triples2dictionary(triples).values():
                try:
                    instance = pydantic_models[class_name].from_dict(instance_dict)  # type: ignore[attr-defined]
                    if self.add_class_prefix:
                        instance.external_id = add_class_prefix_to_xid(
                            class_name=type(instance).__name__, external_id=instance.external_id
                        )
                    new_node = instance.to_node(data_model, self.add_class_prefix)  # type: ignore[attr-defined]
                    is_valid, reason = is_node_valid(new_node)
                    if not is_valid:
                        yield ErrorDetails(
                            input=instance_dict["external_id"],
                            loc=("Nodes",),
                            msg=f"Not valid node {new_node.external_id}. Reason: {reason}",
                            type="Node validation error",
                        )
                        # An invalid node means its edges are not emitted either.
                        continue
                    yield new_node

                    for new_edge in instance.to_edge(data_model, self.add_class_prefix):
                        is_valid, reason = is_edge_valid(new_edge)
                        if is_valid:
                            yield new_edge
                        else:
                            yield ErrorDetails(
                                input=instance_dict["external_id"],
                                loc=("Edges",),
                                msg=f"Not valid edge {new_edge.external_id}. Reason: {reason}",
                                type="Edge validation error",
                            )

                except Exception as e:
                    logging.error(
                        f"Instance {instance_dict['external_id']} of {class_name}"
                        f" cannot be resolved to nodes and edges. Reason: {e}"
                    )
                    if stop_on_exception:
                        raise

                    if isinstance(e, NeatException):
                        yield e.to_error_dict()
                    else:
                        yield ErrorDetails(
                            input=instance_dict["external_id"],
                            loc=("rdf2nodes_and_edges",),
                            msg=str(e),
                            # Implicit concatenation instead of a backslash continuation,
                            # which leaked source indentation into the message.
                            type=f"Exception of type {type(e).__name__} occurred "
                            f"when processing instance of {class_name}",
                        )

    def load_to_cdf(
        self, client: CogniteClient, batch_size: int | None = 1000, max_retries: int = 1, retry_delay: int = 3
    ) -> None:
        """Uploads nodes and edges to CDF

        Args:
            client: Instance of CogniteClient
            batch_size: Size of batch. Default to 1000.
            max_retries: Maximum times to retry the upload. Default to 1.
            retry_delay: Time delay before retrying the upload. Default to 3.

        !!! note "batch_size"
            If batch size is set to None, all nodes and edges will be pushed to CDF in one go.
        """
        if batch_size is None:
            logging.info("Batch size not set, pushing all nodes and edges to CDF in one go!")
            nodes, edges, _ = self.as_nodes_and_edges(stop_on_exception=False)

            @retry_decorator(max_retries=max_retries, retry_delay=retry_delay, component_name="create-instances")
            def create_instances():
                client.data_modeling.instances.apply(
                    nodes=nodes, edges=edges, auto_create_start_nodes=True, auto_create_end_nodes=True
                )

            create_instances()
            return

        logging.info(f"Uploading nodes in batches of {batch_size}")
        for instances in _batched(self.load(stop_on_exception=False), batch_size):
            # Error entries yielded by load() are neither NodeApply nor EdgeApply
            # and are therefore filtered out here.
            nodes = [instance for instance in instances if isinstance(instance, NodeApply)]
            edges = [instance for instance in instances if isinstance(instance, EdgeApply)]
            # Todo make _micro_batch_push handle both nodes and edges simultaneously
            # _micro_batch_push raises ValueError on an empty list, and a batch may
            # well contain only nodes or only edges, so push non-empty lists only.
            if nodes:
                _micro_batch_push(
                    client, nodes, batch_size, message="Upload", max_retries=max_retries, retry_delay=retry_delay
                )
            if edges:
                _micro_batch_push(
                    client, edges, batch_size, message="Upload", max_retries=max_retries, retry_delay=retry_delay
                )

    def as_nodes_and_edges(
        self, stop_on_exception: bool = False
    ) -> tuple[list[NodeApply], list[EdgeApply], list[ErrorDetails]]:
        """Run ``load`` to completion and sort its output into nodes, edges and errors.

        Raises:
            ValueError: If ``load`` yields something that is not a node, an edge or a dict.
        """
        nodes: list[NodeApply] = []
        edges: list[EdgeApply] = []
        exceptions: list[ErrorDetails] = []
        for instance in self.load(stop_on_exception):  # type: ignore[call-overload]
            if isinstance(instance, NodeApply):
                nodes.append(instance)
            elif isinstance(instance, EdgeApply):
                edges.append(instance)
            elif isinstance(instance, dict):
                # Error entries are plain dicts at runtime.
                exceptions.append(cast(ErrorDetails, instance))
            else:
                raise ValueError(f"Unknown instance type: {type(instance)}")
        return nodes, edges, exceptions
172
-
173
-
174
- def _batched(iterable: Iterable, size: int):
175
- "Batch data into lists of length n. The last batch may be shorter."
176
- # batched('ABCDEFG', 3) --> ABC DEF G
177
- it = iter(iterable)
178
- while True:
179
- batch = list(islice(it, size))
180
- if not batch:
181
- return
182
- yield batch
183
-
184
-
185
def is_node_valid(node: NodeApply) -> tuple[bool, str]:
    """Validate a node by its external id.

    Returns:
        The ``(is_valid, reason)`` pair produced by ``is_valid_external_id``
        for the node's external id.
    """
    node_xid = node.external_id
    return is_valid_external_id(node_xid)
187
-
188
-
189
def is_edge_valid(edge: EdgeApply) -> tuple[bool, str]:
    """Validate an edge by its own external id and those of both end points.

    Returns:
        ``(False, reason)`` for the first external id that fails
        ``is_valid_external_id`` (checked in the order edge, start node,
        end node), otherwise ``(True, "")``.
    """
    ids_to_check = (edge.external_id, edge.start_node.external_id, edge.end_node.external_id)
    for candidate in ids_to_check:
        verdict = is_valid_external_id(candidate)
        if not verdict[0]:
            return False, verdict[1]
    return True, ""
195
-
196
-
197
def is_valid_external_id(external_id: str) -> tuple[bool, str]:
    """Check that an external id is non-empty and at most 255 characters long.

    Args:
        external_id: The external id to check; ``None`` is treated as empty.

    Returns:
        ``(True, "")`` when the id is acceptable, otherwise ``(False, reason)``.
    """
    # NOTE(review): 255 is taken to be the inclusive upper bound for instance
    # external ids in CDF data modeling; the previous `>= 255` check rejected
    # ids of exactly 255 characters. Confirm against the API specification.
    if not external_id or len(external_id) > 255:
        return False, f"external_id {external_id} is empty or too long"
    return True, ""
201
-
202
-
203
def upload_nodes(
    client: CogniteClient,
    nodes: list[NodeApply],
    batch_size: int | None = 1000,
    max_retries: int = 1,
    retry_delay: int = 3,
):
    """Uploads nodes to CDF

    Args:
        client: Instance of CogniteClient
        nodes: List of nodes to upload to CDF
        batch_size: Size of batch. Defaults to 1000.
        max_retries: Maximum times to retry the upload. Defaults to 1.
        retry_delay: Time delay before retrying the upload. Defaults to 3.

    !!! note "batch_size"
        If batch size is set to 0 or None, all nodes will be pushed to CDF in one go.
    """
    if not nodes:
        # The batched path raises ValueError on an empty list; there is nothing to do anyway.
        logging.info("No nodes to upload")
        return

    if batch_size:
        logging.info(f"Uploading nodes in batches of {batch_size}")
        _micro_batch_push(client, nodes, batch_size, message="Upload", max_retries=max_retries, retry_delay=retry_delay)
        return

    logging.info("Batch size not set, pushing all nodes to CDF in one go!")

    @retry_decorator(max_retries=max_retries, retry_delay=retry_delay, component_name="create-nodes")
    def create_nodes():
        client.data_modeling.instances.apply(nodes=nodes)

    create_nodes()
230
-
231
-
232
def upload_edges(
    client: CogniteClient,
    edges: list[EdgeApply],
    batch_size: int | None = 5000,
    max_retries: int = 1,
    retry_delay: int = 3,
):
    """Uploads edges to CDF

    Args:
        client: Instance of CogniteClient
        edges: List of edges to upload to CDF
        batch_size: Size of batch. Defaults to 5000.
        max_retries: Maximum times to retry the upload. Defaults to 1.
        retry_delay: Time delay before retrying the upload. Defaults to 3.

    !!! note "batch_size"
        If batch size is set to 0 or None, all edges will be pushed to CDF in one go.

    """
    if not edges:
        # The batched path raises ValueError on an empty list; there is nothing to do anyway.
        logging.info("No edges to upload")
        return

    if batch_size:
        logging.info(f"Uploading edges in batches of {batch_size}")
        _micro_batch_push(client, edges, batch_size, message="Upload", max_retries=max_retries, retry_delay=retry_delay)
        return

    logging.info("Batch size not set, pushing all edges to CDF in one go!")

    @retry_decorator(max_retries=max_retries, retry_delay=retry_delay, component_name="create-edges")
    def create_edges():
        # Missing start/end nodes are created automatically by the API call.
        client.data_modeling.instances.apply(edges=edges, auto_create_start_nodes=True, auto_create_end_nodes=True)

    create_edges()
260
-
261
-
262
def _micro_batch_push(
    client: CogniteClient,
    nodes_or_edges: list[NodeApply] | list[EdgeApply],
    batch_size: int = 1000,
    message: str = "Upload",
    max_retries: int = 1,
    retry_delay: int = 3,
):
    """Uploads nodes or edges in batches

    The kind of upload (nodes vs. edges) is decided from the first element of
    the list. An empty list is a no-op.

    Args:
        client: Instance of CogniteClient
        nodes_or_edges: List of nodes or edges
        batch_size: Size of batch. Defaults to 1000.
        message: Message to logged. Defaults to "Upload".
        max_retries: Maximum times to retry the upload. Defaults to 1.
        retry_delay: Time delay before retrying the upload. Defaults to 3.

    Raises:
        ValueError: If the first element is neither a NodeApply nor an EdgeApply.
    """
    if not nodes_or_edges:
        # Nothing to push; previously this fell through to the ValueError below.
        return

    first = nodes_or_edges[0]
    if isinstance(first, NodeApply):
        push_type = "nodes"
    elif isinstance(first, EdgeApply):
        push_type = "edges"
    else:
        raise ValueError("nodes_or_edges must be a list of NodeApply or EdgeApply objects")

    total = len(nodes_or_edges)
    counter = 0

    for batch in chunker(nodes_or_edges, batch_size):
        counter += len(batch)
        start_time = datetime_utc_now()

        # NOTE(review): the wrapper is rebuilt per batch, as before; whether
        # retry_decorator keeps per-wrapper state is not visible from here.
        @retry_decorator(max_retries=max_retries, retry_delay=retry_delay, component_name=f"microbatch-{push_type}")
        def upsert_nodes_or_edges(upload_batch):
            if push_type == "nodes":
                client.data_modeling.instances.apply(nodes=upload_batch)
            elif push_type == "edges":
                client.data_modeling.instances.apply(
                    edges=upload_batch, auto_create_start_nodes=True, auto_create_end_nodes=True
                )

        upsert_nodes_or_edges(batch)

        # total_seconds() keeps the fractional part; `.seconds` is only the
        # whole-second component of the timedelta.
        delta_time = (datetime_utc_now() - start_time).total_seconds()

        msg = f"{message} {counter} of {total} {push_type}, batch processing time: {delta_time:.2f} "
        msg += f"seconds ETC: {delta_time * (total - counter) / (60*batch_size) :.2f} minutes"
        logging.info(msg)
@@ -1,87 +0,0 @@
1
- """Should contain methods to validate Graph Transformation Rules sheet,
2
- as well as App Data Model (RDF)
3
- """
4
-
5
- import logging
6
- from typing import Any
7
-
8
-
9
- def _find_circular_reference_path(
10
- asset: dict[str, Any], assets: dict[str, dict[str, Any]], max_hierarchy_depth: int = 10000
11
- ) -> list:
12
- original_external_id = asset.get("external_id", "")
13
- circle: list[str] = [original_external_id]
14
- parent_external_id = asset.get("parent_external_id")
15
- if isinstance(parent_external_id, str):
16
- ref = assets.get(parent_external_id)
17
- else:
18
- ref = None
19
-
20
- hop = 0
21
- while ref is not None and hop < max_hierarchy_depth:
22
- hop += 1
23
- if external_id := ref.get("external_id"):
24
- circle.append(external_id)
25
- if len(circle) != len(set(circle)):
26
- msg = (
27
- f"Found circular reference in asset hierarchy which starts with "
28
- f"{original_external_id} and enters loop at {circle[-1]}. "
29
- )
30
- logging.error(msg)
31
- return circle
32
- if parent_external_id := ref.get("parent_external_id"):
33
- ref = assets.get(parent_external_id)
34
- else:
35
- ref = None
36
-
37
- if hop >= max_hierarchy_depth:
38
- msg = (
39
- f"Your asset hierarchy is too deep. Max depth is {max_hierarchy_depth}. "
40
- "You probably have a circular reference."
41
- )
42
- logging.error(msg)
43
- return circle
44
- else:
45
- return []
46
-
47
-
48
def validate_asset_hierarchy(
    assets: dict[str, dict[str, Any]],
) -> tuple[list[str], list[list[str]], dict[str, list[str]]]:
    """Validates asset hierarchy and reports on orphan assets and circular dependency

    Args:
        assets : A dictionary of assets with external_id as key

    Returns:
        A tuple of three items: the list of orphan assets external ids, the
        list of circular paths of external ids, and a map from parent external
        id to the external ids of its children.
        If both lists are empty, the hierarchy is healthy.
    """
    orphan_assets: list[str] = []
    circular_reference_paths: list[list[str]] = []
    parent_children_map: dict[str, list[str]] = {}
    # Member sets of the circles already recorded, for O(1) duplicate checks.
    recorded_circles: set[frozenset] = set()

    for asset in assets.values():
        parent_external_id = asset.get("parent_external_id")
        asset_external_id = asset.get("external_id")

        if asset_external_id and parent_external_id:
            parent_children_map.setdefault(parent_external_id, []).append(asset_external_id)

        if parent_external_id is not None and parent_external_id not in assets:
            msg = (
                f"Found orphan asset {asset.get('external_id')} with parent {parent_external_id} which does not exist."
            )
            logging.error(msg)
            if asset_external_id:
                orphan_assets.append(asset_external_id)

        circular_reference_path = _find_circular_reference_path(asset, assets)
        if not circular_reference_path:
            continue

        # Save the circle only once, not once for every asset
        members = frozenset(circular_reference_path)
        if members in recorded_circles:
            continue
        recorded_circles.add(members)
        circular_reference_paths.append(circular_reference_path)

    return orphan_assets, circular_reference_paths, parent_children_map
@@ -1,6 +0,0 @@
1
- from typing import TypeAlias
2
-
3
- from rdflib import Literal
4
- from rdflib.term import URIRef
5
-
6
- Triple: TypeAlias = tuple[URIRef, URIRef, Literal | URIRef]
@@ -1,13 +0,0 @@
1
- from ._base import NeatGraphStoreBase
2
- from ._graphdb_store import GraphDBStore
3
- from ._memory_store import MemoryStore
4
- from ._oxigraph_store import OxiGraphStore
5
-
6
# Registry of the direct subclasses of NeatGraphStoreBase, keyed by their
# ``rdf_store_type``. A comprehension keeps the loop variable out of the
# module namespace, so no ``del`` clean-up (which would fail with NameError
# if there were no subclasses) is needed.
STORE_BY_TYPE: dict[str, type[NeatGraphStoreBase]] = {
    store.rdf_store_type: store  # type: ignore[type-abstract]
    for store in NeatGraphStoreBase.__subclasses__()
}

AVAILABLE_STORES = set(STORE_BY_TYPE)

__all__ = ["NeatGraphStoreBase", "MemoryStore", "OxiGraphStore", "GraphDBStore", "STORE_BY_TYPE", "AVAILABLE_STORES"]
- __all__ = ["NeatGraphStoreBase", "MemoryStore", "OxiGraphStore", "GraphDBStore", "STORE_BY_TYPE", "AVAILABLE_STORES"]