cognite-neat 0.87.6__py3-none-any.whl → 0.88.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cognite-neat might be problematic. Click here for more details.

Files changed (171)
  1. cognite/neat/_version.py +1 -1
  2. cognite/neat/app/api/data_classes/rest.py +0 -19
  3. cognite/neat/app/api/explorer.py +6 -4
  4. cognite/neat/app/api/routers/configuration.py +1 -1
  5. cognite/neat/app/api/routers/crud.py +11 -21
  6. cognite/neat/app/api/routers/workflows.py +24 -94
  7. cognite/neat/app/ui/neat-app/build/asset-manifest.json +7 -7
  8. cognite/neat/app/ui/neat-app/build/index.html +1 -1
  9. cognite/neat/app/ui/neat-app/build/static/css/{main.38a62222.css → main.72e3d92e.css} +2 -2
  10. cognite/neat/app/ui/neat-app/build/static/css/main.72e3d92e.css.map +1 -0
  11. cognite/neat/app/ui/neat-app/build/static/js/main.5a52cf09.js +3 -0
  12. cognite/neat/app/ui/neat-app/build/static/js/{main.ec7f72e2.js.LICENSE.txt → main.5a52cf09.js.LICENSE.txt} +0 -9
  13. cognite/neat/app/ui/neat-app/build/static/js/main.5a52cf09.js.map +1 -0
  14. cognite/neat/config.py +44 -27
  15. cognite/neat/exceptions.py +6 -0
  16. cognite/neat/graph/extractors/_classic_cdf/_assets.py +21 -73
  17. cognite/neat/graph/extractors/_classic_cdf/_base.py +102 -0
  18. cognite/neat/graph/extractors/_classic_cdf/_events.py +46 -42
  19. cognite/neat/graph/extractors/_classic_cdf/_files.py +41 -45
  20. cognite/neat/graph/extractors/_classic_cdf/_labels.py +75 -52
  21. cognite/neat/graph/extractors/_classic_cdf/_relationships.py +49 -27
  22. cognite/neat/graph/extractors/_classic_cdf/_sequences.py +47 -50
  23. cognite/neat/graph/extractors/_classic_cdf/_timeseries.py +47 -49
  24. cognite/neat/graph/queries/_base.py +22 -29
  25. cognite/neat/graph/queries/_shared.py +1 -1
  26. cognite/neat/graph/stores/_base.py +24 -11
  27. cognite/neat/graph/transformers/_rdfpath.py +3 -2
  28. cognite/neat/issues.py +8 -0
  29. cognite/neat/rules/exporters/_rules2ontology.py +28 -20
  30. cognite/neat/rules/exporters/_validation.py +15 -21
  31. cognite/neat/rules/importers/_inference2rules.py +31 -35
  32. cognite/neat/rules/importers/_owl2rules/_owl2metadata.py +3 -7
  33. cognite/neat/rules/importers/_spreadsheet2rules.py +30 -27
  34. cognite/neat/rules/issues/dms.py +20 -0
  35. cognite/neat/rules/issues/importing.py +15 -0
  36. cognite/neat/rules/issues/ontology.py +298 -0
  37. cognite/neat/rules/issues/spreadsheet.py +48 -0
  38. cognite/neat/rules/issues/tables.py +72 -0
  39. cognite/neat/rules/models/_rdfpath.py +4 -4
  40. cognite/neat/rules/models/_types/_field.py +9 -19
  41. cognite/neat/rules/models/information/_rules.py +5 -4
  42. cognite/neat/utils/rdf_.py +17 -9
  43. cognite/neat/utils/regex_patterns.py +52 -0
  44. cognite/neat/workflows/steps/data_contracts.py +17 -43
  45. cognite/neat/workflows/steps/lib/current/graph_extractor.py +28 -24
  46. cognite/neat/workflows/steps/lib/current/graph_loader.py +4 -21
  47. cognite/neat/workflows/steps/lib/current/graph_store.py +18 -134
  48. cognite/neat/workflows/steps_registry.py +5 -7
  49. {cognite_neat-0.87.6.dist-info → cognite_neat-0.88.1.dist-info}/METADATA +2 -6
  50. cognite_neat-0.88.1.dist-info/RECORD +209 -0
  51. cognite/neat/app/api/routers/core.py +0 -91
  52. cognite/neat/app/api/routers/data_exploration.py +0 -336
  53. cognite/neat/app/api/routers/rules.py +0 -203
  54. cognite/neat/app/ui/neat-app/build/static/css/main.38a62222.css.map +0 -1
  55. cognite/neat/app/ui/neat-app/build/static/js/main.ec7f72e2.js +0 -3
  56. cognite/neat/app/ui/neat-app/build/static/js/main.ec7f72e2.js.map +0 -1
  57. cognite/neat/graph/stores/_oxrdflib.py +0 -247
  58. cognite/neat/legacy/__init__.py +0 -0
  59. cognite/neat/legacy/graph/__init__.py +0 -3
  60. cognite/neat/legacy/graph/examples/Knowledge-Graph-Nordic44-dirty.xml +0 -20182
  61. cognite/neat/legacy/graph/examples/Knowledge-Graph-Nordic44.xml +0 -20163
  62. cognite/neat/legacy/graph/examples/__init__.py +0 -10
  63. cognite/neat/legacy/graph/examples/skos-capturing-sheet-wind-topics.xlsx +0 -0
  64. cognite/neat/legacy/graph/exceptions.py +0 -90
  65. cognite/neat/legacy/graph/extractors/__init__.py +0 -6
  66. cognite/neat/legacy/graph/extractors/_base.py +0 -14
  67. cognite/neat/legacy/graph/extractors/_dexpi.py +0 -44
  68. cognite/neat/legacy/graph/extractors/_graph_capturing_sheet.py +0 -403
  69. cognite/neat/legacy/graph/extractors/_mock_graph_generator.py +0 -361
  70. cognite/neat/legacy/graph/loaders/__init__.py +0 -23
  71. cognite/neat/legacy/graph/loaders/_asset_loader.py +0 -511
  72. cognite/neat/legacy/graph/loaders/_base.py +0 -67
  73. cognite/neat/legacy/graph/loaders/_exceptions.py +0 -85
  74. cognite/neat/legacy/graph/loaders/core/__init__.py +0 -0
  75. cognite/neat/legacy/graph/loaders/core/labels.py +0 -58
  76. cognite/neat/legacy/graph/loaders/core/models.py +0 -136
  77. cognite/neat/legacy/graph/loaders/core/rdf_to_assets.py +0 -1046
  78. cognite/neat/legacy/graph/loaders/core/rdf_to_relationships.py +0 -559
  79. cognite/neat/legacy/graph/loaders/rdf_to_dms.py +0 -309
  80. cognite/neat/legacy/graph/loaders/validator.py +0 -87
  81. cognite/neat/legacy/graph/models.py +0 -6
  82. cognite/neat/legacy/graph/stores/__init__.py +0 -13
  83. cognite/neat/legacy/graph/stores/_base.py +0 -400
  84. cognite/neat/legacy/graph/stores/_graphdb_store.py +0 -52
  85. cognite/neat/legacy/graph/stores/_memory_store.py +0 -43
  86. cognite/neat/legacy/graph/stores/_oxigraph_store.py +0 -151
  87. cognite/neat/legacy/graph/stores/_oxrdflib.py +0 -247
  88. cognite/neat/legacy/graph/stores/_rdf_to_graph.py +0 -42
  89. cognite/neat/legacy/graph/transformations/__init__.py +0 -0
  90. cognite/neat/legacy/graph/transformations/entity_matcher.py +0 -101
  91. cognite/neat/legacy/graph/transformations/query_generator/__init__.py +0 -3
  92. cognite/neat/legacy/graph/transformations/query_generator/sparql.py +0 -575
  93. cognite/neat/legacy/graph/transformations/transformer.py +0 -322
  94. cognite/neat/legacy/rules/__init__.py +0 -0
  95. cognite/neat/legacy/rules/analysis.py +0 -231
  96. cognite/neat/legacy/rules/examples/Rules-Nordic44-to-graphql.xlsx +0 -0
  97. cognite/neat/legacy/rules/examples/Rules-Nordic44.xlsx +0 -0
  98. cognite/neat/legacy/rules/examples/__init__.py +0 -18
  99. cognite/neat/legacy/rules/examples/power-grid-containers.yaml +0 -124
  100. cognite/neat/legacy/rules/examples/power-grid-example.xlsx +0 -0
  101. cognite/neat/legacy/rules/examples/power-grid-model.yaml +0 -224
  102. cognite/neat/legacy/rules/examples/rules-template.xlsx +0 -0
  103. cognite/neat/legacy/rules/examples/sheet2cdf-transformation-rules.xlsx +0 -0
  104. cognite/neat/legacy/rules/examples/skos-rules.xlsx +0 -0
  105. cognite/neat/legacy/rules/examples/source-to-solution-mapping-rules.xlsx +0 -0
  106. cognite/neat/legacy/rules/examples/wind-energy.owl +0 -1511
  107. cognite/neat/legacy/rules/exceptions.py +0 -2972
  108. cognite/neat/legacy/rules/exporters/__init__.py +0 -20
  109. cognite/neat/legacy/rules/exporters/_base.py +0 -45
  110. cognite/neat/legacy/rules/exporters/_core/__init__.py +0 -5
  111. cognite/neat/legacy/rules/exporters/_core/rules2labels.py +0 -24
  112. cognite/neat/legacy/rules/exporters/_rules2dms.py +0 -885
  113. cognite/neat/legacy/rules/exporters/_rules2excel.py +0 -213
  114. cognite/neat/legacy/rules/exporters/_rules2graphql.py +0 -183
  115. cognite/neat/legacy/rules/exporters/_rules2ontology.py +0 -524
  116. cognite/neat/legacy/rules/exporters/_rules2pydantic_models.py +0 -748
  117. cognite/neat/legacy/rules/exporters/_rules2rules.py +0 -105
  118. cognite/neat/legacy/rules/exporters/_rules2triples.py +0 -38
  119. cognite/neat/legacy/rules/exporters/_validation.py +0 -146
  120. cognite/neat/legacy/rules/importers/__init__.py +0 -22
  121. cognite/neat/legacy/rules/importers/_base.py +0 -66
  122. cognite/neat/legacy/rules/importers/_dict2rules.py +0 -158
  123. cognite/neat/legacy/rules/importers/_dms2rules.py +0 -194
  124. cognite/neat/legacy/rules/importers/_graph2rules.py +0 -308
  125. cognite/neat/legacy/rules/importers/_json2rules.py +0 -39
  126. cognite/neat/legacy/rules/importers/_owl2rules/__init__.py +0 -3
  127. cognite/neat/legacy/rules/importers/_owl2rules/_owl2classes.py +0 -239
  128. cognite/neat/legacy/rules/importers/_owl2rules/_owl2metadata.py +0 -260
  129. cognite/neat/legacy/rules/importers/_owl2rules/_owl2properties.py +0 -217
  130. cognite/neat/legacy/rules/importers/_owl2rules/_owl2rules.py +0 -290
  131. cognite/neat/legacy/rules/importers/_spreadsheet2rules.py +0 -45
  132. cognite/neat/legacy/rules/importers/_xsd2rules.py +0 -20
  133. cognite/neat/legacy/rules/importers/_yaml2rules.py +0 -39
  134. cognite/neat/legacy/rules/models/__init__.py +0 -5
  135. cognite/neat/legacy/rules/models/_base.py +0 -151
  136. cognite/neat/legacy/rules/models/raw_rules.py +0 -316
  137. cognite/neat/legacy/rules/models/rdfpath.py +0 -237
  138. cognite/neat/legacy/rules/models/rules.py +0 -1289
  139. cognite/neat/legacy/rules/models/tables.py +0 -9
  140. cognite/neat/legacy/rules/models/value_types.py +0 -118
  141. cognite/neat/legacy/workflows/examples/Export_DMS/workflow.yaml +0 -89
  142. cognite/neat/legacy/workflows/examples/Export_Rules_to_Ontology/workflow.yaml +0 -152
  143. cognite/neat/legacy/workflows/examples/Extract_DEXPI_Graph_and_Export_Rules/workflow.yaml +0 -139
  144. cognite/neat/legacy/workflows/examples/Extract_RDF_Graph_and_Generate_Assets/workflow.yaml +0 -270
  145. cognite/neat/legacy/workflows/examples/Import_DMS/workflow.yaml +0 -65
  146. cognite/neat/legacy/workflows/examples/Ontology_to_Data_Model/workflow.yaml +0 -116
  147. cognite/neat/legacy/workflows/examples/Validate_Rules/workflow.yaml +0 -67
  148. cognite/neat/legacy/workflows/examples/Validate_Solution_Model/workflow.yaml +0 -64
  149. cognite/neat/legacy/workflows/examples/Visualize_Data_Model_Using_Mock_Graph/workflow.yaml +0 -95
  150. cognite/neat/legacy/workflows/examples/Visualize_Semantic_Data_Model/workflow.yaml +0 -111
  151. cognite/neat/rules/exceptions.py +0 -2972
  152. cognite/neat/rules/models/_types/_base.py +0 -16
  153. cognite/neat/workflows/examples/Export_Rules_to_Ontology/workflow.yaml +0 -152
  154. cognite/neat/workflows/examples/Extract_DEXPI_Graph_and_Export_Rules/workflow.yaml +0 -139
  155. cognite/neat/workflows/examples/Extract_RDF_Graph_and_Generate_Assets/workflow.yaml +0 -270
  156. cognite/neat/workflows/examples/Ontology_to_Data_Model/workflow.yaml +0 -116
  157. cognite/neat/workflows/migration/__init__.py +0 -0
  158. cognite/neat/workflows/migration/steps.py +0 -91
  159. cognite/neat/workflows/migration/wf_manifests.py +0 -33
  160. cognite/neat/workflows/steps/lib/legacy/__init__.py +0 -7
  161. cognite/neat/workflows/steps/lib/legacy/graph_contextualization.py +0 -82
  162. cognite/neat/workflows/steps/lib/legacy/graph_extractor.py +0 -746
  163. cognite/neat/workflows/steps/lib/legacy/graph_loader.py +0 -606
  164. cognite/neat/workflows/steps/lib/legacy/graph_store.py +0 -307
  165. cognite/neat/workflows/steps/lib/legacy/graph_transformer.py +0 -58
  166. cognite/neat/workflows/steps/lib/legacy/rules_exporter.py +0 -511
  167. cognite/neat/workflows/steps/lib/legacy/rules_importer.py +0 -612
  168. cognite_neat-0.87.6.dist-info/RECORD +0 -319
  169. {cognite_neat-0.87.6.dist-info → cognite_neat-0.88.1.dist-info}/LICENSE +0 -0
  170. {cognite_neat-0.87.6.dist-info → cognite_neat-0.88.1.dist-info}/WHEEL +0 -0
  171. {cognite_neat-0.87.6.dist-info → cognite_neat-0.88.1.dist-info}/entry_points.txt +0 -0
@@ -1,33 +1,37 @@
1
- from collections.abc import Iterable
1
+ from collections.abc import Callable, Set
2
2
  from datetime import datetime, timezone
3
3
  from pathlib import Path
4
- from typing import cast
5
4
 
6
5
  from cognite.client import CogniteClient
7
6
  from cognite.client.data_classes import LabelDefinition, LabelDefinitionList
8
7
  from rdflib import RDF, Literal, Namespace
9
8
 
10
- from cognite.neat.constants import DEFAULT_NAMESPACE
11
- from cognite.neat.graph.extractors._base import BaseExtractor
12
9
  from cognite.neat.graph.models import Triple
13
10
  from cognite.neat.utils.auxiliary import create_sha256_hash
14
11
 
12
+ from ._base import DEFAULT_SKIP_METADATA_VALUES, ClassicCDFExtractor
15
13
 
16
- class LabelsExtractor(BaseExtractor):
14
+
15
+ class LabelsExtractor(ClassicCDFExtractor[LabelDefinition]):
17
16
  """Extract data from Cognite Data Fusions Labels into Neat.
18
17
 
19
18
  Args:
20
- labels (Iterable[LabelDefinition]): An iterable of labels.
19
+ items (Iterable[LabelDefinition]): An iterable of items.
21
20
  namespace (Namespace, optional): The namespace to use. Defaults to DEFAULT_NAMESPACE.
21
+ to_type (Callable[[LabelDefinition], str | None], optional): A function to convert an item to a type.
22
+ Defaults to None. If None or if the function returns None, the asset will be set to the default type.
23
+ total (int, optional): The total number of items to load. If passed, you will get a progress bar if rich
24
+ is installed. Defaults to None.
25
+ limit (int, optional): The maximal number of items to load. Defaults to None. This is typically used for
26
+ testing setup of the extractor. For example, if you are extracting 100 000 assets, you might want to
27
+ limit the extraction to 1000 assets to test the setup.
28
+ unpack_metadata (bool, optional): Whether to unpack metadata. Defaults to False, which yields the metadata as
29
+ a JSON string.
30
+ skip_metadata_values (set[str] | frozenset[str] | None, optional): If you are unpacking metadata, then
31
+ values in this set will be skipped.
22
32
  """
23
33
 
24
- def __init__(
25
- self,
26
- labels: Iterable[LabelDefinition],
27
- namespace: Namespace | None = None,
28
- ):
29
- self.namespace = namespace or DEFAULT_NAMESPACE
30
- self.labels = labels
34
+ _default_rdf_type = "Label"
31
35
 
32
36
  @classmethod
33
37
  def from_dataset(
@@ -35,57 +39,76 @@ class LabelsExtractor(BaseExtractor):
35
39
  client: CogniteClient,
36
40
  data_set_external_id: str,
37
41
  namespace: Namespace | None = None,
42
+ to_type: Callable[[LabelDefinition], str | None] | None = None,
43
+ limit: int | None = None,
44
+ unpack_metadata: bool = True,
45
+ skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
38
46
  ):
39
47
  return cls(
40
- cast(
41
- Iterable[LabelDefinition],
42
- client.labels(data_set_external_ids=data_set_external_id),
43
- ),
44
- namespace,
48
+ client.labels(data_set_external_ids=data_set_external_id),
49
+ namespace=namespace,
50
+ to_type=to_type,
51
+ limit=limit,
52
+ unpack_metadata=unpack_metadata,
53
+ skip_metadata_values=skip_metadata_values,
45
54
  )
46
55
 
47
56
  @classmethod
48
- def from_file(cls, file_path: str, namespace: Namespace | None = None):
49
- return cls(LabelDefinitionList.load(Path(file_path).read_text()), namespace)
57
+ def from_file(
58
+ cls,
59
+ file_path: str,
60
+ namespace: Namespace | None = None,
61
+ to_type: Callable[[LabelDefinition], str | None] | None = None,
62
+ limit: int | None = None,
63
+ unpack_metadata: bool = True,
64
+ skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
65
+ ):
66
+ labels = LabelDefinitionList.load(Path(file_path).read_text())
67
+ return cls(
68
+ labels,
69
+ total=len(labels),
70
+ namespace=namespace,
71
+ to_type=to_type,
72
+ limit=limit,
73
+ unpack_metadata=unpack_metadata,
74
+ skip_metadata_values=skip_metadata_values,
75
+ )
50
76
 
51
- def extract(self) -> Iterable[Triple]:
52
- """Extract labels as triples."""
53
- for label in self.labels:
54
- yield from self._labels2triples(label)
77
+ def _item2triples(self, label: LabelDefinition) -> list[Triple]:
78
+ if not label.external_id:
79
+ return []
55
80
 
56
- def _labels2triples(self, label: LabelDefinition) -> list[Triple]:
57
- if label.external_id:
58
- id_ = self.namespace[f"Label_{create_sha256_hash(label.external_id)}"]
81
+ id_ = self.namespace[f"Label_{create_sha256_hash(label.external_id)}"]
59
82
 
60
- # Set rdf type
61
- triples: list[Triple] = [(id_, RDF.type, self.namespace.Label)]
83
+ type_ = self._get_rdf_type(label)
84
+ # Set rdf type
85
+ triples: list[Triple] = [(id_, RDF.type, self.namespace[type_])]
62
86
 
63
- # Create attributes
64
- triples.append((id_, self.namespace.external_id, Literal(label.external_id)))
87
+ # Create attributes
88
+ triples.append((id_, self.namespace.external_id, Literal(label.external_id)))
65
89
 
66
- if label.name:
67
- triples.append((id_, self.namespace.name, Literal(label.name)))
90
+ if label.name:
91
+ triples.append((id_, self.namespace.name, Literal(label.name)))
68
92
 
69
- if label.description:
70
- triples.append((id_, self.namespace.description, Literal(label.description)))
93
+ if label.description:
94
+ triples.append((id_, self.namespace.description, Literal(label.description)))
71
95
 
72
- if label.created_time:
73
- triples.append(
74
- (
75
- id_,
76
- self.namespace.created_time,
77
- Literal(datetime.fromtimestamp(label.created_time / 1000, timezone.utc)),
78
- )
96
+ if label.created_time:
97
+ triples.append(
98
+ (
99
+ id_,
100
+ self.namespace.created_time,
101
+ Literal(datetime.fromtimestamp(label.created_time / 1000, timezone.utc)),
79
102
  )
80
-
81
- if label.data_set_id:
82
- triples.append(
83
- (
84
- id_,
85
- self.namespace.data_set_id,
86
- self.namespace[f"Dataset_{label.data_set_id}"],
87
- )
103
+ )
104
+
105
+ if label.data_set_id:
106
+ triples.append(
107
+ (
108
+ id_,
109
+ self.namespace.data_set_id,
110
+ self.namespace[f"Dataset_{label.data_set_id}"],
88
111
  )
112
+ )
89
113
 
90
- return triples
91
- return []
114
+ return triples
@@ -1,34 +1,38 @@
1
- from collections.abc import Iterable
1
+ from collections.abc import Callable, Set
2
2
  from datetime import datetime, timezone
3
3
  from pathlib import Path
4
- from typing import cast
5
4
  from urllib.parse import quote
6
5
 
7
6
  from cognite.client import CogniteClient
8
7
  from cognite.client.data_classes import Relationship, RelationshipList
9
8
  from rdflib import RDF, Literal, Namespace
10
9
 
11
- from cognite.neat.constants import DEFAULT_NAMESPACE
12
- from cognite.neat.graph.extractors._base import BaseExtractor
13
10
  from cognite.neat.graph.models import Triple
14
11
  from cognite.neat.utils.auxiliary import create_sha256_hash
15
12
 
13
+ from ._base import DEFAULT_SKIP_METADATA_VALUES, ClassicCDFExtractor
16
14
 
17
- class RelationshipsExtractor(BaseExtractor):
15
+
16
+ class RelationshipsExtractor(ClassicCDFExtractor[Relationship]):
18
17
  """Extract data from Cognite Data Fusions Relationships into Neat.
19
18
 
20
19
  Args:
21
- relationships (Iterable[Asset]): An iterable of relationships.
20
+ items (Iterable[Relationship]): An iterable of items.
22
21
  namespace (Namespace, optional): The namespace to use. Defaults to DEFAULT_NAMESPACE.
22
+ to_type (Callable[[Relationship], str | None], optional): A function to convert an item to a type.
23
+ Defaults to None. If None or if the function returns None, the asset will be set to the default type.
24
+ total (int, optional): The total number of items to load. If passed, you will get a progress bar if rich
25
+ is installed. Defaults to None.
26
+ limit (int, optional): The maximal number of items to load. Defaults to None. This is typically used for
27
+ testing setup of the extractor. For example, if you are extracting 100 000 assets, you might want to
28
+ limit the extraction to 1000 assets to test the setup.
29
+ unpack_metadata (bool, optional): Whether to unpack metadata. Defaults to False, which yields the metadata as
30
+ a JSON string.
31
+ skip_metadata_values (set[str] | frozenset[str] | None, optional): If you are unpacking metadata, then
32
+ values in this set will be skipped.
23
33
  """
24
34
 
25
- def __init__(
26
- self,
27
- relationships: Iterable[Relationship],
28
- namespace: Namespace | None = None,
29
- ):
30
- self.namespace = namespace or DEFAULT_NAMESPACE
31
- self.relationships = relationships
35
+ _default_rdf_type = "Relationship"
32
36
 
33
37
  @classmethod
34
38
  def from_dataset(
@@ -36,33 +40,51 @@ class RelationshipsExtractor(BaseExtractor):
36
40
  client: CogniteClient,
37
41
  data_set_external_id: str,
38
42
  namespace: Namespace | None = None,
43
+ to_type: Callable[[Relationship], str | None] | None = None,
44
+ limit: int | None = None,
45
+ unpack_metadata: bool = True,
46
+ skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
39
47
  ):
40
48
  return cls(
41
- cast(
42
- Iterable[Relationship],
43
- client.relationships(data_set_external_ids=data_set_external_id),
44
- ),
45
- namespace,
49
+ client.relationships(data_set_external_ids=data_set_external_id),
50
+ namespace=namespace,
51
+ to_type=to_type,
52
+ limit=limit,
53
+ unpack_metadata=unpack_metadata,
54
+ skip_metadata_values=skip_metadata_values,
46
55
  )
47
56
 
48
57
  @classmethod
49
- def from_file(cls, file_path: str, namespace: Namespace | None = None):
50
- return cls(RelationshipList.load(Path(file_path).read_text()), namespace)
51
-
52
- def extract(self) -> Iterable[Triple]:
53
- """Extracts an asset with the given asset_id."""
54
- for relationship in self.relationships:
55
- yield from self._relationship2triples(relationship)
58
+ def from_file(
59
+ cls,
60
+ file_path: str,
61
+ namespace: Namespace | None = None,
62
+ to_type: Callable[[Relationship], str | None] | None = None,
63
+ limit: int | None = None,
64
+ unpack_metadata: bool = True,
65
+ skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
66
+ ):
67
+ relationships = RelationshipList.load(Path(file_path).read_text())
68
+ return cls(
69
+ relationships,
70
+ namespace=namespace,
71
+ total=len(relationships),
72
+ to_type=to_type,
73
+ limit=limit,
74
+ unpack_metadata=unpack_metadata,
75
+ skip_metadata_values=skip_metadata_values,
76
+ )
56
77
 
57
- def _relationship2triples(self, relationship: Relationship) -> list[Triple]:
78
+ def _item2triples(self, relationship: Relationship) -> list[Triple]:
58
79
  """Converts an asset to triples."""
59
80
 
60
81
  if relationship.external_id and relationship.source_external_id and relationship.target_external_id:
61
82
  # relationships do not have an internal id, so we generate one
62
83
  id_ = self.namespace[f"Relationship_{create_sha256_hash(relationship.external_id)}"]
63
84
 
85
+ type_ = self._get_rdf_type(relationship)
64
86
  # Set rdf type
65
- triples: list[Triple] = [(id_, RDF.type, self.namespace["Relationship"])]
87
+ triples: list[Triple] = [(id_, RDF.type, self.namespace[type_])]
66
88
 
67
89
  # Set source and target types
68
90
  if source_type := relationship.source_type:
@@ -1,39 +1,36 @@
1
- import json
2
- from collections.abc import Iterable
1
+ from collections.abc import Callable, Set
3
2
  from datetime import datetime, timezone
4
3
  from pathlib import Path
5
- from typing import cast
6
4
 
7
5
  from cognite.client import CogniteClient
8
- from cognite.client.data_classes import Sequence, SequenceList
9
- from pydantic import AnyHttpUrl, ValidationError
10
- from rdflib import RDF, Literal, Namespace, URIRef
6
+ from cognite.client.data_classes import Sequence, SequenceFilter, SequenceList
7
+ from rdflib import RDF, Literal, Namespace
11
8
 
12
- from cognite.neat.constants import DEFAULT_NAMESPACE
13
- from cognite.neat.graph.extractors._base import BaseExtractor
14
9
  from cognite.neat.graph.models import Triple
15
- from cognite.neat.utils.auxiliary import string_to_ideal_type
16
10
 
11
+ from ._base import DEFAULT_SKIP_METADATA_VALUES, ClassicCDFExtractor
17
12
 
18
- class SequencesExtractor(BaseExtractor):
13
+
14
+ class SequencesExtractor(ClassicCDFExtractor[Sequence]):
19
15
  """Extract data from Cognite Data Fusions Sequences into Neat.
20
16
 
21
17
  Args:
22
- sequence (Iterable[Sequence]): An iterable of sequences.
18
+ items (Iterable[Sequence]): An iterable of items.
23
19
  namespace (Namespace, optional): The namespace to use. Defaults to DEFAULT_NAMESPACE.
20
+ to_type (Callable[[Sequence], str | None], optional): A function to convert an item to a type.
21
+ Defaults to None. If None or if the function returns None, the asset will be set to the default type.
22
+ total (int, optional): The total number of items to load. If passed, you will get a progress bar if rich
23
+ is installed. Defaults to None.
24
+ limit (int, optional): The maximal number of items to load. Defaults to None. This is typically used for
25
+ testing setup of the extractor. For example, if you are extracting 100 000 assets, you might want to
26
+ limit the extraction to 1000 assets to test the setup.
24
27
  unpack_metadata (bool, optional): Whether to unpack metadata. Defaults to False, which yields the metadata as
25
28
  a JSON string.
29
+ skip_metadata_values (set[str] | frozenset[str] | None, optional): If you are unpacking metadata, then
30
+ values in this set will be skipped.
26
31
  """
27
32
 
28
- def __init__(
29
- self,
30
- sequence: Iterable[Sequence],
31
- namespace: Namespace | None = None,
32
- unpack_metadata: bool = True,
33
- ):
34
- self.namespace = namespace or DEFAULT_NAMESPACE
35
- self.sequence = sequence
36
- self.unpack_metadata = unpack_metadata
33
+ _default_rdf_type = "Sequence"
37
34
 
38
35
  @classmethod
39
36
  def from_dataset(
@@ -41,15 +38,22 @@ class SequencesExtractor(BaseExtractor):
41
38
  client: CogniteClient,
42
39
  data_set_external_id: str,
43
40
  namespace: Namespace | None = None,
41
+ to_type: Callable[[Sequence], str | None] | None = None,
42
+ limit: int | None = None,
44
43
  unpack_metadata: bool = True,
44
+ skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
45
45
  ):
46
+ total = client.sequences.aggregate_count(
47
+ filter=SequenceFilter(data_set_ids=[{"externalId": data_set_external_id}])
48
+ )
46
49
  return cls(
47
- cast(
48
- Iterable[Sequence],
49
- client.sequences(data_set_external_ids=data_set_external_id),
50
- ),
51
- namespace,
52
- unpack_metadata,
50
+ client.sequences(data_set_external_ids=data_set_external_id),
51
+ total=total,
52
+ namespace=namespace,
53
+ to_type=to_type,
54
+ limit=limit,
55
+ unpack_metadata=unpack_metadata,
56
+ skip_metadata_values=skip_metadata_values,
53
57
  )
54
58
 
55
59
  @classmethod
@@ -57,20 +61,28 @@ class SequencesExtractor(BaseExtractor):
57
61
  cls,
58
62
  file_path: str,
59
63
  namespace: Namespace | None = None,
64
+ to_type: Callable[[Sequence], str | None] | None = None,
65
+ limit: int | None = None,
60
66
  unpack_metadata: bool = True,
67
+ skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
61
68
  ):
62
- return cls(SequenceList.load(Path(file_path).read_text()), namespace, unpack_metadata)
63
-
64
- def extract(self) -> Iterable[Triple]:
65
- """Extract sequences as triples."""
66
- for sequence in self.sequence:
67
- yield from self._sequence2triples(sequence)
69
+ sequences = SequenceList.load(Path(file_path).read_text())
70
+ return cls(
71
+ sequences,
72
+ total=len(sequences),
73
+ namespace=namespace,
74
+ to_type=to_type,
75
+ limit=limit,
76
+ unpack_metadata=unpack_metadata,
77
+ skip_metadata_values=skip_metadata_values,
78
+ )
68
79
 
69
- def _sequence2triples(self, sequence: Sequence) -> list[Triple]:
80
+ def _item2triples(self, sequence: Sequence) -> list[Triple]:
70
81
  id_ = self.namespace[f"Sequence_{sequence.id}"]
71
82
 
83
+ type_ = self._get_rdf_type(sequence)
72
84
  # Set rdf type
73
- triples: list[Triple] = [(id_, RDF.type, self.namespace.Sequence)]
85
+ triples: list[Triple] = [(id_, RDF.type, self.namespace[type_])]
74
86
 
75
87
  # Create attributes
76
88
 
@@ -81,22 +93,7 @@ class SequencesExtractor(BaseExtractor):
81
93
  triples.append((id_, self.namespace.name, Literal(sequence.name)))
82
94
 
83
95
  if sequence.metadata:
84
- if self.unpack_metadata:
85
- for key, value in sequence.metadata.items():
86
- if value:
87
- type_aware_value = string_to_ideal_type(value)
88
- try:
89
- triples.append((id_, self.namespace[key], URIRef(str(AnyHttpUrl(type_aware_value))))) # type: ignore
90
- except ValidationError:
91
- triples.append((id_, self.namespace[key], Literal(type_aware_value)))
92
- else:
93
- triples.append(
94
- (
95
- id_,
96
- self.namespace.metadata,
97
- Literal(json.dumps(sequence.metadata)),
98
- )
99
- )
96
+ triples.extend(self._metadata_to_triples(id_, sequence.metadata))
100
97
 
101
98
  if sequence.description:
102
99
  triples.append((id_, self.namespace.description, Literal(sequence.description)))
@@ -1,39 +1,37 @@
1
- import json
2
- from collections.abc import Iterable
1
+ from collections.abc import Callable, Set
3
2
  from datetime import datetime, timezone
4
3
  from pathlib import Path
5
- from typing import cast
6
4
 
7
5
  from cognite.client import CogniteClient
8
- from cognite.client.data_classes import TimeSeries, TimeSeriesList
6
+ from cognite.client.data_classes import TimeSeries, TimeSeriesFilter, TimeSeriesList
9
7
  from pydantic import AnyHttpUrl, ValidationError
10
8
  from rdflib import RDF, Literal, Namespace, URIRef
11
9
 
12
- from cognite.neat.constants import DEFAULT_NAMESPACE
13
- from cognite.neat.graph.extractors._base import BaseExtractor
14
10
  from cognite.neat.graph.models import Triple
15
- from cognite.neat.utils.auxiliary import string_to_ideal_type
16
11
 
12
+ from ._base import DEFAULT_SKIP_METADATA_VALUES, ClassicCDFExtractor
17
13
 
18
- class TimeSeriesExtractor(BaseExtractor):
14
+
15
+ class TimeSeriesExtractor(ClassicCDFExtractor[TimeSeries]):
19
16
  """Extract data from Cognite Data Fusions TimeSeries into Neat.
20
17
 
21
18
  Args:
22
- timeseries (Iterable[TimeSeries]): An iterable of timeseries.
19
+ items (Iterable[TimeSeries]): An iterable of items.
23
20
  namespace (Namespace, optional): The namespace to use. Defaults to DEFAULT_NAMESPACE.
21
+ to_type (Callable[[TimeSeries], str | None], optional): A function to convert an item to a type.
22
+ Defaults to None. If None or if the function returns None, the asset will be set to the default type.
23
+ total (int, optional): The total number of items to load. If passed, you will get a progress bar if rich
24
+ is installed. Defaults to None.
25
+ limit (int, optional): The maximal number of items to load. Defaults to None. This is typically used for
26
+ testing setup of the extractor. For example, if you are extracting 100 000 assets, you might want to
27
+ limit the extraction to 1000 assets to test the setup.
24
28
  unpack_metadata (bool, optional): Whether to unpack metadata. Defaults to False, which yields the metadata as
25
29
  a JSON string.
30
+ skip_metadata_values (set[str] | frozenset[str] | None, optional): If you are unpacking metadata, then
31
+ values in this set will be skipped.
26
32
  """
27
33
 
28
- def __init__(
29
- self,
30
- timeseries: Iterable[TimeSeries],
31
- namespace: Namespace | None = None,
32
- unpack_metadata: bool = True,
33
- ):
34
- self.namespace = namespace or DEFAULT_NAMESPACE
35
- self.timeseries = timeseries
36
- self.unpack_metadata = unpack_metadata
34
+ _default_rdf_type = "TimeSeries"
37
35
 
38
36
  @classmethod
39
37
  def from_dataset(
@@ -41,15 +39,23 @@ class TimeSeriesExtractor(BaseExtractor):
41
39
  client: CogniteClient,
42
40
  data_set_external_id: str,
43
41
  namespace: Namespace | None = None,
42
+ to_type: Callable[[TimeSeries], str | None] | None = None,
43
+ limit: int | None = None,
44
44
  unpack_metadata: bool = True,
45
+ skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
45
46
  ):
47
+ total = client.time_series.aggregate_count(
48
+ filter=TimeSeriesFilter(data_set_ids=[{"externalId": data_set_external_id}])
49
+ )
50
+
46
51
  return cls(
47
- cast(
48
- Iterable[TimeSeries],
49
- client.time_series(data_set_external_ids=data_set_external_id),
50
- ),
51
- namespace,
52
- unpack_metadata,
52
+ client.time_series(data_set_external_ids=data_set_external_id),
53
+ total=total,
54
+ namespace=namespace,
55
+ to_type=to_type,
56
+ limit=limit,
57
+ unpack_metadata=unpack_metadata,
58
+ skip_metadata_values=skip_metadata_values,
53
59
  )
54
60
 
55
61
  @classmethod
@@ -57,23 +63,30 @@ class TimeSeriesExtractor(BaseExtractor):
57
63
  cls,
58
64
  file_path: str,
59
65
  namespace: Namespace | None = None,
66
+ to_type: Callable[[TimeSeries], str | None] | None = None,
67
+ limit: int | None = None,
60
68
  unpack_metadata: bool = True,
69
+ skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
61
70
  ):
62
- return cls(TimeSeriesList.load(Path(file_path).read_text()), namespace, unpack_metadata)
63
-
64
- def extract(self) -> Iterable[Triple]:
65
- """Extract timeseries as triples."""
66
- for timeseries in self.timeseries:
67
- yield from self._timeseries2triples(timeseries)
71
+ timeseries = TimeSeriesList.load(Path(file_path).read_text())
72
+ return cls(
73
+ timeseries,
74
+ total=len(timeseries),
75
+ namespace=namespace,
76
+ to_type=to_type,
77
+ limit=limit,
78
+ unpack_metadata=unpack_metadata,
79
+ skip_metadata_values=skip_metadata_values,
80
+ )
68
81
 
69
- def _timeseries2triples(self, timeseries: TimeSeries) -> list[Triple]:
82
+ def _item2triples(self, timeseries: TimeSeries) -> list[Triple]:
70
83
  id_ = self.namespace[f"TimeSeries_{timeseries.id}"]
71
84
 
72
85
  # Set rdf type
73
- triples: list[Triple] = [(id_, RDF.type, self.namespace.TimeSeries)]
86
+ type_ = self._get_rdf_type(timeseries)
87
+ triples: list[Triple] = [(id_, RDF.type, self.namespace[type_])]
74
88
 
75
89
  # Create attributes
76
-
77
90
  if timeseries.external_id:
78
91
  triples.append((id_, self.namespace.external_id, Literal(timeseries.external_id)))
79
92
 
@@ -84,22 +97,7 @@ class TimeSeriesExtractor(BaseExtractor):
84
97
  triples.append((id_, self.namespace.is_string, Literal(timeseries.is_string)))
85
98
 
86
99
  if timeseries.metadata:
87
- if self.unpack_metadata:
88
- for key, value in timeseries.metadata.items():
89
- if value:
90
- type_aware_value = string_to_ideal_type(value)
91
- try:
92
- triples.append((id_, self.namespace[key], URIRef(str(AnyHttpUrl(type_aware_value))))) # type: ignore
93
- except ValidationError:
94
- triples.append((id_, self.namespace[key], Literal(type_aware_value)))
95
- else:
96
- triples.append(
97
- (
98
- id_,
99
- self.namespace.metadata,
100
- Literal(json.dumps(timeseries.metadata)),
101
- )
102
- )
100
+ triples.extend(self._metadata_to_triples(id_, timeseries.metadata))
103
101
 
104
102
  if timeseries.unit:
105
103
  triples.append((id_, self.namespace.unit, Literal(timeseries.unit)))
@@ -98,47 +98,40 @@ class Queries:
98
98
  self,
99
99
  instance_id: URIRef,
100
100
  property_renaming_config: dict | None = None,
101
- ) -> tuple[str, dict[str, list[str]]]:
101
+ ) -> tuple[str, dict[str, list[str]]] | None:
102
102
  """DESCRIBE instance for a given class from the graph store
103
103
 
104
104
  Args:
105
105
  instance_id: Instance id for which we want to generate query
106
- property_rename_config: Dictionary to rename properties, default None
106
+ property_renaming_config: Dictionary to rename properties, default None
107
107
 
108
108
  Returns:
109
109
  Dictionary of instance properties
110
110
  """
111
-
112
111
  property_values: dict[str, list[str]] = defaultdict(list)
113
-
114
- for subject, predicate, object_ in cast(list[ResultRow], self.graph.query(f"DESCRIBE <{instance_id}>")):
115
- if object_.lower() not in [
112
+ identifier = remove_namespace_from_uri(instance_id, validation="prefix")
113
+ for _, predicate, object_ in cast(list[ResultRow], self.graph.query(f"DESCRIBE <{instance_id}>")):
114
+ if object_.lower() in [
116
115
  "",
117
116
  "none",
118
117
  "nan",
119
118
  "null",
120
119
  ]:
121
- # we are skipping deep validation with Pydantic to remove namespace here
122
- # as it reduces time to process triples by 10-15x
123
- identifier, value = cast( # type: ignore[misc]
124
- (str, str),
125
- remove_namespace_from_uri(*(subject, object_), validation="prefix"),
126
- ) # type: ignore[misc, index]
127
-
128
- # use-case: calling describe without renaming properties
129
- # losing the namespace from the predicate!
130
- if not property_renaming_config and predicate != RDF.type:
131
- property_values[remove_namespace_from_uri(predicate, validation="prefix")].append(value)
132
-
133
- # use-case: calling describe with renaming properties
134
- # renaming the property to the new name, if the property is defined
135
- # in the RULES sheet
136
- elif property_renaming_config and (property_ := property_renaming_config.get(predicate, None)):
137
- property_values[property_].append(value)
138
-
139
- # use-case: skip the property if it is not defined in property_renaming_config
140
- else:
141
- continue
120
+ continue
121
+ # we are skipping deep validation with Pydantic to remove namespace here
122
+ # as it reduces time to process triples by 10-15x
123
+ value = remove_namespace_from_uri(object_, validation="prefix")
124
+
125
+ # use-case: calling describe without renaming properties
126
+ # losing the namespace from the predicate!
127
+ if not property_renaming_config and predicate != RDF.type:
128
+ property_values[remove_namespace_from_uri(predicate, validation="prefix")].append(value)
129
+
130
+ # use-case: calling describe with renaming properties
131
+ # renaming the property to the new name, if the property is defined
132
+ # in the RULES sheet
133
+ elif property_renaming_config and (property_ := property_renaming_config.get(predicate, None)):
134
+ property_values[property_].append(value)
142
135
 
143
136
  if property_values:
144
137
  return (
@@ -146,7 +139,7 @@ class Queries:
146
139
  property_values,
147
140
  )
148
141
  else:
149
- return () # type: ignore [return-value]
142
+ return None
150
143
 
151
144
  def construct_instances_of_class(
152
145
  self,
@@ -177,7 +170,7 @@ class Queries:
177
170
  result = self.graph.query(query)
178
171
 
179
172
  # We cannot include the RDF.type in case there is a neat:type property
180
- return [remove_namespace_from_uri(*triple) for triple in result if triple[1] != RDF.type] # type: ignore[misc, index]
173
+ return [remove_namespace_from_uri(cast(ResultRow, triple)) for triple in result if triple[1] != RDF.type] # type: ignore[misc, index]
181
174
  else:
182
175
  warnings.warn(
183
176
  "No rules found for the graph store, returning empty list.",