cognite-neat 0.87.6__py3-none-any.whl → 0.88.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cognite-neat might be problematic. Click here for more details.

Files changed (171)
  1. cognite/neat/_version.py +1 -1
  2. cognite/neat/app/api/data_classes/rest.py +0 -19
  3. cognite/neat/app/api/explorer.py +6 -4
  4. cognite/neat/app/api/routers/configuration.py +1 -1
  5. cognite/neat/app/api/routers/crud.py +11 -21
  6. cognite/neat/app/api/routers/workflows.py +24 -94
  7. cognite/neat/app/ui/neat-app/build/asset-manifest.json +7 -7
  8. cognite/neat/app/ui/neat-app/build/index.html +1 -1
  9. cognite/neat/app/ui/neat-app/build/static/css/{main.38a62222.css → main.72e3d92e.css} +2 -2
  10. cognite/neat/app/ui/neat-app/build/static/css/main.72e3d92e.css.map +1 -0
  11. cognite/neat/app/ui/neat-app/build/static/js/main.5a52cf09.js +3 -0
  12. cognite/neat/app/ui/neat-app/build/static/js/{main.ec7f72e2.js.LICENSE.txt → main.5a52cf09.js.LICENSE.txt} +0 -9
  13. cognite/neat/app/ui/neat-app/build/static/js/main.5a52cf09.js.map +1 -0
  14. cognite/neat/config.py +44 -27
  15. cognite/neat/exceptions.py +6 -0
  16. cognite/neat/graph/extractors/_classic_cdf/_assets.py +21 -73
  17. cognite/neat/graph/extractors/_classic_cdf/_base.py +102 -0
  18. cognite/neat/graph/extractors/_classic_cdf/_events.py +46 -42
  19. cognite/neat/graph/extractors/_classic_cdf/_files.py +41 -45
  20. cognite/neat/graph/extractors/_classic_cdf/_labels.py +75 -52
  21. cognite/neat/graph/extractors/_classic_cdf/_relationships.py +49 -27
  22. cognite/neat/graph/extractors/_classic_cdf/_sequences.py +47 -50
  23. cognite/neat/graph/extractors/_classic_cdf/_timeseries.py +47 -49
  24. cognite/neat/graph/queries/_base.py +22 -29
  25. cognite/neat/graph/queries/_shared.py +1 -1
  26. cognite/neat/graph/stores/_base.py +24 -11
  27. cognite/neat/graph/transformers/_rdfpath.py +3 -2
  28. cognite/neat/issues.py +8 -0
  29. cognite/neat/rules/exporters/_rules2ontology.py +28 -20
  30. cognite/neat/rules/exporters/_validation.py +15 -21
  31. cognite/neat/rules/importers/_inference2rules.py +31 -35
  32. cognite/neat/rules/importers/_owl2rules/_owl2metadata.py +3 -7
  33. cognite/neat/rules/importers/_spreadsheet2rules.py +30 -27
  34. cognite/neat/rules/issues/dms.py +20 -0
  35. cognite/neat/rules/issues/importing.py +15 -0
  36. cognite/neat/rules/issues/ontology.py +298 -0
  37. cognite/neat/rules/issues/spreadsheet.py +48 -0
  38. cognite/neat/rules/issues/tables.py +72 -0
  39. cognite/neat/rules/models/_rdfpath.py +4 -4
  40. cognite/neat/rules/models/_types/_field.py +9 -19
  41. cognite/neat/rules/models/information/_rules.py +5 -4
  42. cognite/neat/utils/rdf_.py +17 -9
  43. cognite/neat/utils/regex_patterns.py +52 -0
  44. cognite/neat/workflows/steps/data_contracts.py +17 -43
  45. cognite/neat/workflows/steps/lib/current/graph_extractor.py +28 -24
  46. cognite/neat/workflows/steps/lib/current/graph_loader.py +4 -21
  47. cognite/neat/workflows/steps/lib/current/graph_store.py +18 -134
  48. cognite/neat/workflows/steps_registry.py +5 -7
  49. {cognite_neat-0.87.6.dist-info → cognite_neat-0.88.1.dist-info}/METADATA +2 -6
  50. cognite_neat-0.88.1.dist-info/RECORD +209 -0
  51. cognite/neat/app/api/routers/core.py +0 -91
  52. cognite/neat/app/api/routers/data_exploration.py +0 -336
  53. cognite/neat/app/api/routers/rules.py +0 -203
  54. cognite/neat/app/ui/neat-app/build/static/css/main.38a62222.css.map +0 -1
  55. cognite/neat/app/ui/neat-app/build/static/js/main.ec7f72e2.js +0 -3
  56. cognite/neat/app/ui/neat-app/build/static/js/main.ec7f72e2.js.map +0 -1
  57. cognite/neat/graph/stores/_oxrdflib.py +0 -247
  58. cognite/neat/legacy/__init__.py +0 -0
  59. cognite/neat/legacy/graph/__init__.py +0 -3
  60. cognite/neat/legacy/graph/examples/Knowledge-Graph-Nordic44-dirty.xml +0 -20182
  61. cognite/neat/legacy/graph/examples/Knowledge-Graph-Nordic44.xml +0 -20163
  62. cognite/neat/legacy/graph/examples/__init__.py +0 -10
  63. cognite/neat/legacy/graph/examples/skos-capturing-sheet-wind-topics.xlsx +0 -0
  64. cognite/neat/legacy/graph/exceptions.py +0 -90
  65. cognite/neat/legacy/graph/extractors/__init__.py +0 -6
  66. cognite/neat/legacy/graph/extractors/_base.py +0 -14
  67. cognite/neat/legacy/graph/extractors/_dexpi.py +0 -44
  68. cognite/neat/legacy/graph/extractors/_graph_capturing_sheet.py +0 -403
  69. cognite/neat/legacy/graph/extractors/_mock_graph_generator.py +0 -361
  70. cognite/neat/legacy/graph/loaders/__init__.py +0 -23
  71. cognite/neat/legacy/graph/loaders/_asset_loader.py +0 -511
  72. cognite/neat/legacy/graph/loaders/_base.py +0 -67
  73. cognite/neat/legacy/graph/loaders/_exceptions.py +0 -85
  74. cognite/neat/legacy/graph/loaders/core/__init__.py +0 -0
  75. cognite/neat/legacy/graph/loaders/core/labels.py +0 -58
  76. cognite/neat/legacy/graph/loaders/core/models.py +0 -136
  77. cognite/neat/legacy/graph/loaders/core/rdf_to_assets.py +0 -1046
  78. cognite/neat/legacy/graph/loaders/core/rdf_to_relationships.py +0 -559
  79. cognite/neat/legacy/graph/loaders/rdf_to_dms.py +0 -309
  80. cognite/neat/legacy/graph/loaders/validator.py +0 -87
  81. cognite/neat/legacy/graph/models.py +0 -6
  82. cognite/neat/legacy/graph/stores/__init__.py +0 -13
  83. cognite/neat/legacy/graph/stores/_base.py +0 -400
  84. cognite/neat/legacy/graph/stores/_graphdb_store.py +0 -52
  85. cognite/neat/legacy/graph/stores/_memory_store.py +0 -43
  86. cognite/neat/legacy/graph/stores/_oxigraph_store.py +0 -151
  87. cognite/neat/legacy/graph/stores/_oxrdflib.py +0 -247
  88. cognite/neat/legacy/graph/stores/_rdf_to_graph.py +0 -42
  89. cognite/neat/legacy/graph/transformations/__init__.py +0 -0
  90. cognite/neat/legacy/graph/transformations/entity_matcher.py +0 -101
  91. cognite/neat/legacy/graph/transformations/query_generator/__init__.py +0 -3
  92. cognite/neat/legacy/graph/transformations/query_generator/sparql.py +0 -575
  93. cognite/neat/legacy/graph/transformations/transformer.py +0 -322
  94. cognite/neat/legacy/rules/__init__.py +0 -0
  95. cognite/neat/legacy/rules/analysis.py +0 -231
  96. cognite/neat/legacy/rules/examples/Rules-Nordic44-to-graphql.xlsx +0 -0
  97. cognite/neat/legacy/rules/examples/Rules-Nordic44.xlsx +0 -0
  98. cognite/neat/legacy/rules/examples/__init__.py +0 -18
  99. cognite/neat/legacy/rules/examples/power-grid-containers.yaml +0 -124
  100. cognite/neat/legacy/rules/examples/power-grid-example.xlsx +0 -0
  101. cognite/neat/legacy/rules/examples/power-grid-model.yaml +0 -224
  102. cognite/neat/legacy/rules/examples/rules-template.xlsx +0 -0
  103. cognite/neat/legacy/rules/examples/sheet2cdf-transformation-rules.xlsx +0 -0
  104. cognite/neat/legacy/rules/examples/skos-rules.xlsx +0 -0
  105. cognite/neat/legacy/rules/examples/source-to-solution-mapping-rules.xlsx +0 -0
  106. cognite/neat/legacy/rules/examples/wind-energy.owl +0 -1511
  107. cognite/neat/legacy/rules/exceptions.py +0 -2972
  108. cognite/neat/legacy/rules/exporters/__init__.py +0 -20
  109. cognite/neat/legacy/rules/exporters/_base.py +0 -45
  110. cognite/neat/legacy/rules/exporters/_core/__init__.py +0 -5
  111. cognite/neat/legacy/rules/exporters/_core/rules2labels.py +0 -24
  112. cognite/neat/legacy/rules/exporters/_rules2dms.py +0 -885
  113. cognite/neat/legacy/rules/exporters/_rules2excel.py +0 -213
  114. cognite/neat/legacy/rules/exporters/_rules2graphql.py +0 -183
  115. cognite/neat/legacy/rules/exporters/_rules2ontology.py +0 -524
  116. cognite/neat/legacy/rules/exporters/_rules2pydantic_models.py +0 -748
  117. cognite/neat/legacy/rules/exporters/_rules2rules.py +0 -105
  118. cognite/neat/legacy/rules/exporters/_rules2triples.py +0 -38
  119. cognite/neat/legacy/rules/exporters/_validation.py +0 -146
  120. cognite/neat/legacy/rules/importers/__init__.py +0 -22
  121. cognite/neat/legacy/rules/importers/_base.py +0 -66
  122. cognite/neat/legacy/rules/importers/_dict2rules.py +0 -158
  123. cognite/neat/legacy/rules/importers/_dms2rules.py +0 -194
  124. cognite/neat/legacy/rules/importers/_graph2rules.py +0 -308
  125. cognite/neat/legacy/rules/importers/_json2rules.py +0 -39
  126. cognite/neat/legacy/rules/importers/_owl2rules/__init__.py +0 -3
  127. cognite/neat/legacy/rules/importers/_owl2rules/_owl2classes.py +0 -239
  128. cognite/neat/legacy/rules/importers/_owl2rules/_owl2metadata.py +0 -260
  129. cognite/neat/legacy/rules/importers/_owl2rules/_owl2properties.py +0 -217
  130. cognite/neat/legacy/rules/importers/_owl2rules/_owl2rules.py +0 -290
  131. cognite/neat/legacy/rules/importers/_spreadsheet2rules.py +0 -45
  132. cognite/neat/legacy/rules/importers/_xsd2rules.py +0 -20
  133. cognite/neat/legacy/rules/importers/_yaml2rules.py +0 -39
  134. cognite/neat/legacy/rules/models/__init__.py +0 -5
  135. cognite/neat/legacy/rules/models/_base.py +0 -151
  136. cognite/neat/legacy/rules/models/raw_rules.py +0 -316
  137. cognite/neat/legacy/rules/models/rdfpath.py +0 -237
  138. cognite/neat/legacy/rules/models/rules.py +0 -1289
  139. cognite/neat/legacy/rules/models/tables.py +0 -9
  140. cognite/neat/legacy/rules/models/value_types.py +0 -118
  141. cognite/neat/legacy/workflows/examples/Export_DMS/workflow.yaml +0 -89
  142. cognite/neat/legacy/workflows/examples/Export_Rules_to_Ontology/workflow.yaml +0 -152
  143. cognite/neat/legacy/workflows/examples/Extract_DEXPI_Graph_and_Export_Rules/workflow.yaml +0 -139
  144. cognite/neat/legacy/workflows/examples/Extract_RDF_Graph_and_Generate_Assets/workflow.yaml +0 -270
  145. cognite/neat/legacy/workflows/examples/Import_DMS/workflow.yaml +0 -65
  146. cognite/neat/legacy/workflows/examples/Ontology_to_Data_Model/workflow.yaml +0 -116
  147. cognite/neat/legacy/workflows/examples/Validate_Rules/workflow.yaml +0 -67
  148. cognite/neat/legacy/workflows/examples/Validate_Solution_Model/workflow.yaml +0 -64
  149. cognite/neat/legacy/workflows/examples/Visualize_Data_Model_Using_Mock_Graph/workflow.yaml +0 -95
  150. cognite/neat/legacy/workflows/examples/Visualize_Semantic_Data_Model/workflow.yaml +0 -111
  151. cognite/neat/rules/exceptions.py +0 -2972
  152. cognite/neat/rules/models/_types/_base.py +0 -16
  153. cognite/neat/workflows/examples/Export_Rules_to_Ontology/workflow.yaml +0 -152
  154. cognite/neat/workflows/examples/Extract_DEXPI_Graph_and_Export_Rules/workflow.yaml +0 -139
  155. cognite/neat/workflows/examples/Extract_RDF_Graph_and_Generate_Assets/workflow.yaml +0 -270
  156. cognite/neat/workflows/examples/Ontology_to_Data_Model/workflow.yaml +0 -116
  157. cognite/neat/workflows/migration/__init__.py +0 -0
  158. cognite/neat/workflows/migration/steps.py +0 -91
  159. cognite/neat/workflows/migration/wf_manifests.py +0 -33
  160. cognite/neat/workflows/steps/lib/legacy/__init__.py +0 -7
  161. cognite/neat/workflows/steps/lib/legacy/graph_contextualization.py +0 -82
  162. cognite/neat/workflows/steps/lib/legacy/graph_extractor.py +0 -746
  163. cognite/neat/workflows/steps/lib/legacy/graph_loader.py +0 -606
  164. cognite/neat/workflows/steps/lib/legacy/graph_store.py +0 -307
  165. cognite/neat/workflows/steps/lib/legacy/graph_transformer.py +0 -58
  166. cognite/neat/workflows/steps/lib/legacy/rules_exporter.py +0 -511
  167. cognite/neat/workflows/steps/lib/legacy/rules_importer.py +0 -612
  168. cognite_neat-0.87.6.dist-info/RECORD +0 -319
  169. {cognite_neat-0.87.6.dist-info → cognite_neat-0.88.1.dist-info}/LICENSE +0 -0
  170. {cognite_neat-0.87.6.dist-info → cognite_neat-0.88.1.dist-info}/WHEEL +0 -0
  171. {cognite_neat-0.87.6.dist-info → cognite_neat-0.88.1.dist-info}/entry_points.txt +0 -0
cognite/neat/config.py CHANGED
@@ -7,7 +7,7 @@ from pathlib import Path
7
7
  from typing import Any, Literal, cast
8
8
 
9
9
  import yaml
10
- from pydantic import BaseModel, Field, field_serializer, model_validator
10
+ from pydantic import BaseModel, Field, model_validator
11
11
  from yaml import safe_load
12
12
 
13
13
  from cognite.neat.constants import EXAMPLE_GRAPHS, EXAMPLE_RULES, EXAMPLE_WORKFLOWS
@@ -58,7 +58,8 @@ class Config(BaseModel, arbitrary_types_allowed=True):
58
58
  log_level: Literal["ERROR", "WARNING", "INFO", "DEBUG"] = "INFO"
59
59
  log_format: str = LOG_FORMAT
60
60
  download_workflows_from_cdf: bool = Field(
61
- default=False, description="Downloads all workflows from CDF automatically and stores them locally"
61
+ default=False,
62
+ description="Downloads all workflows from CDF automatically and stores them locally",
62
63
  )
63
64
  stop_on_error: bool = False
64
65
 
@@ -93,30 +94,43 @@ class Config(BaseModel, arbitrary_types_allowed=True):
93
94
  )
94
95
  return data
95
96
 
96
- @staticmethod
97
- @field_serializer("cdf_auth_config", when_used="always", return_type=dict)
98
- def backwards_compatible_serialize(cdf_auth_config: EnvironmentVariables) -> dict[str, Any]:
99
- output: dict[str, Any] = {}
100
- config = cdf_auth_config
101
- if config.CDF_PROJECT not in {"Missing", "NOT SET"}:
102
- output["project"] = config.CDF_PROJECT
103
- if config.CDF_CLUSTER not in {"Missing", "NOT SET"}:
104
- output["cluster"] = config.CDF_CLUSTER
105
- if config.CDF_URL:
106
- output["base_url"] = config.CDF_URL
107
- if config.IDP_CLIENT_ID:
108
- output["client_id"] = config.IDP_CLIENT_ID
109
- if config.IDP_CLIENT_SECRET:
110
- output["client_secret"] = config.IDP_CLIENT_SECRET
111
- if config.IDP_TOKEN_URL:
112
- output["token_url"] = config.IDP_TOKEN_URL
113
- if config.IDP_SCOPES:
114
- output["scopes"] = config.idp_scopes
115
- if config.CDF_TIMEOUT:
116
- output["timeout"] = config.CDF_TIMEOUT
117
- if config.CDF_MAX_WORKERS:
118
- output["max_workers"] = config.CDF_MAX_WORKERS
119
- return output
97
+ def as_legacy_config(
98
+ self,
99
+ ) -> dict[str, Any]:
100
+ config: dict[str, Any] = {}
101
+
102
+ config["workflows_store_type"] = self.workflows_store_type
103
+ config["data_store_path"] = str(self.data_store_path)
104
+ config["workflows_downloader_filter"] = self.workflow_downloader_filter
105
+
106
+ config["cdf_client"] = {}
107
+ if self.cdf_auth_config.CDF_PROJECT not in {"Missing", "NOT SET"}:
108
+ config["cdf_client"]["project"] = self.cdf_auth_config.CDF_PROJECT
109
+ if self.cdf_auth_config.CDF_CLUSTER not in {"Missing", "NOT SET"}:
110
+ config["cdf_client"]["cluster"] = self.cdf_auth_config.CDF_CLUSTER
111
+ if self.cdf_auth_config.CDF_URL:
112
+ config["cdf_client"]["base_url"] = self.cdf_auth_config.CDF_URL
113
+ if self.cdf_auth_config.IDP_CLIENT_ID:
114
+ config["cdf_client"]["client_id"] = self.cdf_auth_config.IDP_CLIENT_ID
115
+ if self.cdf_auth_config.IDP_CLIENT_SECRET:
116
+ config["cdf_client"]["client_secret"] = self.cdf_auth_config.IDP_CLIENT_SECRET
117
+ if self.cdf_auth_config.IDP_TOKEN_URL:
118
+ config["cdf_client"]["token_url"] = self.cdf_auth_config.IDP_TOKEN_URL
119
+ if self.cdf_auth_config.IDP_SCOPES:
120
+ config["cdf_client"]["scopes"] = self.cdf_auth_config.idp_scopes
121
+ if self.cdf_auth_config.CDF_TIMEOUT:
122
+ config["cdf_client"]["timeout"] = self.cdf_auth_config.CDF_TIMEOUT
123
+ if self.cdf_auth_config.CDF_MAX_WORKERS:
124
+ config["cdf_client"]["max_workers"] = self.cdf_auth_config.CDF_MAX_WORKERS
125
+
126
+ config["cdf_default_dataset_id"] = self.cdf_default_dataset_id
127
+ config["load_examples"] = self.load_examples
128
+ config["log_level"] = self.log_level
129
+ config["log_format"] = self.log_format
130
+ config["download_workflows_from_cdf"] = self.download_workflows_from_cdf
131
+ config["stop_on_error"] = self.stop_on_error
132
+
133
+ return config
120
134
 
121
135
  @property
122
136
  def _dir_suffix(self) -> str:
@@ -191,7 +205,10 @@ class Config(BaseModel, arbitrary_types_allowed=True):
191
205
  ),
192
206
  data_store_path=Path(os.environ.get("NEAT_DATA_PATH", "/app/data")),
193
207
  cdf_default_dataset_id=int(os.environ.get("NEAT_CDF_DEFAULT_DATASET_ID", 6476640149881990)),
194
- log_level=cast(Literal["ERROR", "WARNING", "INFO", "DEBUG"], os.environ.get("NEAT_LOG_LEVEL", "INFO")),
208
+ log_level=cast(
209
+ Literal["ERROR", "WARNING", "INFO", "DEBUG"],
210
+ os.environ.get("NEAT_LOG_LEVEL", "INFO"),
211
+ ),
195
212
  workflow_downloader_filter=workflow_downloader_filter,
196
213
  load_examples=bool(os.environ.get("NEAT_LOAD_EXAMPLES", True) in ["True", "true", "1"]),
197
214
  )
cognite/neat/exceptions.py CHANGED
@@ -102,6 +102,12 @@ class InvalidWorkFlowError(NeatException):
102
102
  return self.message
103
103
 
104
104
 
105
+ class NeatValueError(NeatException, ValueError): ...
106
+
107
+
108
+ class NeatTypeError(NeatException, TypeError): ...
109
+
110
+
105
111
  def wrangle_warnings(list_of_warnings: list[WarningMessage]) -> list[dict]:
106
112
  warning_list: list[dict] = []
107
113
  for warning in list_of_warnings:
cognite/neat/graph/extractors/_classic_cdf/_assets.py CHANGED
@@ -1,6 +1,4 @@
1
- import json
2
- import re
3
- from collections.abc import Callable, Iterable
1
+ from collections.abc import Callable, Iterable, Set
4
2
  from datetime import datetime, timezone
5
3
  from pathlib import Path
6
4
  from typing import cast
@@ -9,17 +7,17 @@ from cognite.client import CogniteClient
9
7
  from cognite.client.data_classes import Asset, AssetFilter, AssetList
10
8
  from rdflib import RDF, Literal, Namespace
11
9
 
12
- from cognite.neat.constants import DEFAULT_NAMESPACE
13
- from cognite.neat.graph.extractors._base import BaseExtractor
14
10
  from cognite.neat.graph.models import Triple
15
- from cognite.neat.utils.auxiliary import create_sha256_hash, string_to_ideal_type
11
+ from cognite.neat.utils.auxiliary import create_sha256_hash
16
12
 
13
+ from ._base import DEFAULT_SKIP_METADATA_VALUES, ClassicCDFExtractor
17
14
 
18
- class AssetsExtractor(BaseExtractor):
15
+
16
+ class AssetsExtractor(ClassicCDFExtractor[Asset]):
19
17
  """Extract data from Cognite Data Fusions Assets into Neat.
20
18
 
21
19
  Args:
22
- assets (Iterable[Asset]): An iterable of assets.
20
+ items (Iterable[Asset]): An iterable of assets.
23
21
  namespace (Namespace, optional): The namespace to use. Defaults to DEFAULT_NAMESPACE.
24
22
  to_type (Callable[[Asset], str | None], optional): A function to convert an asset to a type. Defaults to None.
25
23
  If None or if the function returns None, the asset will be set to the default type "Asset".
@@ -34,25 +32,7 @@ class AssetsExtractor(BaseExtractor):
34
32
  metadata. Defaults to frozenset({"nan", "null", "none", ""}).
35
33
  """
36
34
 
37
- _SPACE_PATTERN = re.compile(r"\s+")
38
-
39
- def __init__(
40
- self,
41
- assets: Iterable[Asset],
42
- namespace: Namespace | None = None,
43
- to_type: Callable[[Asset], str | None] | None = None,
44
- total: int | None = None,
45
- limit: int | None = None,
46
- unpack_metadata: bool = True,
47
- skip_metadata_values: set[str] | frozenset[str] | None = frozenset({"nan", "null", "none", ""}),
48
- ):
49
- self.namespace = namespace or DEFAULT_NAMESPACE
50
- self.assets = assets
51
- self.to_type = to_type
52
- self.total = total
53
- self.limit = min(limit, total) if limit and total else limit
54
- self.unpack_metadata = unpack_metadata
55
- self.skip_metadata_values = skip_metadata_values
35
+ _default_rdf_type = "Asset"
56
36
 
57
37
  @classmethod
58
38
  def from_dataset(
@@ -63,19 +43,18 @@ class AssetsExtractor(BaseExtractor):
63
43
  to_type: Callable[[Asset], str | None] | None = None,
64
44
  limit: int | None = None,
65
45
  unpack_metadata: bool = True,
46
+ skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
66
47
  ):
67
48
  total = client.assets.aggregate_count(filter=AssetFilter(data_set_ids=[{"externalId": data_set_external_id}]))
68
49
 
69
50
  return cls(
70
- cast(
71
- Iterable[Asset],
72
- client.assets(data_set_external_ids=data_set_external_id),
73
- ),
51
+ client.assets(data_set_external_ids=data_set_external_id),
74
52
  namespace,
75
53
  to_type,
76
54
  total,
77
55
  limit,
78
56
  unpack_metadata=unpack_metadata,
57
+ skip_metadata_values=skip_metadata_values,
79
58
  )
80
59
 
81
60
  @classmethod
@@ -87,6 +66,7 @@ class AssetsExtractor(BaseExtractor):
87
66
  to_type: Callable[[Asset], str | None] | None = None,
88
67
  limit: int | None = None,
89
68
  unpack_metadata: bool = True,
69
+ skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
90
70
  ):
91
71
  total = client.assets.aggregate_count(
92
72
  filter=AssetFilter(asset_subtree_ids=[{"externalId": root_asset_external_id}])
@@ -102,6 +82,7 @@ class AssetsExtractor(BaseExtractor):
102
82
  total,
103
83
  limit,
104
84
  unpack_metadata=unpack_metadata,
85
+ skip_metadata_values=skip_metadata_values,
105
86
  )
106
87
 
107
88
  @classmethod
@@ -112,44 +93,24 @@ class AssetsExtractor(BaseExtractor):
112
93
  to_type: Callable[[Asset], str] | None = None,
113
94
  limit: int | None = None,
114
95
  unpack_metadata: bool = True,
96
+ skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
115
97
  ):
98
+ assets = AssetList.load(Path(file_path).read_text())
116
99
  return cls(
117
- AssetList.load(Path(file_path).read_text()),
100
+ assets,
118
101
  namespace,
119
102
  to_type,
120
- limit,
103
+ total=len(assets),
104
+ limit=limit,
121
105
  unpack_metadata=unpack_metadata,
106
+ skip_metadata_values=skip_metadata_values,
122
107
  )
123
108
 
124
- def extract(self) -> Iterable[Triple]:
125
- """Extracts an asset with the given asset_id."""
126
- if self.total:
127
- try:
128
- from rich.progress import track
129
- except ModuleNotFoundError:
130
- to_iterate = self.assets
131
- else:
132
- to_iterate = track(
133
- self.assets,
134
- total=self.limit or self.total,
135
- description="Extracting Assets",
136
- )
137
- else:
138
- to_iterate = self.assets
139
- for no, asset in enumerate(to_iterate):
140
- yield from self._asset2triples(asset)
141
- if self.limit and no >= self.limit:
142
- break
143
-
144
- def _asset2triples(self, asset: Asset) -> list[Triple]:
109
+ def _item2triples(self, asset: Asset) -> list[Triple]:
145
110
  """Converts an asset to triples."""
146
111
  id_ = self.namespace[f"Asset_{asset.id}"]
147
112
 
148
- # Set rdf type
149
- type_ = "Asset"
150
- if self.to_type:
151
- type_ = self.to_type(asset) or type_
152
- type_ = self._SPACE_PATTERN.sub("_", type_)
113
+ type_ = self._get_rdf_type(asset)
153
114
 
154
115
  triples: list[Triple] = [(id_, RDF.type, self.namespace[type_])]
155
116
 
@@ -195,20 +156,7 @@ class AssetsExtractor(BaseExtractor):
195
156
  )
196
157
 
197
158
  if asset.metadata:
198
- if self.unpack_metadata:
199
- for key, value in asset.metadata.items():
200
- if value and (
201
- self.skip_metadata_values is None or value.casefold() not in self.skip_metadata_values
202
- ):
203
- triples.append(
204
- (
205
- id_,
206
- self.namespace[key],
207
- Literal(string_to_ideal_type(value)),
208
- )
209
- )
210
- else:
211
- triples.append((id_, self.namespace.metadata, Literal(json.dumps(asset.metadata))))
159
+ triples.extend(self._metadata_to_triples(id_, asset.metadata))
212
160
 
213
161
  # Create connections:
214
162
  if asset.parent_id:
cognite/neat/graph/extractors/_classic_cdf/_base.py ADDED
@@ -0,0 +1,102 @@
1
+ import json
2
+ import re
3
+ from abc import ABC, abstractmethod
4
+ from collections.abc import Callable, Iterable, Set
5
+ from typing import Generic, TypeVar
6
+
7
+ from cognite.client.data_classes._base import CogniteResource
8
+ from rdflib import Literal, Namespace, URIRef
9
+
10
+ from cognite.neat.constants import DEFAULT_NAMESPACE
11
+ from cognite.neat.graph.extractors._base import BaseExtractor
12
+ from cognite.neat.graph.models import Triple
13
+ from cognite.neat.utils.auxiliary import string_to_ideal_type
14
+
15
+ T_CogniteResource = TypeVar("T_CogniteResource", bound=CogniteResource)
16
+
17
+ DEFAULT_SKIP_METADATA_VALUES = frozenset({"nan", "null", "none", ""})
18
+
19
+
20
+ class ClassicCDFExtractor(BaseExtractor, ABC, Generic[T_CogniteResource]):
21
+ """This is the Base Extractor for all classic CDF resources.
22
+
23
+ A classic resource is recognized in that it has a metadata attribute of type dict[str, str].
24
+
25
+ Args:
26
+ items (Iterable[T_CogniteResource]): An iterable of classic resource.
27
+ namespace (Namespace, optional): The namespace to use. Defaults to DEFAULT_NAMESPACE.
28
+ to_type (Callable[[T_CogniteResource], str | None], optional): A function to convert an item to a type.
29
+ Defaults to None. If None or if the function returns None, the asset will be set to the default type.
30
+ total (int, optional): The total number of items to load. If passed, you will get a progress bar if rich
31
+ is installed. Defaults to None.
32
+ limit (int, optional): The maximal number of items to load. Defaults to None. This is typically used for
33
+ testing setup of the extractor. For example, if you are extracting 100 000 assets, you might want to
34
+ limit the extraction to 1000 assets to test the setup.
35
+ unpack_metadata (bool, optional): Whether to unpack metadata. Defaults to False, which yields the metadata as
36
+ a JSON string.
37
+ skip_metadata_values (set[str] | frozenset[str] | None, optional): If you are unpacking metadata, then
38
+ values in this set will be skipped.
39
+ """
40
+
41
+ _default_rdf_type: str
42
+ _SPACE_PATTERN = re.compile(r"\s+")
43
+
44
+ def __init__(
45
+ self,
46
+ items: Iterable[T_CogniteResource],
47
+ namespace: Namespace | None = None,
48
+ to_type: Callable[[T_CogniteResource], str | None] | None = None,
49
+ total: int | None = None,
50
+ limit: int | None = None,
51
+ unpack_metadata: bool = True,
52
+ skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
53
+ ):
54
+ self.namespace = namespace or DEFAULT_NAMESPACE
55
+ self.items = items
56
+ self.to_type = to_type
57
+ self.total = total
58
+ self.limit = min(limit, total) if limit and total else limit
59
+ self.unpack_metadata = unpack_metadata
60
+ self.skip_metadata_values = skip_metadata_values
61
+
62
+ def extract(self) -> Iterable[Triple]:
63
+ """Extracts an asset with the given asset_id."""
64
+ if self.total:
65
+ try:
66
+ from rich.progress import track
67
+ except ModuleNotFoundError:
68
+ to_iterate = self.items
69
+ else:
70
+ to_iterate = track(
71
+ self.items,
72
+ total=self.limit or self.total,
73
+ description=f"Extracting {type(self).__name__.removesuffix('Extractor')}",
74
+ )
75
+ else:
76
+ to_iterate = self.items
77
+ for no, asset in enumerate(to_iterate):
78
+ yield from self._item2triples(asset)
79
+ if self.limit and no >= self.limit:
80
+ break
81
+
82
+ @abstractmethod
83
+ def _item2triples(self, item: T_CogniteResource) -> list[Triple]:
84
+ raise NotImplementedError()
85
+
86
+ def _metadata_to_triples(self, id_: URIRef, metadata: dict[str, str]) -> Iterable[Triple]:
87
+ if self.unpack_metadata:
88
+ for key, value in metadata.items():
89
+ if value and (self.skip_metadata_values is None or value.casefold() not in self.skip_metadata_values):
90
+ yield (
91
+ id_,
92
+ self.namespace[key],
93
+ Literal(string_to_ideal_type(value)),
94
+ )
95
+ else:
96
+ yield id_, self.namespace.metadata, Literal(json.dumps(metadata))
97
+
98
+ def _get_rdf_type(self, item: T_CogniteResource) -> str:
99
+ type_ = self._default_rdf_type
100
+ if self.to_type:
101
+ type_ = self.to_type(item) or type_
102
+ return self._SPACE_PATTERN.sub("_", type_)
cognite/neat/graph/extractors/_classic_cdf/_events.py CHANGED
@@ -1,39 +1,36 @@
1
- import json
2
- from collections.abc import Iterable
1
+ from collections.abc import Callable, Set
3
2
  from datetime import datetime, timezone
4
3
  from pathlib import Path
5
- from typing import cast
6
4
 
7
5
  from cognite.client import CogniteClient
8
- from cognite.client.data_classes import Event, EventList
9
- from pydantic import AnyHttpUrl, ValidationError
10
- from rdflib import RDF, Literal, Namespace, URIRef
6
+ from cognite.client.data_classes import Event, EventFilter, EventList
7
+ from rdflib import RDF, Literal, Namespace
11
8
 
12
- from cognite.neat.constants import DEFAULT_NAMESPACE
13
- from cognite.neat.graph.extractors._base import BaseExtractor
14
9
  from cognite.neat.graph.models import Triple
15
- from cognite.neat.utils.auxiliary import string_to_ideal_type
16
10
 
11
+ from ._base import DEFAULT_SKIP_METADATA_VALUES, ClassicCDFExtractor
17
12
 
18
- class EventsExtractor(BaseExtractor):
13
+
14
+ class EventsExtractor(ClassicCDFExtractor[Event]):
19
15
  """Extract data from Cognite Data Fusions Events into Neat.
20
16
 
21
17
  Args:
22
- events (Iterable[Event]): An iterable of events.
18
+ items (Iterable[Event]): An iterable of items.
23
19
  namespace (Namespace, optional): The namespace to use. Defaults to DEFAULT_NAMESPACE.
20
+ to_type (Callable[[Event], str | None], optional): A function to convert an item to a type.
21
+ Defaults to None. If None or if the function returns None, the asset will be set to the default type.
22
+ total (int, optional): The total number of items to load. If passed, you will get a progress bar if rich
23
+ is installed. Defaults to None.
24
+ limit (int, optional): The maximal number of items to load. Defaults to None. This is typically used for
25
+ testing setup of the extractor. For example, if you are extracting 100 000 assets, you might want to
26
+ limit the extraction to 1000 assets to test the setup.
24
27
  unpack_metadata (bool, optional): Whether to unpack metadata. Defaults to False, which yields the metadata as
25
28
  a JSON string.
29
+ skip_metadata_values (set[str] | frozenset[str] | None, optional): If you are unpacking metadata, then
30
+ values in this set will be skipped.
26
31
  """
27
32
 
28
- def __init__(
29
- self,
30
- events: Iterable[Event],
31
- namespace: Namespace | None = None,
32
- unpack_metadata: bool = True,
33
- ):
34
- self.namespace = namespace or DEFAULT_NAMESPACE
35
- self.events = events
36
- self.unpack_metadata = unpack_metadata
33
+ _default_rdf_type = "Event"
37
34
 
38
35
  @classmethod
39
36
  def from_dataset(
@@ -41,15 +38,21 @@ class EventsExtractor(BaseExtractor):
41
38
  client: CogniteClient,
42
39
  data_set_external_id: str,
43
40
  namespace: Namespace | None = None,
41
+ to_type: Callable[[Event], str | None] | None = None,
42
+ limit: int | None = None,
44
43
  unpack_metadata: bool = True,
44
+ skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
45
45
  ):
46
+ total = client.events.aggregate_count(filter=EventFilter(data_set_ids=[{"externalId": data_set_external_id}]))
47
+
46
48
  return cls(
47
- cast(
48
- Iterable[Event],
49
- client.events(data_set_external_ids=data_set_external_id),
50
- ),
49
+ client.events(data_set_external_ids=data_set_external_id),
51
50
  namespace,
52
- unpack_metadata,
51
+ to_type,
52
+ total=total,
53
+ limit=limit,
54
+ unpack_metadata=unpack_metadata,
55
+ skip_metadata_values=skip_metadata_values,
53
56
  )
54
57
 
55
58
  @classmethod
@@ -57,20 +60,30 @@ class EventsExtractor(BaseExtractor):
57
60
  cls,
58
61
  file_path: str,
59
62
  namespace: Namespace | None = None,
63
+ to_type: Callable[[Event], str | None] | None = None,
64
+ limit: int | None = None,
60
65
  unpack_metadata: bool = True,
66
+ skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
61
67
  ):
62
- return cls(EventList.load(Path(file_path).read_text()), namespace, unpack_metadata)
68
+ events = EventList.load(Path(file_path).read_text())
63
69
 
64
- def extract(self) -> Iterable[Triple]:
65
- """Extract events as triples."""
66
- for event in self.events:
67
- yield from self._event2triples(event)
70
+ return cls(
71
+ events,
72
+ namespace,
73
+ to_type,
74
+ total=len(events),
75
+ limit=limit,
76
+ unpack_metadata=unpack_metadata,
77
+ skip_metadata_values=skip_metadata_values,
78
+ )
68
79
 
69
- def _event2triples(self, event: Event) -> list[Triple]:
80
+ def _item2triples(self, event: Event) -> list[Triple]:
70
81
  id_ = self.namespace[f"Event_{event.id}"]
71
82
 
83
+ type_ = self._get_rdf_type(event)
84
+
72
85
  # Set rdf type
73
- triples: list[Triple] = [(id_, RDF.type, self.namespace.Event)]
86
+ triples: list[Triple] = [(id_, RDF.type, self.namespace[type_])]
74
87
 
75
88
  # Create attributes
76
89
 
@@ -87,16 +100,7 @@ class EventsExtractor(BaseExtractor):
87
100
  triples.append((id_, self.namespace.subtype, Literal(event.subtype)))
88
101
 
89
102
  if event.metadata:
90
- if self.unpack_metadata:
91
- for key, value in event.metadata.items():
92
- if value:
93
- type_aware_value = string_to_ideal_type(value)
94
- try:
95
- triples.append((id_, self.namespace[key], URIRef(str(AnyHttpUrl(type_aware_value))))) # type: ignore
96
- except ValidationError:
97
- triples.append((id_, self.namespace[key], Literal(type_aware_value)))
98
- else:
99
- triples.append((id_, self.namespace.metadata, Literal(json.dumps(event.metadata))))
103
+ triples.extend(self._metadata_to_triples(id_, event.metadata))
100
104
 
101
105
  if event.description:
102
106
  triples.append((id_, self.namespace.description, Literal(event.description)))
@@ -1,40 +1,37 @@
1
- import json
2
- from collections.abc import Iterable
1
+ from collections.abc import Callable, Set
3
2
  from datetime import datetime, timezone
4
3
  from pathlib import Path
5
- from typing import cast
6
4
  from urllib.parse import quote
7
5
 
8
6
  from cognite.client import CogniteClient
9
7
  from cognite.client.data_classes import FileMetadata, FileMetadataList
10
- from pydantic import AnyHttpUrl, ValidationError
11
- from rdflib import RDF, Literal, Namespace, URIRef
8
+ from rdflib import RDF, Literal, Namespace
12
9
 
13
- from cognite.neat.constants import DEFAULT_NAMESPACE
14
- from cognite.neat.graph.extractors._base import BaseExtractor
15
10
  from cognite.neat.graph.models import Triple
16
- from cognite.neat.utils.auxiliary import string_to_ideal_type
17
11
 
12
+ from ._base import DEFAULT_SKIP_METADATA_VALUES, ClassicCDFExtractor
18
13
 
19
- class FilesExtractor(BaseExtractor):
14
+
15
+ class FilesExtractor(ClassicCDFExtractor[FileMetadata]):
20
16
  """Extract data from Cognite Data Fusions files metadata into Neat.
21
17
 
22
18
  Args:
23
- files_metadata (Iterable[FileMetadata]): An iterable of files metadata.
19
+ items (Iterable[FileMetadata]): An iterable of items.
24
20
  namespace (Namespace, optional): The namespace to use. Defaults to DEFAULT_NAMESPACE.
21
+ to_type (Callable[[FileMetadata], str | None], optional): A function to convert an item to a type.
22
+ Defaults to None. If None or if the function returns None, the item will be set to the default type.
23
+ total (int, optional): The total number of items to load. If passed, you will get a progress bar if rich
24
+ is installed. Defaults to None.
25
+ limit (int, optional): The maximal number of items to load. Defaults to None. This is typically used for
26
+ testing setup of the extractor. For example, if you are extracting 100 000 assets, you might want to
27
+ limit the extraction to 1000 assets to test the setup.
25
28
  unpack_metadata (bool, optional): Whether to unpack metadata. Defaults to True. If False, the metadata is yielded as
26
29
  a JSON string.
30
+ skip_metadata_values (set[str] | frozenset[str] | None, optional): If you are unpacking metadata, then
31
+ values in this set will be skipped.
27
32
  """
28
33
 
29
- def __init__(
30
- self,
31
- files_metadata: Iterable[FileMetadata],
32
- namespace: Namespace | None = None,
33
- unpack_metadata: bool = True,
34
- ):
35
- self.namespace = namespace or DEFAULT_NAMESPACE
36
- self.files_metadata = files_metadata
37
- self.unpack_metadata = unpack_metadata
34
+ _default_rdf_type = "File"
38
35
 
39
36
  @classmethod
40
37
  def from_dataset(
@@ -42,15 +39,18 @@ class FilesExtractor(BaseExtractor):
42
39
  client: CogniteClient,
43
40
  data_set_external_id: str,
44
41
  namespace: Namespace | None = None,
42
+ to_type: Callable[[FileMetadata], str | None] | None = None,
43
+ limit: int | None = None,
45
44
  unpack_metadata: bool = True,
45
+ skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
46
46
  ):
47
47
  return cls(
48
- cast(
49
- Iterable[FileMetadata],
50
- client.files(data_set_external_ids=data_set_external_id),
51
- ),
52
- namespace,
53
- unpack_metadata,
48
+ client.files(data_set_external_ids=data_set_external_id),
49
+ namespace=namespace,
50
+ to_type=to_type,
51
+ limit=limit,
52
+ unpack_metadata=unpack_metadata,
53
+ skip_metadata_values=skip_metadata_values,
54
54
  )
55
55
 
56
56
  @classmethod
@@ -58,24 +58,29 @@ class FilesExtractor(BaseExtractor):
58
58
  cls,
59
59
  file_path: str,
60
60
  namespace: Namespace | None = None,
61
+ to_type: Callable[[FileMetadata], str | None] | None = None,
62
+ limit: int | None = None,
61
63
  unpack_metadata: bool = True,
64
+ skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
62
65
  ):
66
+ file_metadata = FileMetadataList.load(Path(file_path).read_text())
63
67
  return cls(
64
- FileMetadataList.load(Path(file_path).read_text()),
65
- namespace,
66
- unpack_metadata,
68
+ file_metadata,
69
+ namespace=namespace,
70
+ to_type=to_type,
71
+ limit=limit,
72
+ total=len(file_metadata),
73
+ unpack_metadata=unpack_metadata,
74
+ skip_metadata_values=skip_metadata_values,
67
75
  )
68
76
 
69
- def extract(self) -> Iterable[Triple]:
70
- """Extract files metadata as triples."""
71
- for event in self.files_metadata:
72
- yield from self._file2triples(event)
73
-
74
- def _file2triples(self, file: FileMetadata) -> list[Triple]:
77
+ def _item2triples(self, file: FileMetadata) -> list[Triple]:
75
78
  id_ = self.namespace[f"File_{file.id}"]
76
79
 
80
+ type_ = self._get_rdf_type(file)
81
+
77
82
  # Set rdf type
78
- triples: list[Triple] = [(id_, RDF.type, self.namespace.File)]
83
+ triples: list[Triple] = [(id_, RDF.type, self.namespace[type_])]
79
84
 
80
85
  # Create attributes
81
86
 
@@ -95,16 +100,7 @@ class FilesExtractor(BaseExtractor):
95
100
  triples.append((id_, self.namespace.source, Literal(file.source)))
96
101
 
97
102
  if file.metadata:
98
- if self.unpack_metadata:
99
- for key, value in file.metadata.items():
100
- if value:
101
- type_aware_value = string_to_ideal_type(value)
102
- try:
103
- triples.append((id_, self.namespace[key], URIRef(str(AnyHttpUrl(type_aware_value))))) # type: ignore
104
- except ValidationError:
105
- triples.append((id_, self.namespace[key], Literal(type_aware_value)))
106
- else:
107
- triples.append((id_, self.namespace.metadata, Literal(json.dumps(file.metadata))))
103
+ triples.extend(self._metadata_to_triples(id_, file.metadata))
108
104
 
109
105
  if file.source_created_time:
110
106
  triples.append(