cognite-neat 0.87.4__py3-none-any.whl → 0.88.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of cognite-neat might be problematic. (The original page linked to further details.)

Files changed (132)
  1. cognite/neat/_version.py +1 -1
  2. cognite/neat/app/api/data_classes/rest.py +0 -19
  3. cognite/neat/app/api/explorer.py +6 -4
  4. cognite/neat/app/api/routers/crud.py +11 -21
  5. cognite/neat/app/api/routers/workflows.py +24 -94
  6. cognite/neat/graph/extractors/_classic_cdf/_assets.py +8 -2
  7. cognite/neat/graph/extractors/_mock_graph_generator.py +2 -2
  8. cognite/neat/graph/loaders/_base.py +17 -12
  9. cognite/neat/graph/loaders/_rdf2asset.py +223 -58
  10. cognite/neat/graph/loaders/_rdf2dms.py +1 -1
  11. cognite/neat/graph/stores/_base.py +5 -0
  12. cognite/neat/rules/analysis/_asset.py +31 -1
  13. cognite/neat/rules/importers/_inference2rules.py +31 -35
  14. cognite/neat/rules/models/information/_rules.py +1 -1
  15. cognite/neat/workflows/steps/data_contracts.py +17 -43
  16. cognite/neat/workflows/steps/lib/current/graph_extractor.py +28 -24
  17. cognite/neat/workflows/steps/lib/current/graph_loader.py +4 -21
  18. cognite/neat/workflows/steps/lib/current/graph_store.py +18 -134
  19. cognite/neat/workflows/steps_registry.py +5 -7
  20. {cognite_neat-0.87.4.dist-info → cognite_neat-0.88.0.dist-info}/METADATA +1 -1
  21. {cognite_neat-0.87.4.dist-info → cognite_neat-0.88.0.dist-info}/RECORD +24 -132
  22. cognite/neat/app/api/routers/core.py +0 -91
  23. cognite/neat/app/api/routers/data_exploration.py +0 -336
  24. cognite/neat/app/api/routers/rules.py +0 -203
  25. cognite/neat/legacy/__init__.py +0 -0
  26. cognite/neat/legacy/graph/__init__.py +0 -3
  27. cognite/neat/legacy/graph/examples/Knowledge-Graph-Nordic44-dirty.xml +0 -20182
  28. cognite/neat/legacy/graph/examples/Knowledge-Graph-Nordic44.xml +0 -20163
  29. cognite/neat/legacy/graph/examples/__init__.py +0 -10
  30. cognite/neat/legacy/graph/examples/skos-capturing-sheet-wind-topics.xlsx +0 -0
  31. cognite/neat/legacy/graph/exceptions.py +0 -90
  32. cognite/neat/legacy/graph/extractors/__init__.py +0 -6
  33. cognite/neat/legacy/graph/extractors/_base.py +0 -14
  34. cognite/neat/legacy/graph/extractors/_dexpi.py +0 -44
  35. cognite/neat/legacy/graph/extractors/_graph_capturing_sheet.py +0 -403
  36. cognite/neat/legacy/graph/extractors/_mock_graph_generator.py +0 -361
  37. cognite/neat/legacy/graph/loaders/__init__.py +0 -23
  38. cognite/neat/legacy/graph/loaders/_asset_loader.py +0 -511
  39. cognite/neat/legacy/graph/loaders/_base.py +0 -67
  40. cognite/neat/legacy/graph/loaders/_exceptions.py +0 -85
  41. cognite/neat/legacy/graph/loaders/core/__init__.py +0 -0
  42. cognite/neat/legacy/graph/loaders/core/labels.py +0 -58
  43. cognite/neat/legacy/graph/loaders/core/models.py +0 -136
  44. cognite/neat/legacy/graph/loaders/core/rdf_to_assets.py +0 -1046
  45. cognite/neat/legacy/graph/loaders/core/rdf_to_relationships.py +0 -559
  46. cognite/neat/legacy/graph/loaders/rdf_to_dms.py +0 -309
  47. cognite/neat/legacy/graph/loaders/validator.py +0 -87
  48. cognite/neat/legacy/graph/models.py +0 -6
  49. cognite/neat/legacy/graph/stores/__init__.py +0 -13
  50. cognite/neat/legacy/graph/stores/_base.py +0 -400
  51. cognite/neat/legacy/graph/stores/_graphdb_store.py +0 -52
  52. cognite/neat/legacy/graph/stores/_memory_store.py +0 -43
  53. cognite/neat/legacy/graph/stores/_oxigraph_store.py +0 -151
  54. cognite/neat/legacy/graph/stores/_oxrdflib.py +0 -247
  55. cognite/neat/legacy/graph/stores/_rdf_to_graph.py +0 -42
  56. cognite/neat/legacy/graph/transformations/__init__.py +0 -0
  57. cognite/neat/legacy/graph/transformations/entity_matcher.py +0 -101
  58. cognite/neat/legacy/graph/transformations/query_generator/__init__.py +0 -3
  59. cognite/neat/legacy/graph/transformations/query_generator/sparql.py +0 -575
  60. cognite/neat/legacy/graph/transformations/transformer.py +0 -322
  61. cognite/neat/legacy/rules/__init__.py +0 -0
  62. cognite/neat/legacy/rules/analysis.py +0 -231
  63. cognite/neat/legacy/rules/examples/Rules-Nordic44-to-graphql.xlsx +0 -0
  64. cognite/neat/legacy/rules/examples/Rules-Nordic44.xlsx +0 -0
  65. cognite/neat/legacy/rules/examples/__init__.py +0 -18
  66. cognite/neat/legacy/rules/examples/power-grid-containers.yaml +0 -124
  67. cognite/neat/legacy/rules/examples/power-grid-example.xlsx +0 -0
  68. cognite/neat/legacy/rules/examples/power-grid-model.yaml +0 -224
  69. cognite/neat/legacy/rules/examples/rules-template.xlsx +0 -0
  70. cognite/neat/legacy/rules/examples/sheet2cdf-transformation-rules.xlsx +0 -0
  71. cognite/neat/legacy/rules/examples/skos-rules.xlsx +0 -0
  72. cognite/neat/legacy/rules/examples/source-to-solution-mapping-rules.xlsx +0 -0
  73. cognite/neat/legacy/rules/examples/wind-energy.owl +0 -1511
  74. cognite/neat/legacy/rules/exceptions.py +0 -2972
  75. cognite/neat/legacy/rules/exporters/__init__.py +0 -20
  76. cognite/neat/legacy/rules/exporters/_base.py +0 -45
  77. cognite/neat/legacy/rules/exporters/_core/__init__.py +0 -5
  78. cognite/neat/legacy/rules/exporters/_core/rules2labels.py +0 -24
  79. cognite/neat/legacy/rules/exporters/_rules2dms.py +0 -885
  80. cognite/neat/legacy/rules/exporters/_rules2excel.py +0 -213
  81. cognite/neat/legacy/rules/exporters/_rules2graphql.py +0 -183
  82. cognite/neat/legacy/rules/exporters/_rules2ontology.py +0 -524
  83. cognite/neat/legacy/rules/exporters/_rules2pydantic_models.py +0 -748
  84. cognite/neat/legacy/rules/exporters/_rules2rules.py +0 -105
  85. cognite/neat/legacy/rules/exporters/_rules2triples.py +0 -38
  86. cognite/neat/legacy/rules/exporters/_validation.py +0 -146
  87. cognite/neat/legacy/rules/importers/__init__.py +0 -22
  88. cognite/neat/legacy/rules/importers/_base.py +0 -66
  89. cognite/neat/legacy/rules/importers/_dict2rules.py +0 -158
  90. cognite/neat/legacy/rules/importers/_dms2rules.py +0 -194
  91. cognite/neat/legacy/rules/importers/_graph2rules.py +0 -308
  92. cognite/neat/legacy/rules/importers/_json2rules.py +0 -39
  93. cognite/neat/legacy/rules/importers/_owl2rules/__init__.py +0 -3
  94. cognite/neat/legacy/rules/importers/_owl2rules/_owl2classes.py +0 -239
  95. cognite/neat/legacy/rules/importers/_owl2rules/_owl2metadata.py +0 -260
  96. cognite/neat/legacy/rules/importers/_owl2rules/_owl2properties.py +0 -217
  97. cognite/neat/legacy/rules/importers/_owl2rules/_owl2rules.py +0 -290
  98. cognite/neat/legacy/rules/importers/_spreadsheet2rules.py +0 -45
  99. cognite/neat/legacy/rules/importers/_xsd2rules.py +0 -20
  100. cognite/neat/legacy/rules/importers/_yaml2rules.py +0 -39
  101. cognite/neat/legacy/rules/models/__init__.py +0 -5
  102. cognite/neat/legacy/rules/models/_base.py +0 -151
  103. cognite/neat/legacy/rules/models/raw_rules.py +0 -316
  104. cognite/neat/legacy/rules/models/rdfpath.py +0 -237
  105. cognite/neat/legacy/rules/models/rules.py +0 -1289
  106. cognite/neat/legacy/rules/models/tables.py +0 -9
  107. cognite/neat/legacy/rules/models/value_types.py +0 -118
  108. cognite/neat/legacy/workflows/examples/Export_DMS/workflow.yaml +0 -89
  109. cognite/neat/legacy/workflows/examples/Export_Rules_to_Ontology/workflow.yaml +0 -152
  110. cognite/neat/legacy/workflows/examples/Extract_DEXPI_Graph_and_Export_Rules/workflow.yaml +0 -139
  111. cognite/neat/legacy/workflows/examples/Extract_RDF_Graph_and_Generate_Assets/workflow.yaml +0 -270
  112. cognite/neat/legacy/workflows/examples/Import_DMS/workflow.yaml +0 -65
  113. cognite/neat/legacy/workflows/examples/Ontology_to_Data_Model/workflow.yaml +0 -116
  114. cognite/neat/legacy/workflows/examples/Validate_Rules/workflow.yaml +0 -67
  115. cognite/neat/legacy/workflows/examples/Validate_Solution_Model/workflow.yaml +0 -64
  116. cognite/neat/legacy/workflows/examples/Visualize_Data_Model_Using_Mock_Graph/workflow.yaml +0 -95
  117. cognite/neat/legacy/workflows/examples/Visualize_Semantic_Data_Model/workflow.yaml +0 -111
  118. cognite/neat/workflows/examples/Extract_RDF_Graph_and_Generate_Assets/workflow.yaml +0 -270
  119. cognite/neat/workflows/migration/__init__.py +0 -0
  120. cognite/neat/workflows/migration/steps.py +0 -91
  121. cognite/neat/workflows/migration/wf_manifests.py +0 -33
  122. cognite/neat/workflows/steps/lib/legacy/__init__.py +0 -7
  123. cognite/neat/workflows/steps/lib/legacy/graph_contextualization.py +0 -82
  124. cognite/neat/workflows/steps/lib/legacy/graph_extractor.py +0 -746
  125. cognite/neat/workflows/steps/lib/legacy/graph_loader.py +0 -606
  126. cognite/neat/workflows/steps/lib/legacy/graph_store.py +0 -307
  127. cognite/neat/workflows/steps/lib/legacy/graph_transformer.py +0 -58
  128. cognite/neat/workflows/steps/lib/legacy/rules_exporter.py +0 -511
  129. cognite/neat/workflows/steps/lib/legacy/rules_importer.py +0 -612
  130. {cognite_neat-0.87.4.dist-info → cognite_neat-0.88.0.dist-info}/LICENSE +0 -0
  131. {cognite_neat-0.87.4.dist-info → cognite_neat-0.88.0.dist-info}/WHEEL +0 -0
  132. {cognite_neat-0.87.4.dist-info → cognite_neat-0.88.0.dist-info}/entry_points.txt +0 -0
cognite/neat/legacy/rules/models/rules.py (file removed)
@@ -1,1289 +0,0 @@
1
- """This module contains the definition of `TransformationRules` pydantic model and all
2
- its sub-models and validators.
3
- """
4
-
5
- from __future__ import annotations
6
-
7
- import math
8
- import re
9
- import sys
10
- import warnings
11
- from collections.abc import ItemsView, Iterator, KeysView, ValuesView
12
- from datetime import datetime
13
- from functools import wraps
14
- from typing import Any, ClassVar, Generic, TypeAlias, TypeVar, cast
15
-
16
- import pandas as pd
17
- from pydantic import (
18
- BaseModel,
19
- ConfigDict,
20
- Field,
21
- HttpUrl,
22
- TypeAdapter,
23
- ValidationError,
24
- constr,
25
- field_validator,
26
- model_validator,
27
- validator,
28
- )
29
- from pydantic.fields import FieldInfo
30
- from rdflib import XSD, Literal, Namespace, URIRef
31
-
32
- from cognite.neat.constants import get_default_prefixes
33
- from cognite.neat.legacy.rules import exceptions
34
- from cognite.neat.legacy.rules.models._base import (
35
- ENTITY_ID_REGEX_COMPILED,
36
- VERSIONED_ENTITY_REGEX_COMPILED,
37
- ContainerEntity,
38
- EntityTypes,
39
- ParentClass,
40
- )
41
- from cognite.neat.legacy.rules.models.rdfpath import (
42
- AllReferences,
43
- Entity,
44
- Hop,
45
- RawLookup,
46
- SingleProperty,
47
- SPARQLQuery,
48
- TransformationRuleType,
49
- Traversal,
50
- parse_rule,
51
- )
52
- from cognite.neat.legacy.rules.models.value_types import (
53
- XSD_VALUE_TYPE_MAPPINGS,
54
- ValueType,
55
- )
56
-
57
- if sys.version_info >= (3, 11):
58
- from typing import Self
59
- else:
60
- from typing_extensions import Self
61
-
62
- __all__ = [
63
- "Class",
64
- "Classes",
65
- "Instance",
66
- "Metadata",
67
- "Prefixes",
68
- "Property",
69
- "Properties",
70
- "Resource",
71
- "Rules",
72
- ]
73
-
74
- METADATA_VALUE_MAX_LENGTH = 5120
75
-
76
-
77
- def replace_nan_floats_with_default(values: dict, model_fields: dict[str, FieldInfo]) -> dict:
78
- output = {}
79
- for field_name, value in values.items():
80
- is_nan_float = isinstance(value, float) and math.isnan(value)
81
- if not is_nan_float:
82
- output[field_name] = value.strip() if isinstance(value, str) else value
83
- continue
84
- if field_name in model_fields:
85
- output[field_name] = model_fields[field_name].default
86
- else:
87
- # field_name may be an alias
88
- source_name = next(
89
- (name for name, field in model_fields.items() if field.alias == field_name),
90
- None,
91
- )
92
- if source_name:
93
- output[field_name] = model_fields[source_name].default
94
- else:
95
- # Just pass it through if it is not an alias.
96
- output[field_name] = value.strip() if isinstance(value, str) else value
97
- return output
98
-
99
-
100
- def skip_field_validator(validators_field):
101
- def decorator(func):
102
- @wraps(func)
103
- def wrapper(cls, value, values):
104
- if isinstance(values, dict):
105
- to_skip = values.get(validators_field, set())
106
- else:
107
- try:
108
- to_skip = values.data.get(validators_field, set())
109
- except Exception:
110
- to_skip = set()
111
-
112
- if "all" in to_skip or func.__name__ in to_skip:
113
- return value
114
- return func(cls, value, values)
115
-
116
- return wrapper
117
-
118
- return decorator
119
-
120
-
121
- def skip_model_validator(validators_field):
122
- def decorator(func):
123
- @wraps(func)
124
- def wrapper(self):
125
- to_skip = getattr(self, validators_field, set())
126
- if "all" in to_skip or func.__name__ in to_skip:
127
- return self
128
-
129
- return func(self)
130
-
131
- return wrapper
132
-
133
- return decorator
134
-
135
-
136
- class RuleModel(BaseModel):
137
- model_config: ClassVar[ConfigDict] = ConfigDict(
138
- populate_by_name=True,
139
- str_strip_whitespace=True,
140
- arbitrary_types_allowed=True,
141
- strict=False,
142
- extra="allow",
143
- )
144
- validators_to_skip: set[str] = Field(default_factory=set, exclude=True)
145
-
146
- @classmethod
147
- def mandatory_fields(cls, use_alias=False) -> set[str]:
148
- """Returns a set of mandatory fields for the model."""
149
- return _get_required_fields(cls, use_alias)
150
-
151
-
152
- def _get_required_fields(model: type[BaseModel], use_alias: bool = False) -> set[str]:
153
- """Get required fields from a pydantic model.
154
-
155
- Parameters
156
- ----------
157
- model : type[BaseModel]
158
- Pydantic data model
159
- use_alias : bool, optional
160
- Whether to return field alias name, by default False
161
-
162
- Returns
163
- -------
164
- list[str]
165
- List of required fields
166
- """
167
- required_fields = set()
168
- for name, field in model.model_fields.items():
169
- if not field.is_required():
170
- continue
171
-
172
- alias = getattr(field, "alias", None)
173
- if use_alias and alias:
174
- required_fields.add(alias)
175
- else:
176
- required_fields.add(name)
177
- return required_fields
178
-
179
-
180
- class URL(BaseModel):
181
- url: HttpUrl
182
-
183
-
184
- ########################################################################################
185
- ### These highly depend on CDF API endpoint limitations we need to keep them updated ###
186
- ########################################################################################
187
- Description: TypeAlias = constr(min_length=1, max_length=1024) # type: ignore[valid-type]
188
-
189
- more_than_one_none_alphanumerics_regex = r"([_-]{2,})"
190
-
191
- prefix_compliance_regex = r"^([a-zA-Z]+)([a-zA-Z0-9]*[_-]{0,1}[a-zA-Z0-9_-]*)([a-zA-Z0-9]*)$"
192
- data_model_id_compliance_regex = r"^[a-zA-Z]([a-zA-Z0-9_]{0,253}[a-zA-Z0-9])?$"
193
- cdf_space_compliance_regex = (
194
- r"(?!^(space|cdf|dms|pg3|shared|system|node|edge)$)(^[a-zA-Z][a-zA-Z0-9_-]{0,41}[a-zA-Z0-9]?$)"
195
- )
196
-
197
- view_id_compliance_regex = (
198
- r"(?!^(Query|Mutation|Subscription|String|Int32|Int64|Int|Float32|Float64|Float|"
199
- r"Timestamp|JSONObject|Date|Numeric|Boolean|PageInfo|File|Sequence|TimeSeries)$)"
200
- r"(^[a-zA-Z][a-zA-Z0-9_]{0,253}[a-zA-Z0-9]?$)"
201
- )
202
-
203
- value_id_compliance_regex = r"(^[a-zA-Z][a-zA-Z0-9_]{0,253}[a-zA-Z0-9]?$)"
204
-
205
- dms_property_id_compliance_regex = (
206
- r"(?!^(space|externalId|createdTime|lastUpdatedTime|deletedTime|edge_id|"
207
- r"node_id|project_id|property_group|seq|tg_table_name|extensions)$)"
208
- r"(^[a-zA-Z][a-zA-Z0-9_]{0,253}[a-zA-Z0-9]?$)"
209
- )
210
-
211
-
212
- class_id_compliance_regex = r"(?!^(Class|class)$)(^[a-zA-Z][a-zA-Z0-9._-]{0,253}[a-zA-Z0-9]?$)"
213
- property_id_compliance_regex = r"^(\*)|(?!^(Property|property)$)(^[a-zA-Z][a-zA-Z0-9._-]{0,253}[a-zA-Z0-9]?$)"
214
-
215
- version_compliance_regex = r"^[a-zA-Z0-9]([.a-zA-Z0-9_-]{0,41}[a-zA-Z0-9])?$"
216
- ########################################################################################
217
- ########################################################################################
218
-
219
-
220
- Prefix: TypeAlias = str
221
- ExternalId: TypeAlias = str
222
-
223
-
224
- class Metadata(RuleModel):
225
- """
226
- Metadata model for data model
227
-
228
- Args:
229
- prefix: This is used as prefix for generation of RDF OWL/SHACL data model representation
230
- suffix: Suffix is used as the data model external id when resolving rules as CDF data model
231
- namespace: This is used as RDF namespace for generation of RDF OWL/SHACL data model representation and/or for
232
- generation of RDF graphs
233
- title: This is used as data model name in CDF, or as a data model title in RDF
234
- version: This is used as RDF and CDF data model version
235
- created: This is used as RDF data model creation date for generation of RDF OWL/SHACL data model representation
236
- updated: This is used as RDF data model update date for generation of RDF OWL/SHACL data model representation
237
- description: This is used as RDF data model description for generation of RDF
238
- OWL/SHACL data model representation
239
- creator: This is used as RDF data model creator for generation of RDF OWL/SHACL data model representation
240
- contributor: This is used as RDF data model contributor for generation of
241
- RDF OWL/SHACL data model representation
242
- rights: This is used as RDF data model rights for generation of RDF OWL/SHACL data model representation
243
- """
244
-
245
- prefix: Prefix = Field(
246
- alias="space",
247
- description=(
248
- "This is used as prefix for generation of RDF OWL/SHACL data model representation"
249
- " and/or as CDF space name to which model is intend to be stored"
250
- ),
251
- )
252
-
253
- suffix: ExternalId | None = Field(
254
- description=(
255
- "Suffix is used as the data model external id when resolving rules as CDF data model"
256
- " This field is optional and if not provided it will be generated from prefix."
257
- ),
258
- alias="external_id",
259
- default=None,
260
- min_length=1,
261
- max_length=255,
262
- )
263
-
264
- namespace: Namespace | None = Field(
265
- description="This is used as RDF namespace for generation of RDF OWL/SHACL data model representation "
266
- "and/or for generation of RDF graphs.",
267
- min_length=1,
268
- max_length=2048,
269
- default=None,
270
- )
271
-
272
- version: str = Field(min_length=1, max_length=43)
273
- title: str | None = Field(alias="name", min_length=1, max_length=255, default=None)
274
-
275
- description: Description | None = None
276
-
277
- created: datetime = Field(default_factory=lambda: datetime.utcnow())
278
- updated: datetime = Field(default_factory=lambda: datetime.utcnow())
279
-
280
- creator: str | list[str] | None = None
281
- contributor: str | list[str] | None = None
282
- rights: str | None = "Restricted for Internal Use of Cognite"
283
- license: str | None = "Proprietary License"
284
-
285
- @field_validator("contributor", "contributor", "description", "rights", mode="before")
286
- def replace_float_nan_with_default(cls, value, info):
287
- if isinstance(value, float) and math.isnan(value):
288
- return cls.model_fields[info.field_name].default
289
- return value
290
-
291
- @field_validator("version", mode="before")
292
- def convert_to_string(cls, value):
293
- return str(value)
294
-
295
- @validator("prefix", always=True)
296
- @skip_field_validator("validators_to_skip")
297
- def is_prefix_compliant(cls, value, values):
298
- if re.search(more_than_one_none_alphanumerics_regex, value):
299
- raise exceptions.MoreThanOneNonAlphanumericCharacter("prefix", value).to_pydantic_custom_error()
300
- if not re.match(cdf_space_compliance_regex, value):
301
- raise exceptions.PrefixRegexViolation(value, cdf_space_compliance_regex).to_pydantic_custom_error()
302
- else:
303
- return value
304
-
305
- @validator("suffix", always=True)
306
- @skip_field_validator("validators_to_skip")
307
- def set_suffix_if_none(cls, value, values):
308
- if value is not None:
309
- return value
310
- warnings.warn(
311
- exceptions.DataModelIdMissing(values["prefix"].replace("-", "_")).message,
312
- category=exceptions.DataModelIdMissing,
313
- stacklevel=2,
314
- )
315
- return values["prefix"].replace("-", "_")
316
-
317
- @validator("suffix", always=True)
318
- @skip_field_validator("validators_to_skip")
319
- def is_suffix_compliant(cls, value, values):
320
- if re.search(more_than_one_none_alphanumerics_regex, value):
321
- raise exceptions.MoreThanOneNonAlphanumericCharacter("suffix", value).to_pydantic_custom_error()
322
- if not re.match(data_model_id_compliance_regex, value):
323
- raise exceptions.DataModelIdRegexViolation(value, data_model_id_compliance_regex).to_pydantic_custom_error()
324
- else:
325
- return value
326
-
327
- @validator("namespace", always=True)
328
- @skip_field_validator("validators_to_skip")
329
- def set_namespace_if_none(cls, value, values):
330
- if value is None:
331
- suffix = f"/{values['suffix']}" if values["prefix"] != values["suffix"] else ""
332
- return Namespace(f"http://purl.org/cognite/{values['prefix']}{suffix}#")
333
- try:
334
- return Namespace(TypeAdapter(HttpUrl).validate_python(value))
335
- except ValidationError as e:
336
- raise exceptions.MetadataSheetNamespaceNotValidURL(value).to_pydantic_custom_error() from e
337
-
338
- @validator("namespace", always=True)
339
- @skip_field_validator("validators_to_skip")
340
- def fix_namespace_ending(cls, value, values):
341
- if value.endswith("#") or value.endswith("/"):
342
- return value
343
- warnings.warn(
344
- exceptions.NamespaceEndingFixed(value).message,
345
- category=exceptions.NamespaceEndingFixed,
346
- stacklevel=2,
347
- )
348
- return Namespace(f"{value}#")
349
-
350
- @validator("title", always=True)
351
- @skip_field_validator("validators_to_skip")
352
- def set_title_if_none(cls, value, values):
353
- if value is not None:
354
- return value
355
- elif values["suffix"]:
356
- return values["suffix"]
357
- else:
358
- return values["prefix"]
359
-
360
- @validator("creator", always=True)
361
- @skip_field_validator("validators_to_skip")
362
- def set_creator_if_none(cls, value, values):
363
- if value is not None:
364
- return value
365
- else:
366
- return ["neat"]
367
-
368
- @validator("contributor", always=True)
369
- @skip_field_validator("validators_to_skip")
370
- def set_contributor_if_none(cls, value, values):
371
- if value is not None:
372
- return value
373
- else:
374
- return ["Cognite"]
375
-
376
- @validator("version", always=True)
377
- @skip_field_validator("validators_to_skip")
378
- def is_version_compliant(cls, value, values):
379
- if not re.match(version_compliance_regex, value):
380
- raise exceptions.VersionRegexViolation(value, version_compliance_regex).to_pydantic_custom_error()
381
- else:
382
- return value
383
-
384
- @field_validator("creator", "contributor", mode="before")
385
- def to_list_if_comma(cls, value, values):
386
- if isinstance(value, str):
387
- if value:
388
- return value.replace(", ", ",").split(",")
389
- if cls.model_fields[values.field_name].default is None:
390
- return None
391
- return value
392
-
393
- @property
394
- def space(self) -> str:
395
- """Returns data model space."""
396
- return cast(str, self.prefix)
397
-
398
- @property
399
- def external_id(self) -> str:
400
- """Returns data model external."""
401
- return cast(str, self.suffix)
402
-
403
- @property
404
- def name(self) -> str:
405
- """Returns data model name."""
406
- return cast(str, self.title)
407
-
408
- def to_pandas(self) -> pd.Series:
409
- """Converts Metadata to pandas Series."""
410
- return pd.Series(self.model_dump())
411
-
412
- def _repr_html_(self) -> str:
413
- """Returns HTML representation of Metadata."""
414
- return self.to_pandas().to_frame("value")._repr_html_() # type: ignore[operator]
415
-
416
-
417
- class Resource(RuleModel):
418
- """
419
- Base class for resources that constitute data model (i.e., classes, properties)
420
-
421
- Args:
422
- description: The description of the resource.
423
- cdf_resource_type: The CDF resource type to which resource resolves to
424
- deprecated: Whether the resource is deprecated or not.
425
- deprecation_date: The date when the resource was deprecated.
426
- replaced_by: The resource that replaced this resource.
427
- source: Source of information for given resource
428
- source_entity_name: The name of the source entity that is closest to the resource being described.
429
- match_type: The match type of the resource being described and the source entity.
430
- comment: Additional comment about mapping between the resource being described and the source entity.
431
-
432
- """
433
-
434
- # Solution model
435
- description: Description | None = Field(alias="Description", default=None)
436
-
437
- # Solution CDF resource, it is not needed when working with FDM, this is only for
438
- # Classic CDF data model
439
- cdf_resource_type: list[str] | str | None = Field(alias="Resource Type", default=None)
440
-
441
- # Advance data modeling: Keeping track if Resource got deprecated or not
442
- deprecated: bool = Field(default=False)
443
- deprecation_date: datetime | None = Field(alias="deprecationDate", default=None)
444
- replaced_by: str | None = Field(alias="replacedBy", default=None)
445
-
446
- # Advance data modeling: Relation to existing resources for purpose of mapping
447
- source: HttpUrl | None = Field(
448
- alias="Source",
449
- description=(
450
- "Source of information for given entity, e.g. https://www.entsoe.eu/digital/common-information-model/"
451
- ),
452
- default=None,
453
- )
454
- source_entity_name: str | None = Field(
455
- alias="Source Entity Name",
456
- description="Closest entity in source, e.g. Substation",
457
- default=None,
458
- )
459
- match_type: str | None = Field(
460
- alias="Match Type",
461
- description="Type of match between source entity and one being defined",
462
- default=None,
463
- )
464
- comment: str | None = Field(alias="Comment", description="Comment about mapping", default=None)
465
-
466
- @model_validator(mode="before")
467
- def replace_float_nan_with_default(cls, values: dict) -> dict:
468
- return replace_nan_floats_with_default(values, cls.model_fields)
469
-
470
-
471
- T_Resource = TypeVar("T_Resource", bound=Resource)
472
-
473
-
474
- class ResourceDict(BaseModel, Generic[T_Resource]):
475
- data: dict[str, T_Resource] = Field(default_factory=dict)
476
-
477
- def __getitem__(self, item: str) -> T_Resource:
478
- return self.data[item]
479
-
480
- def __setitem__(self, key: str, value: T_Resource):
481
- self.data[key] = value
482
-
483
- def __contains__(self, item: str) -> bool:
484
- return item in self.data
485
-
486
- def __len__(self) -> int:
487
- return len(self.data)
488
-
489
- def __iter__(self) -> Iterator[str]: # type: ignore[override]
490
- return iter(self.data)
491
-
492
- def values(self) -> ValuesView[T_Resource]:
493
- return self.data.values()
494
-
495
- def keys(self) -> KeysView[str]:
496
- return self.data.keys()
497
-
498
- def items(self) -> ItemsView[str, T_Resource]:
499
- return self.data.items()
500
-
501
- def to_pandas(self, drop_na_columns: bool = True, include: list[str] | None = None) -> pd.DataFrame:
502
- """Converts ResourceDict to pandas DataFrame."""
503
- df = pd.DataFrame([class_.model_dump() for class_ in self.data.values()])
504
- if drop_na_columns:
505
- df = df.dropna(axis=1, how="all")
506
- if include is not None:
507
- df = df[include]
508
- return df
509
-
510
- def groupby(self, by: str) -> dict[str, ResourceDict[T_Resource]]:
511
- """Groups ResourceDict by given column(s)."""
512
- groups: dict[str, ResourceDict[T_Resource]] = {}
513
- for key, resource in self.data.items():
514
- value = getattr(resource, by)
515
- if value not in groups:
516
- groups[value] = ResourceDict()
517
- groups[value][key] = resource
518
- return groups
519
-
520
- def _repr_html_(self) -> str:
521
- """Returns HTML representation of ResourceDict."""
522
- return self.to_pandas(drop_na_columns=True)._repr_html_() # type: ignore[operator]
523
-
524
-
525
- class Class(Resource):
526
- """
527
- Base class for all classes that are part of the data model.
528
-
529
- Args:
530
- class_id: The class ID of the class.
531
- class_name: The name of the class.
532
- parent_class: The parent class of the class.
533
- """
534
-
535
- class_id: ExternalId = Field(alias="Class", min_length=1, max_length=255)
536
- class_name: ExternalId | None = Field(alias="Name", default=None, min_length=1, max_length=255)
537
- # Solution model
538
- parent_class: list[ParentClass] | None = Field(alias="Parent Class", default=None)
539
- # Todo: Remove? Does not seem to be used anywhere
540
- filter_: str | None = Field(alias="Filter", default=None, min_length=1)
541
-
542
- @model_validator(mode="before")
543
- def replace_nan_floats_with_default(cls, values: dict) -> dict:
544
- return replace_nan_floats_with_default(values, cls.model_fields)
545
-
546
- @validator("class_id", always=True)
547
- @skip_field_validator("validators_to_skip")
548
- def is_class_id_compliant(cls, value, values):
549
- if re.search(more_than_one_none_alphanumerics_regex, value):
550
- raise exceptions.MoreThanOneNonAlphanumericCharacter("class_id", value).to_pydantic_custom_error()
551
- if not re.match(class_id_compliance_regex, value):
552
- raise exceptions.ClassSheetClassIDRegexViolation(
553
- value, class_id_compliance_regex
554
- ).to_pydantic_custom_error()
555
- else:
556
- return value
557
-
558
- @validator("class_name", always=True)
559
- def set_class_name_if_none(cls, value, values):
560
- if value is None:
561
- if "class_id" not in values:
562
- raise exceptions.ClassIDMissing().to_pydantic_custom_error()
563
- warnings.warn(
564
- exceptions.ClassNameNotProvided(values["class_id"]).message,
565
- category=exceptions.ClassNameNotProvided,
566
- stacklevel=2,
567
- )
568
- value = values["class_id"]
569
- return value
570
-
571
- @field_validator("parent_class", mode="before")
572
- @skip_field_validator("validators_to_skip")
573
- def parent_class_to_list_of_entities(cls, value, values):
574
- if isinstance(value, str) and value:
575
- parent_classes = []
576
- for v in value.replace(", ", ",").split(","):
577
- if ENTITY_ID_REGEX_COMPILED.match(v) or VERSIONED_ENTITY_REGEX_COMPILED.match(v):
578
- parent_classes.append(ParentClass.from_string(entity_string=v))
579
- else:
580
- # if all fails defaults "neat" object which ends up being updated to proper
581
- # prefix and version upon completion of Rules validation
582
- parent_classes.append(ParentClass(prefix="undefined", suffix=v, name=v))
583
-
584
- return parent_classes
585
- else:
586
- return None
587
-
588
- @field_validator("parent_class", mode="after")
589
- @skip_field_validator("validators_to_skip")
590
- def is_parent_class_id_compliant(cls, value, values):
591
- if isinstance(value, list):
592
- if illegal_ids := [v for v in value if re.search(more_than_one_none_alphanumerics_regex, v.suffix)]:
593
- raise exceptions.MoreThanOneNonAlphanumericCharacter(
594
- "parent_class", ", ".join(illegal_ids)
595
- ).to_pydantic_custom_error()
596
- if illegal_ids := [v for v in value if not re.match(class_id_compliance_regex, v.suffix)]:
597
- for v in illegal_ids:
598
- print(v.id)
599
- raise exceptions.ClassSheetParentClassIDRegexViolation(
600
- illegal_ids, class_id_compliance_regex
601
- ).to_pydantic_custom_error()
602
- return value
603
-
604
-
605
- class Classes(ResourceDict[Class]):
606
- """This represents a collection of classes that are part of the data model."""
607
-
608
- ...
609
-
610
-
611
class Property(Resource):
    """
    A property is a characteristic of a class. It is a named attribute of a class that describes a range of values
    or a relationship to another class.

    Args:
        class_id: Class ID to which property belongs
        property_id: Property ID of the property
        property_name: Property name. Defaults to property_id
        expected_value_type: Expected value type of the property
        min_count: Minimum count of the property values. Defaults to 0
        max_count: Maximum count of the property values. Defaults to None
        default: Default value of the property
        property_type: Property type (DatatypeProperty/attribute or ObjectProperty/edge/relationship)
        cdf_resource_type: CDF resource to under which property will be resolved to (e.g., Asset)
        resource_type_property: To what property of CDF resource given property resolves to (e.g., Asset name)
        source_type: In case if property resolves as CDF relationship, this argument indicates
                     relationship source type (defaults to Asset)
        target_type: In case if property resolves as CDF relationship, this argument
                     indicates relationship target type (defaults to Asset)
        label: CDF Label used for relationship. Defaults to property_id
        relationship_external_id_rule: Rule to use when generating CDF relationship externalId
        rule_type: Rule type for the transformation from source to target representation
                   of knowledge graph. Defaults to None (no transformation)
        rule: Actual rule for the transformation from source to target representation of
              knowledge graph. Defaults to None (no transformation)
        skip_rule: Flag indicating if rule should be skipped when performing
                   knowledge graph transformations. Defaults to False

    """

    # Solution model
    class_id: ExternalId = Field(alias="Class", min_length=1, max_length=255)
    property_id: ExternalId = Field(alias="Property", min_length=1, max_length=255)
    property_name: ExternalId | None = Field(alias="Name", default=None, min_length=1, max_length=255)
    expected_value_type: ValueType = Field(alias="Type")
    min_count: int | None = Field(alias="Min Count", default=0)
    max_count: int | None = Field(alias="Max Count", default=None)
    default: Any | None = Field(alias="Default", default=None)

    # OWL property
    property_type: str = EntityTypes.data_property

    # Core CDF resources (Asset, Relationship, and Labels)
    resource_type_property: list[str] | None = Field(
        alias="Resource Type Property",
        default=None,
        description="This is what property to resolve to in CDF resource, for "
        "example f cdf_resource_type is 'Asset', then this could"
        "be 'name' or 'description'. Note you can specify "
        "multiple properties ['name', 'metadata'] which would store"
        "this property twice in CDF, once as 'name' and once as 'metadata",
    )
    source_type: str = Field(alias="Relationship Source Type", default="Asset")
    target_type: str = Field(alias="Relationship Target Type", default="Asset")
    label: str | None = Field(alias="Relationship Label", default=None)
    relationship_external_id_rule: str | None = Field(alias="Relationship ExternalID Rule", default=None)
    # Specialization of cdf_resource_type to allow definition of both
    # Asset and Relationship at the same time
    cdf_resource_type: list[str] = Field(
        alias="Resource Type",
        default_factory=list,
        description="This is typically 'Asset' or 'Relationship'",
    )

    # Transformation rule (domain to solution)
    rule_type: TransformationRuleType | None = Field(alias="Rule Type", default=None)
    rule: str | AllReferences | SingleProperty | Hop | RawLookup | SPARQLQuery | Traversal | None = Field(
        alias="Rule", default=None
    )
    skip_rule: bool = Field(alias="Skip", default=False)

    # Container-specific things, only used for advance modeling or auto-filled by neat
    container: ContainerEntity | None = Field(alias="Container", default=None)
    container_property: str | None = Field(alias="Container Property", default=None)
    index: bool | None = Field(alias="Index", default=False)
    constraints: str | None = Field(alias="Constraints", default=None, min_length=1)

    @property
    def is_raw_lookup(self) -> bool:
        """True when the transformation rule type is a raw lookup."""
        return self.rule_type == TransformationRuleType.rawlookup

    @model_validator(mode="before")
    def replace_float_nan_with_default(cls, values: dict) -> dict:
        """Delegate NaN handling of the raw input to `replace_nan_floats_with_default`."""
        return replace_nan_floats_with_default(values, cls.model_fields)

    @field_validator("container", mode="before")
    def container_string_to_entity(cls, value):
        """Parse a container string into a `ContainerEntity`; falsy input is returned as is."""
        if not value:
            return value

        if ENTITY_ID_REGEX_COMPILED.match(value) or VERSIONED_ENTITY_REGEX_COMPILED.match(value):
            return ContainerEntity.from_string(entity_string=value)
        # No recognizable prefix: park the entity under "undefined".
        return ContainerEntity(prefix="undefined", suffix=value, name=value)

    @field_validator("expected_value_type", mode="before")
    def expected_value_type_string_to_entity(cls, value):
        """Map the value type string to a `ValueType` entity."""
        # handle simple types
        if value in XSD_VALUE_TYPE_MAPPINGS:
            return XSD_VALUE_TYPE_MAPPINGS[value]

        # complex types correspond to relations to other classes
        if ENTITY_ID_REGEX_COMPILED.match(value) or VERSIONED_ENTITY_REGEX_COMPILED.match(value):
            return ValueType.from_string(entity_string=value, type_=EntityTypes.object_value_type, mapping=None)
        return ValueType(
            prefix="undefined",
            suffix=value,
            name=value,
            type_=EntityTypes.object_value_type,
            mapping=None,
        )

    @validator("class_id", always=True)
    @skip_field_validator("validators_to_skip")
    def is_class_id_compliant(cls, value, values):
        """Reject class ids that violate the class id regexes."""
        if re.search(more_than_one_none_alphanumerics_regex, value):
            raise exceptions.MoreThanOneNonAlphanumericCharacter("class_id", value).to_pydantic_custom_error()
        if not re.match(class_id_compliance_regex, value):
            raise exceptions.PropertiesSheetClassIDRegexViolation(
                value, class_id_compliance_regex
            ).to_pydantic_custom_error()
        return value

    @validator("property_id", always=True)
    @skip_field_validator("validators_to_skip")
    def is_property_id_compliant(cls, value, values):
        """Reject property ids that violate the property id regexes."""
        if re.search(more_than_one_none_alphanumerics_regex, value):
            raise exceptions.MoreThanOneNonAlphanumericCharacter("property_id", value).to_pydantic_custom_error()
        if not re.match(property_id_compliance_regex, value):
            raise exceptions.PropertyIDRegexViolation(value, property_id_compliance_regex).to_pydantic_custom_error()
        return value

    @validator("expected_value_type", always=True)
    @skip_field_validator("validators_to_skip")
    def is_expected_value_type_compliant(cls, value, values):
        """Reject value types whose suffix violates the class id regexes."""
        if re.search(more_than_one_none_alphanumerics_regex, value.suffix):
            raise exceptions.MoreThanOneNonAlphanumericCharacter(
                "expected_value_type", value
            ).to_pydantic_custom_error()
        if not re.match(class_id_compliance_regex, value.suffix):
            raise exceptions.ValueTypeIDRegexViolation(value, class_id_compliance_regex).to_pydantic_custom_error()
        return value

    @validator("rule_type", pre=True)
    def to_lowercase(cls, value):
        """Case-fold the rule type; falsy input is returned as is."""
        return value.casefold() if value else value

    @validator("skip_rule", pre=True)
    def from_string(cls, value):
        """Interpret the strings true/skip/yes/y (case-insensitive) as True, other strings as False."""
        if isinstance(value, str):
            return value.casefold() in {"true", "skip", "yes", "y"}
        return value

    @validator("rule")
    @skip_field_validator("validators_to_skip")
    def is_valid_rule(cls, value, values):
        """Require a parsable rule whenever a rule type has been provided."""
        if rule_type := values.get("rule_type"):
            if not value:
                raise exceptions.RuleTypeProvidedButRuleMissing(
                    values["property_id"], values["class_id"], values["rule_type"]
                ).to_pydantic_custom_error()
            # Parsed only for validation; the result is discarded.
            _ = parse_rule(value, rule_type)
        return value

    @validator("resource_type_property", pre=True)
    def split_str(cls, v):
        """Split a comma-separated string into a list of stripped names.

        Falsy input yields None; input that is not a string (e.g. an already
        split list) is passed through instead of being wrapped in another list.
        """
        if not v:
            return None
        if isinstance(v, str):
            return [part.strip() for part in v.split(",")]
        return v

    @field_validator("cdf_resource_type", mode="before")
    def to_list_if_comma(cls, value, info):
        """Split a non-empty comma-separated string; map "" to None when the field default is None."""
        if isinstance(value, str):
            if value:
                return value.replace(", ", ",").split(",")
            if cls.model_fields[info.field_name].default is None:
                return None
        return value

    # Setters
    # TODO: configure setters to only run if field_validators are successful, otherwise do not run them!
    @property
    def is_mandatory(self) -> bool:
        """True when `min_count` differs from 0."""
        return self.min_count != 0

    @model_validator(mode="after")
    def set_property_type(self):
        """Derive `property_type` from the kind of the expected value type."""
        if self.expected_value_type.type_ == EntityTypes.data_value_type:
            self.property_type = EntityTypes.data_property
        else:
            self.property_type = EntityTypes.object_property
        return self

    @model_validator(mode="after")
    def set_container_if_missing(self):
        """Default the container to the owning class for data values or max_count == 1."""
        if not self.container and (
            self.expected_value_type.type_ == EntityTypes.data_value_type or self.max_count == 1
        ):
            self.container = ContainerEntity(prefix="undefined", suffix=self.class_id, name=self.class_id)
        return self

    @model_validator(mode="after")
    def set_container_property_if_missing(self):
        """Default the container property to `property_id` for data values or max_count == 1."""
        if not self.container_property and (
            self.expected_value_type.type_ == EntityTypes.data_value_type or self.max_count == 1
        ):
            self.container_property = self.property_id
        return self

    @model_validator(mode="after")
    def set_property_name_if_none(self):
        """Warn and fall back to `property_id` when no property name is given."""
        if self.property_name is None:
            warnings.warn(
                exceptions.PropertyNameNotProvided(self.property_id).message,
                category=exceptions.PropertyNameNotProvided,
                stacklevel=2,
            )
            self.property_name = self.property_id
        return self

    @model_validator(mode="after")
    @skip_model_validator("validators_to_skip")
    def set_relationship_label(self):
        """Warn and fall back to `property_id` when no relationship label is given."""
        if self.label is None:
            warnings.warn(
                exceptions.MissingLabel(self.property_id).message,
                category=exceptions.MissingLabel,
                stacklevel=2,
            )
            self.label = self.property_id
        return self

    @model_validator(mode="after")
    @skip_model_validator("validators_to_skip")
    def set_skip_rule(self):
        """Set `skip_rule` to True (with a warning) when no rule type is given, else to False."""
        if self.rule_type is None:
            warnings.warn(
                exceptions.NoTransformationRules(class_id=self.class_id, property_id=self.property_id).message,
                category=exceptions.NoTransformationRules,
                stacklevel=2,
            )
            self.skip_rule = True
        else:
            self.skip_rule = False
        return self

    @model_validator(mode="after")
    def set_default_as_list(self):
        """Warn about and convert a scalar default of a multi-valued data property into a list."""
        needs_list = (
            self.property_type == "DatatypeProperty"
            and self.default
            and self.max_count
            and self.max_count != 1
            and not isinstance(self.default, list)
        )
        if needs_list:
            warnings.warn(
                exceptions.DefaultValueNotList(self.property_id).message,
                category=exceptions.DefaultValueNotList,
                stacklevel=2,
            )
            if isinstance(self.default, str):
                # A string default is treated as a comma-separated list.
                self.default = self.default.replace(", ", ",").split(",")
            else:
                self.default = [self.default]
        return self

    @model_validator(mode="after")
    @skip_model_validator("validators_to_skip")
    def is_default_value_type_proper(self):
        """Coerce the default of a data property to the expected Python type.

        Raises:
            The pydantic custom error built from `DefaultValueTypeNotProper`
            when the default cannot be converted to the expected type.
        """
        if self.property_type != "DatatypeProperty" or not self.default:
            return self

        expected_type = self.expected_value_type.python
        # Only the first element is inspected to decide whether coercion is needed.
        sample = self.default[0] if isinstance(self.default, list) else self.default
        if type(sample) is expected_type:
            return self

        try:
            if isinstance(self.default, list):
                self.default = [expected_type(item) for item in self.default]
            else:
                self.default = expected_type(self.default)
        except Exception as exc:
            # Previously the exception object was created but never raised,
            # so an unconvertible default passed validation silently.
            raise exceptions.DefaultValueTypeNotProper(
                self.property_id,
                type(self.default),
                expected_type,
            ).to_pydantic_custom_error() from exc
        return self
906
-
907
-
908
class Properties(ResourceDict[Property]):
    """Collection of the `Property` resources that are part of the data model."""
912
-
913
-
914
class Prefixes(RuleModel):
    """
    Holds the prefixes used in the data model and the data model instances.

    Args:
        prefixes: Mapping from prefix to namespace. Defaults to the result of
                  `get_default_prefixes()`.
    """

    prefixes: dict[str, Namespace] = get_default_prefixes()
923
-
924
-
925
class Instance(RuleModel):
    """
    Class deals with RDF triple that defines data model instances of data, represented
    as a single row in the `Instances` sheet of the Excel file.

    Args:
        instance: URI of the instance
        property_: URI of the property
        value: value of the property
        namespace: namespace of the instance
        prefixes: prefixes of the instance

    !!! note "Warning"
        Use of the `Instances` sheet is not recommended, instead if you need additional
        triples in your graph use Graph Capturing Sheet instead!

        See
        [`extract_graph_from_sheet`](../graph/extractors.md#cognite.neat.graph.extractors.extract_graph_from_sheet)
        for more details.
    """

    instance: URIRef | None = Field(alias="Instance", default=None)
    property_: URIRef | None = Field(alias="Property", default=None)
    value: Literal | URIRef | None = Field(alias="Value", default=None)
    namespace: Namespace
    prefixes: dict[str, Namespace]

    @staticmethod
    def get_value(value, prefixes) -> URIRef | Literal:
        """Resolve `value` to a `URIRef` when it is a URL or a prefixed entity.

        When `value` is neither a valid URL nor parsable as an entity, it is
        returned unchanged and the caller decides how to type it.
        """
        try:
            url = URL(url=value).url
            return URIRef(str(url))
        except ValidationError:
            try:
                entity = Entity.from_string(value)
                return URIRef(prefixes[entity.prefix][entity.suffix])
            except ValueError:
                return value

    @model_validator(mode="before")
    def convert_values(cls, values: dict):
        """Convert the raw 'Instance', 'Property' and 'Value' cells into RDF terms.

        Raises:
            TypeError: If any of the 'Instance', 'Property' or 'Value' keys is missing.
        """
        # we expect to read Excel sheet which contains naming convention of column
        # 'Instance', 'Property', 'Value', if that's not the case we should raise error
        if not {"Instance", "Property", "Value"}.issubset(set(values.keys())):
            raise TypeError("We only support inputs from the transformation rule Excel sheet!!!")

        namespace = values["namespace"]
        prefixes = values["prefixes"]

        # Subject and predicate: anything that did not resolve to a URI is
        # placed in the instance namespace.
        for column in ("Instance", "Property"):
            resolved = cls.get_value(values[column], prefixes)
            values[column] = resolved if isinstance(resolved, URIRef) else URIRef(namespace[resolved])

        raw_value = values["Value"]
        if isinstance(raw_value, str):
            resolved = cls.get_value(raw_value, prefixes)
            if not isinstance(resolved, URIRef):
                if cls.isint(resolved):
                    datatype = XSD.integer
                elif cls.isfloat(resolved):
                    datatype = XSD.float
                else:
                    datatype = XSD.string
                resolved = Literal(resolved, datatype=datatype)
            values["Value"] = resolved
        elif isinstance(raw_value, float):
            values["Value"] = Literal(raw_value, datatype=XSD.float)
        elif isinstance(raw_value, bool):
            # bool is a subclass of int, so it must be tested before int;
            # otherwise booleans are typed as integers.
            values["Value"] = Literal(raw_value, datatype=XSD.boolean)
        elif isinstance(raw_value, int):
            values["Value"] = Literal(raw_value, datatype=XSD.integer)
        elif isinstance(raw_value, datetime):
            values["Value"] = Literal(raw_value, datatype=XSD.dateTime)
        else:
            values["Value"] = Literal(raw_value, datatype=XSD.string)

        return values

    @staticmethod
    def isfloat(x):
        """Return True when `float(x)` succeeds."""
        try:
            float(x)
        except (TypeError, ValueError, OverflowError):
            # OverflowError: an int too large to be represented as a float.
            return False
        return True

    @staticmethod
    def isint(x):
        """Return True when `x` converts to a float with an integral value."""
        try:
            as_float = float(x)
            as_int = int(as_float)
        except (TypeError, ValueError, OverflowError):
            # int(float("inf")) raises OverflowError, int(float("nan")) ValueError.
            return False
        return as_float == as_int
1026
-
1027
-
1028
class Rules(RuleModel):
    """
    Rules is a core concept in `neat`. This represents fusion of data model
    definitions and (optionally) the transformation rules used to transform the data/graph
    from the source representation to the target representation defined by the data model.
    The rules are defined in a Excel sheet and then parsed into a `Rules` object. The
    `Rules` object is then used to generate data model and the`RDF` graph made of data
    model instances.

    Args:
        metadata: Data model metadata
        classes: Classes defined in the data model
        properties: Class properties defined in the data model with accompanying transformation rules
                    to transform data from source to target representation
        prefixes: Prefixes used in the data model. Defaults to internal prefixes
        instances: Instances defined in the data model. Defaults to None
        validators_to_skip: List of validators to skip. Defaults to []

    !!! note "Importers"
        Neat supports importing data from different sources. See the importers section for more details.

    !!! note "Exporters"
        Neat supports exporting data to different sources. See the exporters section for more details.

    !!! note "validators_to_skip" use this only if you are sure what you are doing
    """

    metadata: Metadata
    classes: Classes
    properties: Properties
    prefixes: dict[str, Namespace] = get_default_prefixes()
    instances: list[Instance] = Field(default_factory=list)

    @property
    def raw_tables(self) -> list[str]:
        """Names of the tables referenced by raw lookup rules, without duplicates."""
        return list(
            {
                parse_rule(rule.rule, TransformationRuleType.rawlookup).table.name  # type: ignore[arg-type, attr-defined]
                for rule in self.properties.values()
                if rule.is_raw_lookup
            }
        )

    @field_validator("instances", mode="before")
    def none_as_empty_list(cls, value):
        """Replace a None `instances` input with an empty list."""
        return [] if value is None else value

    @field_validator("classes", mode="before")
    def dict_to_classes_obj(cls, value: dict | Classes) -> Classes:
        """Validate a plain dict into a `Classes` collection; other input passes through."""
        if not isinstance(value, dict):
            return value
        dict_of_classes = TypeAdapter(dict[str, Class]).validate_python(value)
        return Classes(data=dict_of_classes)

    @field_validator("properties", mode="before")
    def dict_to_properties_obj(cls, value: dict | Properties) -> Properties:
        """Validate a plain dict into a `Properties` collection; other input passes through."""
        if not isinstance(value, dict):
            return value
        dict_of_properties = TypeAdapter(dict[str, Property]).validate_python(value)
        return Properties(data=dict_of_properties)

    @model_validator(mode="after")
    @skip_model_validator("validators_to_skip")
    def update_prefix_version_entities(self) -> Self:
        """Replace the "undefined" placeholder prefix (and missing versions) with the metadata values."""
        version = self.metadata.version
        prefix = self.metadata.prefix

        for property_ in self.properties.values():
            # update expected_value_types;
            # only update version of expected value type which are part of this data model
            value_type = property_.expected_value_type
            if not value_type.version and value_type.prefix == "undefined":
                value_type.version = version
            if value_type.prefix == "undefined":
                value_type.prefix = prefix

            # update container
            container = property_.container
            if container and cast(ContainerEntity, container).prefix == "undefined":
                cast(ContainerEntity, container).prefix = prefix

        # update parent classes
        for class_ in self.classes.values():
            if class_.parent_class:
                for parent_class in cast(list[ParentClass], class_.parent_class):
                    if parent_class.prefix == "undefined":
                        parent_class.prefix = prefix
                    if not parent_class.version:
                        parent_class.version = version

        return self

    @model_validator(mode="after")
    @skip_model_validator("validators_to_skip")
    def update_container_description_and_name(self):
        """Copy description and name of the matching class onto containers in the model space."""
        for property_ in self.properties.values():
            container = property_.container
            if (
                container
                and container.external_id in self.classes
                and container.space == self.metadata.space
            ):
                matching_class = self.classes[container.external_id]
                container.description = matching_class.description
                container.name = matching_class.class_name
        return self

    @model_validator(mode="after")
    @skip_model_validator("validators_to_skip")
    def add_missing_classes(self):
        """Create a class entry for every property whose class is not defined."""
        for property_ in self.properties.values():
            if property_.class_id not in self.classes:
                self.classes[property_.class_id] = Class(
                    class_id=property_.class_id,
                    class_name=property_.class_id,
                    comment="This class was automatically added by neat",
                )
        return self

    def update_prefix(self, prefix: str):
        """Rename the data model prefix and every entity that uses the old one.

        Warns and does nothing when `prefix` equals the current prefix.

        Raises:
            The pydantic custom error built from `PrefixAlreadyInUse` when the
            prefix is already registered, or from `PrefixRegexViolation` when it
            does not match the space compliance regex.
        """
        if prefix == self.metadata.prefix:
            warnings.warn("Prefix is already in use, no changes made!", stacklevel=2)
            return
        if prefix in self.prefixes:
            raise exceptions.PrefixAlreadyInUse(prefix).to_pydantic_custom_error()
        if not re.match(cdf_space_compliance_regex, prefix):
            raise exceptions.PrefixRegexViolation(prefix, cdf_space_compliance_regex).to_pydantic_custom_error()

        old_prefix = self.metadata.prefix
        self.metadata.prefix = prefix

        # update entity ids for expected_value_types and containers
        for property_ in self.properties.values():
            if property_.expected_value_type.prefix == old_prefix:
                property_.expected_value_type.prefix = prefix

            container = property_.container
            if container and cast(ContainerEntity, container).prefix == old_prefix:
                cast(ContainerEntity, container).prefix = prefix

        # update parent classes
        for class_ in self.classes.values():
            if class_.parent_class:
                for parent_class in cast(list[ParentClass], class_.parent_class):
                    if parent_class.prefix == old_prefix:
                        parent_class.prefix = prefix

        # update prefixes
        self.prefixes[prefix] = self.prefixes.pop(old_prefix)

    def update_space(self, space: str):
        "Convenience method for updating prefix more intuitive to CDF users"
        return self.update_prefix(space)

    def update_version(self, version: str):
        """Change the data model version and every entity still on the old version.

        Warns and does nothing when `version` equals the current version.

        Raises:
            The pydantic custom error built from `VersionRegexViolation` when
            `version` does not match the version compliance regex.
        """
        if version == self.metadata.version:
            warnings.warn("Version is already in use, no changes made!", stacklevel=2)
            return
        if not re.match(version_compliance_regex, version):
            raise exceptions.VersionRegexViolation(version, version_compliance_regex).to_pydantic_custom_error()

        old_version = self.metadata.version
        self.metadata.version = version

        for property_ in self.properties.values():
            value_type = property_.expected_value_type
            if value_type.prefix == self.metadata.prefix and value_type.version == old_version:
                value_type.version = version

        for class_ in self.classes.values():
            if class_.parent_class:
                for parent_class in cast(list[ParentClass], class_.parent_class):
                    if parent_class.prefix == self.metadata.prefix and parent_class.version == old_version:
                        parent_class.version = version

    @validator("prefixes")
    @skip_field_validator("validators_to_skip")
    def are_prefixes_compliant(cls, value, values):
        """Reject prefixes that violate the prefix regexes."""
        if ill_formed_prefixes := [
            prefix for prefix in value if re.search(more_than_one_none_alphanumerics_regex, prefix)
        ]:
            raise exceptions.MoreThanOneNonAlphanumericCharacter(
                "prefixes", ", ".join(ill_formed_prefixes)
            ).to_pydantic_custom_error()
        if ill_formed_prefixes := [prefix for prefix in value if not re.match(prefix_compliance_regex, prefix)]:
            raise exceptions.PrefixesRegexViolation(
                ill_formed_prefixes, prefix_compliance_regex
            ).to_pydantic_custom_error()
        return value

    @validator("prefixes")
    @skip_field_validator("validators_to_skip")
    def are_namespaces_compliant(cls, value, values):
        """Reject the prefixes mapping when any namespace is not a valid URL."""
        ill_formed_namespaces = []
        for namespace in value.values():
            try:
                _ = URL(url=namespace).url
            except ValueError:
                # append, not "+=": extending a list with a string adds its
                # individual characters instead of the namespace itself.
                ill_formed_namespaces.append(namespace)

        if ill_formed_namespaces:
            raise exceptions.PrefixesSheetNamespaceNotValidURL(ill_formed_namespaces).to_pydantic_custom_error()
        return value

    @validator("prefixes")
    @skip_field_validator("validators_to_skip")
    def add_data_model_prefix_namespace(cls, value, values):
        """Register the data model's own prefix and namespace in the prefixes mapping."""
        if "metadata" not in values:
            raise exceptions.MetadataSheetMissingOrFailedValidation().to_pydantic_custom_error()

        metadata = values["metadata"]
        metadata_fields = metadata.model_dump()
        for required in ("prefix", "namespace"):
            if required not in metadata_fields:
                raise exceptions.FiledInMetadataSheetMissingOrFailedValidation(
                    missing_field=required
                ).to_pydantic_custom_error()

        value[metadata.prefix] = metadata.namespace
        return value

    @property
    def space(self) -> str:
        """Returns data model space."""
        return cast(str, self.metadata.prefix)

    @property
    def external_id(self) -> str:
        """Returns data model external."""
        return cast(str, self.metadata.suffix)

    @property
    def name(self) -> str:
        """Returns data model name."""
        return cast(str, self.metadata.title)

    def _repr_html_(self) -> str:
        """Pretty display of the TransformationRules object in a Notebook"""
        dump = self.metadata.model_dump(by_alias=True)
        for key in ["creator", "contributor"]:
            dump[key] = ", ".join(dump[key]) if isinstance(dump[key], list) else dump[key]
        dump["class_count"] = len(self.classes)
        dump["property_count"] = len(self.properties)
        dump["instance_count"] = len(self.instances)
        return pd.Series(dump).to_frame("value")._repr_html_()  # type: ignore[operator]