cognite-neat 0.88.0__py3-none-any.whl → 0.88.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cognite-neat might be problematic. Click here for more details.

Files changed (99) hide show
  1. cognite/neat/_version.py +1 -1
  2. cognite/neat/app/api/routers/configuration.py +1 -1
  3. cognite/neat/app/ui/neat-app/build/asset-manifest.json +7 -7
  4. cognite/neat/app/ui/neat-app/build/index.html +1 -1
  5. cognite/neat/app/ui/neat-app/build/static/css/{main.38a62222.css → main.72e3d92e.css} +2 -2
  6. cognite/neat/app/ui/neat-app/build/static/css/main.72e3d92e.css.map +1 -0
  7. cognite/neat/app/ui/neat-app/build/static/js/main.5a52cf09.js +3 -0
  8. cognite/neat/app/ui/neat-app/build/static/js/{main.ec7f72e2.js.LICENSE.txt → main.5a52cf09.js.LICENSE.txt} +0 -9
  9. cognite/neat/app/ui/neat-app/build/static/js/main.5a52cf09.js.map +1 -0
  10. cognite/neat/config.py +44 -27
  11. cognite/neat/exceptions.py +8 -2
  12. cognite/neat/graph/extractors/_classic_cdf/_assets.py +21 -73
  13. cognite/neat/graph/extractors/_classic_cdf/_base.py +102 -0
  14. cognite/neat/graph/extractors/_classic_cdf/_events.py +46 -42
  15. cognite/neat/graph/extractors/_classic_cdf/_files.py +41 -45
  16. cognite/neat/graph/extractors/_classic_cdf/_labels.py +75 -52
  17. cognite/neat/graph/extractors/_classic_cdf/_relationships.py +49 -27
  18. cognite/neat/graph/extractors/_classic_cdf/_sequences.py +47 -50
  19. cognite/neat/graph/extractors/_classic_cdf/_timeseries.py +47 -49
  20. cognite/neat/graph/loaders/_base.py +4 -4
  21. cognite/neat/graph/loaders/_rdf2asset.py +12 -14
  22. cognite/neat/graph/loaders/_rdf2dms.py +14 -10
  23. cognite/neat/graph/queries/_base.py +22 -29
  24. cognite/neat/graph/queries/_shared.py +1 -1
  25. cognite/neat/graph/stores/_base.py +19 -11
  26. cognite/neat/graph/transformers/_rdfpath.py +3 -2
  27. cognite/neat/issues/__init__.py +16 -0
  28. cognite/neat/{issues.py → issues/_base.py} +78 -2
  29. cognite/neat/issues/errors/external.py +21 -0
  30. cognite/neat/issues/errors/properties.py +75 -0
  31. cognite/neat/issues/errors/resources.py +123 -0
  32. cognite/neat/issues/errors/schema.py +0 -0
  33. cognite/neat/{rules/issues → issues}/formatters.py +9 -9
  34. cognite/neat/issues/neat_warnings/__init__.py +2 -0
  35. cognite/neat/issues/neat_warnings/identifier.py +27 -0
  36. cognite/neat/issues/neat_warnings/models.py +22 -0
  37. cognite/neat/issues/neat_warnings/properties.py +77 -0
  38. cognite/neat/issues/neat_warnings/resources.py +125 -0
  39. cognite/neat/rules/exporters/_rules2dms.py +3 -2
  40. cognite/neat/rules/exporters/_rules2ontology.py +28 -20
  41. cognite/neat/rules/exporters/_validation.py +15 -21
  42. cognite/neat/rules/importers/__init__.py +7 -3
  43. cognite/neat/rules/importers/_base.py +3 -3
  44. cognite/neat/rules/importers/_dms2rules.py +39 -18
  45. cognite/neat/rules/importers/_dtdl2rules/dtdl_converter.py +44 -53
  46. cognite/neat/rules/importers/_dtdl2rules/dtdl_importer.py +6 -5
  47. cognite/neat/rules/importers/_rdf/__init__.py +0 -0
  48. cognite/neat/rules/importers/_rdf/_imf2rules/__init__.py +3 -0
  49. cognite/neat/rules/importers/_rdf/_imf2rules/_imf2classes.py +82 -0
  50. cognite/neat/rules/importers/_rdf/_imf2rules/_imf2metadata.py +34 -0
  51. cognite/neat/rules/importers/_rdf/_imf2rules/_imf2properties.py +123 -0
  52. cognite/neat/rules/importers/{_owl2rules/_owl2rules.py → _rdf/_imf2rules/_imf2rules.py} +15 -11
  53. cognite/neat/rules/importers/{_inference2rules.py → _rdf/_inference2rules.py} +1 -1
  54. cognite/neat/rules/importers/_rdf/_owl2rules/_owl2classes.py +57 -0
  55. cognite/neat/rules/importers/_rdf/_owl2rules/_owl2metadata.py +68 -0
  56. cognite/neat/rules/importers/_rdf/_owl2rules/_owl2properties.py +59 -0
  57. cognite/neat/rules/importers/_rdf/_owl2rules/_owl2rules.py +76 -0
  58. cognite/neat/rules/importers/_rdf/_shared.py +586 -0
  59. cognite/neat/rules/importers/_spreadsheet2rules.py +31 -28
  60. cognite/neat/rules/importers/_yaml2rules.py +2 -1
  61. cognite/neat/rules/issues/__init__.py +1 -5
  62. cognite/neat/rules/issues/base.py +2 -21
  63. cognite/neat/rules/issues/dms.py +20 -134
  64. cognite/neat/rules/issues/ontology.py +298 -0
  65. cognite/neat/rules/issues/spreadsheet.py +51 -3
  66. cognite/neat/rules/issues/tables.py +72 -0
  67. cognite/neat/rules/models/_rdfpath.py +4 -4
  68. cognite/neat/rules/models/_types/_field.py +14 -21
  69. cognite/neat/rules/models/asset/_validation.py +1 -1
  70. cognite/neat/rules/models/dms/_schema.py +53 -30
  71. cognite/neat/rules/models/dms/_validation.py +2 -2
  72. cognite/neat/rules/models/entities.py +3 -0
  73. cognite/neat/rules/models/information/_rules.py +5 -4
  74. cognite/neat/rules/models/information/_validation.py +1 -1
  75. cognite/neat/utils/rdf_.py +17 -9
  76. cognite/neat/utils/regex_patterns.py +52 -0
  77. cognite/neat/workflows/steps/lib/current/rules_importer.py +73 -1
  78. cognite/neat/workflows/steps/lib/current/rules_validator.py +19 -7
  79. {cognite_neat-0.88.0.dist-info → cognite_neat-0.88.2.dist-info}/METADATA +2 -6
  80. {cognite_neat-0.88.0.dist-info → cognite_neat-0.88.2.dist-info}/RECORD +85 -72
  81. cognite/neat/app/ui/neat-app/build/static/css/main.38a62222.css.map +0 -1
  82. cognite/neat/app/ui/neat-app/build/static/js/main.ec7f72e2.js +0 -3
  83. cognite/neat/app/ui/neat-app/build/static/js/main.ec7f72e2.js.map +0 -1
  84. cognite/neat/graph/issues/loader.py +0 -104
  85. cognite/neat/graph/stores/_oxrdflib.py +0 -247
  86. cognite/neat/rules/exceptions.py +0 -2972
  87. cognite/neat/rules/importers/_owl2rules/_owl2classes.py +0 -215
  88. cognite/neat/rules/importers/_owl2rules/_owl2metadata.py +0 -213
  89. cognite/neat/rules/importers/_owl2rules/_owl2properties.py +0 -203
  90. cognite/neat/rules/issues/importing.py +0 -408
  91. cognite/neat/rules/models/_types/_base.py +0 -16
  92. cognite/neat/workflows/examples/Export_Rules_to_Ontology/workflow.yaml +0 -152
  93. cognite/neat/workflows/examples/Extract_DEXPI_Graph_and_Export_Rules/workflow.yaml +0 -139
  94. cognite/neat/workflows/examples/Ontology_to_Data_Model/workflow.yaml +0 -116
  95. /cognite/neat/{graph/issues → issues/errors}/__init__.py +0 -0
  96. /cognite/neat/rules/importers/{_owl2rules → _rdf/_owl2rules}/__init__.py +0 -0
  97. {cognite_neat-0.88.0.dist-info → cognite_neat-0.88.2.dist-info}/LICENSE +0 -0
  98. {cognite_neat-0.88.0.dist-info → cognite_neat-0.88.2.dist-info}/WHEEL +0 -0
  99. {cognite_neat-0.88.0.dist-info → cognite_neat-0.88.2.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,586 @@
1
+ import datetime
2
+
3
+ import numpy as np
4
+ import pandas as pd
5
+ from rdflib import OWL, Literal, Namespace
6
+
7
+ from cognite.neat.rules.models._base import MatchType
8
+ from cognite.neat.rules.models.data_types import _XSD_TYPES
9
+ from cognite.neat.utils.rdf_ import remove_namespace_from_uri
10
+ from cognite.neat.utils.regex_patterns import PATTERNS
11
+
12
+
13
def parse_raw_classes_dataframe(query_results: list[tuple]) -> pd.DataFrame:
    """Build the raw Classes dataframe from SPARQL query rows.

    Args:
        query_results: Rows of (class, name, description, parent class,
            reference, match, comment) tuples.

    Returns:
        Dataframe with namespace-stripped identifiers, or an empty frame
        when there are no rows.
    """
    columns = ["Class", "Name", "Description", "Parent Class", "Reference", "Match", "Comment"]
    frame = pd.DataFrame(query_results, columns=columns)
    if frame.empty:
        return frame

    # Blank out NaNs so the string operations below are safe.
    frame.replace(np.nan, "", regex=True, inplace=True)

    # Keep the full URI as the reference before shortening the class id.
    frame.Reference = frame.Class
    frame.Class = frame.Class.apply(remove_namespace_from_uri)
    frame["Match Type"] = [MatchType.exact] * len(frame)
    frame["Comment"] = [None] * len(frame)
    frame["Parent Class"] = frame["Parent Class"].apply(remove_namespace_from_uri)

    return frame
39
+
40
+
41
def clean_up_classes(df: pd.DataFrame) -> pd.DataFrame:
    """Collapse duplicate class rows into a single row per class.

    Descriptions are concatenated with newlines and parent classes are
    merged into a list; for all other columns the first unique value wins.
    """
    aggregated = []
    for class_id, group in df.groupby("Class"):
        aggregated.append(
            {
                "Class": class_id,
                "Name": group["Name"].unique()[0],
                "Description": "\n".join(list(group.Description.unique())),
                "Parent Class": ", ".join(list(group["Parent Class"].unique())),
                "Reference": group["Reference"].unique()[0],
                "Match Type": group["Match Type"].unique()[0],
                "Comment": group["Comment"].unique()[0],
            }
        )

    result = pd.DataFrame(aggregated)

    # Restore NaNs that were blanked out earlier in the pipeline.
    result.replace("", None, inplace=True)

    # "Parent Class" was serialized as a comma-separated string; re-split it.
    result["Parent Class"] = result["Parent Class"].apply(
        lambda value: value.split(", ") if isinstance(value, str) else None
    )

    return result
64
+
65
+
66
def make_classes_compliant(classes: pd.DataFrame, importer: str = "RDF-based") -> pd.DataFrame:
    """Make classes compliant.

    Args:
        classes: Dataframe of class definitions produced by the importer.
        importer: Human-readable importer name used in generated comments.

    Returns:
        Dataframe containing compliant classes

    !!! note "About the compliant classes"
        The compliant classes are based on the OWL base ontology, but adapted to NEAT and use in CDF.
        One thing to note is that this method would not be able to fix issues with class ids which
        are not compliant with the CDF naming convention. For example, if a class id contains a space,
        starts with a number, etc. This will cause issues when trying to create the class in CDF.
    """

    # Replace empty or non-string values in "Match" column with "exact"
    classes["Match Type"] = classes["Match Type"].fillna(MatchType.exact)
    classes["Match Type"] = classes["Match Type"].apply(
        lambda x: MatchType.exact if not isinstance(x, str) or len(x) == 0 else x
    )

    # Replace empty or non-string values in "Comment" column with a default value
    classes["Comment"] = classes["Comment"].fillna(f"Imported using {importer} importer")
    classes["Comment"] = classes["Comment"].apply(
        lambda x: (f"Imported using {importer} importer" if not isinstance(x, str) or len(x) == 0 else x)
    )

    # Add _object_property_class, _data_type_property_class, _thing_class to the dataframe
    # so properties without a declared domain/range have something to attach to.
    classes = pd.concat(
        [
            classes,
            pd.DataFrame([object_property_class(), data_type_property_class(), thing_class()]),
        ],
        ignore_index=True,
    )

    # Reduce length of elements in the "Description" column to 1024 characters
    classes["Description"] = classes["Description"].apply(lambda x: x[:1024] if isinstance(x, str) else None)

    # Add missing parent classes to the dataframe
    # (done last so the injected default classes above are also considered defined)
    classes = pd.concat(
        [classes, pd.DataFrame(add_parent_class(classes))],
        ignore_index=True,
    )

    return classes
110
+
111
+
112
def object_property_class() -> dict:
    """Return the class definition NEAT injects to stand in for owl:ObjectProperty."""
    note = "Added by NEAT based on owl:ObjectProperty but adapted to NEAT and use in CDF."
    definition = {
        "Class": "ObjectProperty",
        "Name": None,
        "Description": "The class of object properties.",
        "Parent Class": None,
        "Reference": OWL.ObjectProperty,
        "Match Type": MatchType.exact,
        "Comment": note,
    }
    return definition
122
+
123
+
124
def data_type_property_class() -> dict:
    """Return the class definition NEAT injects to stand in for owl:DatatypeProperty."""
    note = "Added by NEAT based on owl:DatatypeProperty but adapted to NEAT and use in CDF."
    definition = {
        "Class": "DatatypeProperty",
        "Name": None,
        "Description": "The class of data properties.",
        "Parent Class": None,
        "Reference": OWL.DatatypeProperty,
        "Match Type": MatchType.exact,
        "Comment": note,
    }
    return definition
134
+
135
+
136
def thing_class() -> dict:
    """Return the owl:Thing stand-in class, used as a fallback value type."""
    note = (
        "Added by NEAT. "
        "Imported from OWL base ontology, it is meant for use as a default"
        " value type for object properties which miss a declared range."
    )
    definition = {
        "Class": "Thing",
        "Name": None,
        "Description": "The class of holding class individuals.",
        "Parent Class": None,
        "Reference": OWL.Thing,
        "Match Type": MatchType.exact,
        "Comment": note,
    }
    return definition
150
+
151
+
152
def add_parent_class(df: pd.DataFrame) -> list[dict]:
    """Create class rows for parent classes that are referenced but never defined.

    Args:
        df: Classes dataframe where "Parent Class" holds a list of parent ids
            (or a falsy value for none).

    Returns:
        One placeholder row per missing parent class.
    """
    referenced_parents: set = set()
    for parents in df["Parent Class"].tolist():
        if not parents:
            continue
        for parent in parents:
            if parent != "" and parent is not None:
                referenced_parents.add(parent)

    defined_classes = set(df["Class"].tolist())

    note = (
        "Added by NEAT. "
        "This is a parent class that is missing in the ontology. "
        "It is added by NEAT to make the ontology compliant with CDF."
    )

    return [
        {
            "Class": missing,
            "Name": None,
            "Description": None,
            "Parent Class": None,
            "Reference": None,
            "Match Type": None,
            "Comment": note,
        }
        for missing in referenced_parents - defined_classes
    ]
181
+
182
+
183
def parse_raw_properties_dataframe(query_results: list[tuple]) -> pd.DataFrame:
    """Build the raw Properties dataframe from SPARQL query rows.

    Args:
        query_results: Rows of (class, property, name, description, value type,
            min count, max count, default, reference, match type, comment,
            property type) tuples.

    Returns:
        Dataframe with namespace-stripped identifiers, or an empty frame
        when there are no rows.
    """
    df = pd.DataFrame(
        query_results,
        columns=[
            "Class",
            "Property",
            "Name",
            "Description",
            "Value Type",
            "Min Count",
            "Max Count",
            "Default",
            "Reference",
            "Match Type",
            "Comment",
            "_property_type",
        ],
    )
    if df.empty:
        return df

    # Blank out NaNs so the string operations below are safe.
    df.replace(np.nan, "", regex=True, inplace=True)

    # Fall back to the property URI when no reference was provided at all.
    # BUG FIX: the previous `df.Reference if df.Reference.unique() else ...`
    # evaluated the truthiness of a numpy array, which raises
    # "The truth value of an array ... is ambiguous" as soon as the frame
    # contains more than one distinct reference value.
    if not df.Reference.astype(bool).any():
        df.Reference = df.Property.copy(deep=True)

    df.Class = df.Class.apply(lambda x: remove_namespace_from_uri(x))
    df.Property = df.Property.apply(lambda x: remove_namespace_from_uri(x))
    df["Value Type"] = df["Value Type"].apply(lambda x: remove_namespace_from_uri(x))
    df["Match Type"] = len(df) * [MatchType.exact]
    df["Comment"] = len(df) * [None]
    df["_property_type"] = df["_property_type"].apply(lambda x: remove_namespace_from_uri(x))

    return df
214
+
215
+
216
def clean_up_properties(df: pd.DataFrame) -> pd.DataFrame:
    """Collapse duplicate (class, property) rows into one row per pair.

    Descriptions are newline-joined and truncated to 1024 characters; an
    empty reference falls back to the property id; for all other columns
    the first unique value wins.
    """
    rows: list[dict] = []
    for (class_id, property_id), pair_df in df.groupby(["Class", "Property"]):
        rows.append(
            {
                "Class": class_id,
                "Property": property_id,
                "Name": pair_df["Name"].unique()[0],
                "Description": "\n".join(list(pair_df["Description"].unique()))[:1024],
                "Value Type": pair_df["Value Type"].unique()[0],
                "Min Count": pair_df["Min Count"].unique()[0],
                "Max Count": pair_df["Max Count"].unique()[0],
                "Default": pair_df["Default"].unique()[0],
                "Reference": pair_df["Reference"].unique()[0] or property_id,
                "Match Type": pair_df["Match Type"].unique()[0],
                "Comment": pair_df["Comment"].unique()[0],
                "_property_type": pair_df["_property_type"].unique()[0],
            }
        )

    cleaned = pd.DataFrame(rows)

    # Restore NaNs that were blanked out earlier in the pipeline.
    cleaned.replace("", None, inplace=True)

    return cleaned
245
+
246
+
247
def make_properties_compliant(properties: pd.DataFrame, importer: str = "RDF-based") -> pd.DataFrame:
    """Normalize imported property rows so they pass downstream validation.

    Fills in default cardinalities, match types and comments, truncates
    descriptions, and repairs properties missing a domain or a range.

    Args:
        properties: Dataframe of property definitions produced by the importer.
        importer: Human-readable importer name used in generated comments.

    Returns:
        Dataframe containing compliant properties.
    """
    # default to 0 if "Min Count" is not specified
    # NOTE(review): `Literal` here is rdflib.Literal, so a plain Python
    # int/str count is also replaced by the default — confirm intended.
    properties["Min Count"] = properties["Min Count"].apply(lambda x: 0 if not isinstance(x, Literal) or x == "" else x)

    # default to 1 if "Max Count" is not specified
    properties["Max Count"] = properties["Max Count"].apply(lambda x: 1 if not isinstance(x, Literal) or x == "" else x)

    # Replace empty or non-string values in "Match Type" column with "exact"
    properties["Match Type"] = properties["Match Type"].fillna("exact")
    properties["Match Type"] = properties["Match Type"].apply(
        lambda x: "exact" if not isinstance(x, str) or len(x) == 0 else x
    )

    # Replace empty or non-string values in "Comment" column with a default value
    properties["Comment"] = properties["Comment"].fillna(f"Imported using {importer} importer")
    properties["Comment"] = properties["Comment"].apply(
        lambda x: (f"Imported using {importer} importer" if not isinstance(x, str) or len(x) == 0 else x)
    )

    # Reduce length of elements in the "Description" column to 1024 characters
    properties["Description"] = properties["Description"].apply(lambda x: x[:1024] if isinstance(x, str) else None)

    # fixes and additions
    properties = fix_dangling_properties(properties)
    properties = fix_missing_property_value_type(properties)

    return properties
274
+
275
+
276
def fix_dangling_properties(properties: pd.DataFrame) -> pd.DataFrame:
    """This method fixes properties which are missing a domain definition in the ontology.

    Args:
        properties: Dataframe containing properties

    Returns:
        Dataframe containing properties with fixed domain
    """
    fallback_domain = {
        "ObjectProperty": object_property_class()["Class"],
        "DatatypeProperty": data_type_property_class()["Class"],
    }

    def _resolve_domain(row: pd.Series):
        # Rows that already declare a domain are left untouched.
        if not pd.isna(row["Class"]):
            return row["Class"]
        # Object properties attach to the ObjectProperty stand-in class;
        # everything else defaults to the DatatypeProperty stand-in.
        if row._property_type == "ObjectProperty":
            return fallback_domain[row._property_type]
        return fallback_domain["DatatypeProperty"]

    properties["Class"] = properties.apply(_resolve_domain, axis=1)
    return properties
302
+
303
+
304
def fix_missing_property_value_type(properties: pd.DataFrame) -> pd.DataFrame:
    """This method fixes properties which are missing a range definition in the ontology.

    Args:
        properties: Dataframe containing properties

    Returns:
        Dataframe containing properties with fixed range
    """

    def _resolve_range(row: pd.Series):
        # Rows that already declare a range are left untouched.
        if not pd.isna(row["Value Type"]):
            return row["Value Type"]
        # Object properties default to the owl:Thing stand-in; data
        # properties default to a plain string.
        if row._property_type == "ObjectProperty":
            return thing_class()["Class"]
        return "string"

    properties["Value Type"] = properties.apply(_resolve_range, axis=1)
    return properties
326
+
327
+
328
def make_metadata_compliant(metadata: dict) -> dict:
    """Attempts to fix errors in metadata, otherwise defaults to values that will pass validation.

    Args:
        metadata: Dictionary containing metadata

    Returns:
        Dictionary containing metadata with fixed errors
    """
    metadata = fix_namespace(metadata, default=Namespace("http://purl.org/cognite/neat#"))
    metadata = fix_prefix(metadata)
    metadata = fix_version(metadata)

    # Fall back to "now" (second precision) for missing/invalid timestamps.
    for date_type in ("created", "updated"):
        metadata = fix_date(
            metadata,
            date_type=date_type,
            default=datetime.datetime.now().replace(microsecond=0),
        )

    metadata = fix_title(metadata)
    metadata = fix_description(metadata)
    metadata = fix_author(metadata, "creator")
    metadata = fix_rights(metadata)
    metadata = fix_license(metadata)

    return metadata
358
+
359
+
360
def fix_license(metadata: dict, default: str = "Unknown license") -> dict:
    """Ensure metadata["license"] is a non-empty string, else set *default*.

    Simplified from an if/elif chain whose empty-string branch was
    unreachable: an empty string is falsy, so the walrus guard already
    routed it to the default.

    Args:
        metadata: Metadata dictionary, mutated in place.
        default: Value used when the license is missing or invalid.

    Returns:
        The same metadata dictionary.
    """
    value = metadata.get("license")
    if not isinstance(value, str) or not value:
        metadata["license"] = default
    return metadata
369
+
370
+
371
def fix_rights(metadata: dict, default: str = "Unknown rights") -> dict:
    """Ensure metadata["rights"] is a non-empty string, else set *default*.

    Simplified from an if/elif chain whose empty-string branch was
    unreachable (an empty string is falsy and was routed to the default).

    Args:
        metadata: Metadata dictionary, mutated in place.
        default: Value used when rights is missing or invalid.

    Returns:
        The same metadata dictionary.
    """
    value = metadata.get("rights")
    if not isinstance(value, str) or not value:
        metadata["rights"] = default
    return metadata
380
+
381
+
382
def fix_author(metadata: dict, author_type: str = "creator", default: str = "NEAT") -> dict:
    """Ensure metadata[author_type] is a non-empty string, else set *default*.

    Simplified from an if/elif chain: the `isinstance(author, list)` test was
    redundant (a list is never a str) and the empty-string branch was
    unreachable (an empty string is falsy and was routed to the default).

    Args:
        metadata: Metadata dictionary, mutated in place.
        author_type: Key to fix, e.g. "creator".
        default: Value used when the author is missing or invalid.

    Returns:
        The same metadata dictionary.
    """
    value = metadata.get(author_type)
    if not isinstance(value, str) or not value:
        metadata[author_type] = default
    return metadata
391
+
392
+
393
def fix_description(metadata: dict, default: str = "This model has been inferred from OWL ontology") -> dict:
    """Ensure metadata["description"] is a non-empty string of at most 1024 chars.

    Missing or invalid values fall back to *default*; overlong strings are
    truncated. The original `len(description) == 0` test was unreachable
    (an empty string is falsy and already fell back to the default).

    Args:
        metadata: Metadata dictionary, mutated in place.
        default: Value used when the description is missing or invalid.

    Returns:
        The same metadata dictionary.
    """
    value = metadata.get("description")
    if not isinstance(value, str) or not value:
        metadata["description"] = default
    elif len(value) > 1024:
        metadata["description"] = value[:1024]
    return metadata
402
+
403
+
404
def fix_prefix(metadata: dict, default: str = "neat") -> dict:
    """Ensure metadata["prefix"] is a compliance-pattern string, else *default*."""
    prefix = metadata.get("prefix", None)
    # Keep the value only when it is a non-empty string matching the
    # prefix compliance pattern; everything else falls back to the default.
    is_valid = isinstance(prefix, str) and bool(prefix) and bool(PATTERNS.prefix_compliance.match(prefix))
    if not is_valid:
        metadata["prefix"] = default
    return metadata
411
+
412
+
413
def fix_namespace(metadata: dict, default: Namespace) -> dict:
    """Coerce metadata["namespace"] to an rdflib Namespace, else use *default*."""
    raw = metadata.get("namespace", None)
    if not raw:
        # Missing or falsy value: use the default namespace.
        metadata["namespace"] = default
        return metadata
    if isinstance(raw, Namespace):
        # Already the right type, nothing to do.
        return metadata
    try:
        metadata["namespace"] = Namespace(raw)
    except Exception:
        # Whatever the value was, it could not be turned into a namespace.
        metadata["namespace"] = default
    return metadata
424
+
425
+
426
def fix_date(
    metadata: dict,
    date_type: str,
    default: datetime.datetime,
) -> dict:
    """Coerce metadata[date_type] into a datetime, falling back to *default*.

    Accepts datetimes (kept as-is), dates (promoted to midnight) and
    ISO-like strings of the form "%Y-%m-%dT%H:%M:%SZ"; anything else —
    including parse failures — becomes *default*.
    """
    value = metadata.get(date_type, None)
    if not value:
        metadata[date_type] = default
        return metadata
    try:
        # Order matters: datetime is a subclass of date, so check it first.
        if isinstance(value, datetime.datetime):
            return metadata
        if isinstance(value, datetime.date):
            metadata[date_type] = datetime.datetime.combine(value, datetime.datetime.min.time())
        elif isinstance(value, str):
            metadata[date_type] = datetime.datetime.strptime(value, "%Y-%m-%dT%H:%M:%SZ")
        else:
            metadata[date_type] = default
    except Exception:
        metadata[date_type] = default
    return metadata
447
+
448
+
449
def fix_version(metadata: dict, default: str = "1.0.0") -> dict:
    """Ensure metadata["version"] is a compliance-pattern string, else *default*.

    Also guards against non-string versions (e.g. a YAML-parsed float such
    as 1.0), which previously raised TypeError inside the regex match.

    Args:
        metadata: Metadata dictionary, mutated in place.
        default: Version used when the value is missing or invalid.

    Returns:
        The same metadata dictionary.
    """
    version = metadata.get("version", None)
    if not version or not isinstance(version, str) or not PATTERNS.version_compliance.match(version):
        metadata["version"] = default
    return metadata
457
+
458
+
459
def fix_title(metadata: dict, default: str = "OWL Inferred Data Model") -> dict:
    """Ensure metadata["title"] is a non-empty string of at most 255 chars.

    Missing or invalid values fall back to *default*; overlong strings are
    truncated. The original `len(title) == 0` branch was unreachable (an
    empty string is falsy and already fell back to the default).

    Args:
        metadata: Metadata dictionary, mutated in place.
        default: Title used when the value is missing or invalid.

    Returns:
        The same metadata dictionary.
    """
    value = metadata.get("title")
    if not isinstance(value, str) or not value:
        metadata["title"] = default
    elif len(value) > 255:
        metadata["title"] = value[:255]
    return metadata
473
+
474
+
475
def make_components_compliant(components: dict) -> dict:
    """Apply the component-level compliance fixes, in their original order."""
    for fixer in (
        add_missing_classes,
        add_missing_value_types,
        add_default_property_to_dangling_classes,
    ):
        components = fixer(components)
    return components
481
+
482
+
483
def add_missing_classes(components: dict[str, list[dict]]) -> dict:
    """Append class rows for classes used as property domains but never defined.

    Args:
        components: Mapping with "Classes" and "Properties" row lists.

    Returns:
        The same mapping, with placeholder class rows appended.
    """
    referenced = {row["Class"] for row in components["Properties"]}
    defined = {row["Class"] for row in components["Classes"]}

    comment = (
        "Added by NEAT. "
        "This is a class that a domain of a property but was not defined in the ontology. "
        "It is added by NEAT to make the ontology compliant with CDF."
    )

    for missing in referenced - defined:
        components["Classes"].append({"Class": missing, "Comment": comment})

    return components
507
+
508
+
509
def add_missing_value_types(components: dict) -> dict:
    """Add class rows for value types that are neither XSD primitives nor defined classes.

    Any "Value Type" used by a property that is not an XSD type (compared
    case-insensitively) and not already present in "Classes" gets a
    placeholder class row appended.

    Args:
        components: Mapping with "Classes" and "Properties" row lists.

    Returns:
        Updated mapping with placeholder class rows appended.
    """

    xsd_types = _XSD_TYPES
    # Value types referenced by properties but not defined as classes.
    candidate_value_types = {definition["Value Type"] for definition in components["Properties"]} - {
        definition["Class"] for definition in components["Classes"]
    }

    # to avoid issue of case sensitivity for xsd types
    value_types_lower = {v.lower() for v in candidate_value_types}

    xsd_types_lower = {x.lower() for x in xsd_types}

    # Create a mapping from lowercase strings to original strings
    value_types_mapping = {v.lower(): v for v in candidate_value_types}

    # Find the difference
    difference = value_types_lower - xsd_types_lower

    # Convert the difference back to the original case
    difference_original_case = {value_types_mapping[d] for d in difference}

    for class_ in difference_original_case:
        components["Classes"].append(
            {
                "Class": class_,
                "Comment": (
                    "Added by NEAT. "
                    "This is a class that a domain of a property but was not defined in the ontology. "
                    "It is added by NEAT to make the ontology compliant with CDF."
                ),
            }
        )

    return components
551
+
552
+
553
def add_default_property_to_dangling_classes(components: dict[str, list[dict]]) -> dict:
    """Give a default `label` property to classes with no parent and no properties.

    Args:
        components: Mapping with "Classes" and "Properties" row lists.

    Returns:
        The same mapping, with default property rows appended.
    """
    parentless = {
        row["Class"] for row in components["Classes"] if not row.get("Parent Class", None)
    }
    with_properties = {row["Class"] for row in components["Properties"]}

    note = (
        "Added by NEAT. "
        "This is property has been added to this class since otherwise it will create "
        "dangling classes in the ontology."
    )

    for dangling in parentless - with_properties:
        components["Properties"].append(
            {
                "Class": dangling,
                "Property": "label",
                "Value Type": "string",
                "Comment": note,
                "Min Count": 0,
                "Max Count": 1,
                "Reference": "http://www.w3.org/2000/01/rdf-schema#label",
            }
        )

    return components
@@ -11,8 +11,8 @@ from typing import Literal, cast, overload
11
11
  import pandas as pd
12
12
  from pandas import ExcelFile
13
13
 
14
+ from cognite.neat.issues import IssueList
14
15
  from cognite.neat.rules import issues
15
- from cognite.neat.rules.issues import IssueList
16
16
  from cognite.neat.rules.models import (
17
17
  RULES_PER_ROLE,
18
18
  AssetRules,
@@ -131,25 +131,24 @@ class SpreadsheetReader:
131
131
  names = MANDATORY_SHEETS_BY_ROLE[role]
132
132
  return {f"{self._sheet_prefix}{sheet_name}" for sheet_name in names if sheet_name != "Metadata"}
133
133
 
134
- def read(self, filepath: Path) -> None | ReadResult:
135
- with pd.ExcelFile(filepath) as excel_file:
136
- self._seen_files.add(filepath)
137
- self._seen_sheets.update(map(str, excel_file.sheet_names))
138
- metadata: MetadataRaw | None
139
- if self.metadata is not None:
140
- metadata = self.metadata
141
- else:
142
- metadata = self._read_metadata(excel_file, filepath)
143
- if metadata is None:
144
- # The reading of metadata failed, so we can't continue
145
- return None
146
-
147
- sheets, read_info_by_sheet = self._read_sheets(excel_file, metadata.role)
148
- if sheets is None or self.issue_list.has_errors:
134
+ def read(self, excel_file: pd.ExcelFile, filepath: Path) -> None | ReadResult:
135
+ self._seen_files.add(filepath)
136
+ self._seen_sheets.update(map(str, excel_file.sheet_names))
137
+ metadata: MetadataRaw | None
138
+ if self.metadata is not None:
139
+ metadata = self.metadata
140
+ else:
141
+ metadata = self._read_metadata(excel_file, filepath)
142
+ if metadata is None:
143
+ # The reading of metadata failed, so we can't continue
149
144
  return None
150
- sheets["Metadata"] = dict(metadata)
151
145
 
152
- return ReadResult(sheets, read_info_by_sheet, metadata)
146
+ sheets, read_info_by_sheet = self._read_sheets(excel_file, metadata.role)
147
+ if sheets is None or self.issue_list.has_errors:
148
+ return None
149
+ sheets["Metadata"] = dict(metadata)
150
+
151
+ return ReadResult(sheets, read_info_by_sheet, metadata)
153
152
 
154
153
  def _read_metadata(self, excel_file: ExcelFile, filepath: Path) -> MetadataRaw | None:
155
154
  if self.metadata_sheet_name not in excel_file.sheet_names:
@@ -232,17 +231,21 @@ class ExcelImporter(BaseImporter):
232
231
  issue_list.append(issues.spreadsheet_file.SpreadsheetNotFoundError(self.filepath))
233
232
  return self._return_or_raise(issue_list, errors)
234
233
 
235
- user_reader = SpreadsheetReader(issue_list)
236
- user_read = user_reader.read(self.filepath)
237
- if user_read is None or issue_list.has_errors:
238
- return self._return_or_raise(issue_list, errors)
234
+ with pd.ExcelFile(self.filepath) as excel_file:
235
+ user_reader = SpreadsheetReader(issue_list)
239
236
 
240
- last_read: ReadResult | None = None
241
- if any(sheet_name.startswith("Last") for sheet_name in user_reader.seen_sheets):
242
- last_read = SpreadsheetReader(issue_list, required=False, sheet_prefix="Last").read(self.filepath)
243
- reference_read: ReadResult | None = None
244
- if any(sheet_name.startswith("Ref") for sheet_name in user_reader.seen_sheets):
245
- reference_read = SpreadsheetReader(issue_list, sheet_prefix="Ref").read(self.filepath)
237
+ user_read = user_reader.read(excel_file, self.filepath)
238
+ if user_read is None or issue_list.has_errors:
239
+ return self._return_or_raise(issue_list, errors)
240
+
241
+ last_read: ReadResult | None = None
242
+ if any(sheet_name.startswith("Last") for sheet_name in user_reader.seen_sheets):
243
+ last_read = SpreadsheetReader(issue_list, required=False, sheet_prefix="Last").read(
244
+ excel_file, self.filepath
245
+ )
246
+ reference_read: ReadResult | None = None
247
+ if any(sheet_name.startswith("Ref") for sheet_name in user_reader.seen_sheets):
248
+ reference_read = SpreadsheetReader(issue_list, sheet_prefix="Ref").read(excel_file, self.filepath)
246
249
 
247
250
  if issue_list.has_errors:
248
251
  return self._return_or_raise(issue_list, errors)
@@ -3,8 +3,9 @@ from typing import Any, Literal, overload
3
3
 
4
4
  import yaml
5
5
 
6
+ from cognite.neat.issues import IssueList
6
7
  from cognite.neat.rules import issues
7
- from cognite.neat.rules.issues import IssueList, NeatValidationError, ValidationIssue
8
+ from cognite.neat.rules.issues import NeatValidationError, ValidationIssue
8
9
  from cognite.neat.rules.models import RULES_PER_ROLE, DMSRules, RoleTypes
9
10
  from cognite.neat.rules.models.dms import DMSRulesInput
10
11