cognite-neat 0.88.0__py3-none-any.whl → 0.88.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cognite-neat might be problematic. Click here for more details.

Files changed (99) hide show
  1. cognite/neat/_version.py +1 -1
  2. cognite/neat/app/api/routers/configuration.py +1 -1
  3. cognite/neat/app/ui/neat-app/build/asset-manifest.json +7 -7
  4. cognite/neat/app/ui/neat-app/build/index.html +1 -1
  5. cognite/neat/app/ui/neat-app/build/static/css/{main.38a62222.css → main.72e3d92e.css} +2 -2
  6. cognite/neat/app/ui/neat-app/build/static/css/main.72e3d92e.css.map +1 -0
  7. cognite/neat/app/ui/neat-app/build/static/js/main.5a52cf09.js +3 -0
  8. cognite/neat/app/ui/neat-app/build/static/js/{main.ec7f72e2.js.LICENSE.txt → main.5a52cf09.js.LICENSE.txt} +0 -9
  9. cognite/neat/app/ui/neat-app/build/static/js/main.5a52cf09.js.map +1 -0
  10. cognite/neat/config.py +44 -27
  11. cognite/neat/exceptions.py +8 -2
  12. cognite/neat/graph/extractors/_classic_cdf/_assets.py +21 -73
  13. cognite/neat/graph/extractors/_classic_cdf/_base.py +102 -0
  14. cognite/neat/graph/extractors/_classic_cdf/_events.py +46 -42
  15. cognite/neat/graph/extractors/_classic_cdf/_files.py +41 -45
  16. cognite/neat/graph/extractors/_classic_cdf/_labels.py +75 -52
  17. cognite/neat/graph/extractors/_classic_cdf/_relationships.py +49 -27
  18. cognite/neat/graph/extractors/_classic_cdf/_sequences.py +47 -50
  19. cognite/neat/graph/extractors/_classic_cdf/_timeseries.py +47 -49
  20. cognite/neat/graph/loaders/_base.py +4 -4
  21. cognite/neat/graph/loaders/_rdf2asset.py +12 -14
  22. cognite/neat/graph/loaders/_rdf2dms.py +14 -10
  23. cognite/neat/graph/queries/_base.py +22 -29
  24. cognite/neat/graph/queries/_shared.py +1 -1
  25. cognite/neat/graph/stores/_base.py +19 -11
  26. cognite/neat/graph/transformers/_rdfpath.py +3 -2
  27. cognite/neat/issues/__init__.py +16 -0
  28. cognite/neat/{issues.py → issues/_base.py} +78 -2
  29. cognite/neat/issues/errors/external.py +21 -0
  30. cognite/neat/issues/errors/properties.py +75 -0
  31. cognite/neat/issues/errors/resources.py +123 -0
  32. cognite/neat/issues/errors/schema.py +0 -0
  33. cognite/neat/{rules/issues → issues}/formatters.py +9 -9
  34. cognite/neat/issues/neat_warnings/__init__.py +2 -0
  35. cognite/neat/issues/neat_warnings/identifier.py +27 -0
  36. cognite/neat/issues/neat_warnings/models.py +22 -0
  37. cognite/neat/issues/neat_warnings/properties.py +77 -0
  38. cognite/neat/issues/neat_warnings/resources.py +125 -0
  39. cognite/neat/rules/exporters/_rules2dms.py +3 -2
  40. cognite/neat/rules/exporters/_rules2ontology.py +28 -20
  41. cognite/neat/rules/exporters/_validation.py +15 -21
  42. cognite/neat/rules/importers/__init__.py +7 -3
  43. cognite/neat/rules/importers/_base.py +3 -3
  44. cognite/neat/rules/importers/_dms2rules.py +39 -18
  45. cognite/neat/rules/importers/_dtdl2rules/dtdl_converter.py +44 -53
  46. cognite/neat/rules/importers/_dtdl2rules/dtdl_importer.py +6 -5
  47. cognite/neat/rules/importers/_rdf/__init__.py +0 -0
  48. cognite/neat/rules/importers/_rdf/_imf2rules/__init__.py +3 -0
  49. cognite/neat/rules/importers/_rdf/_imf2rules/_imf2classes.py +82 -0
  50. cognite/neat/rules/importers/_rdf/_imf2rules/_imf2metadata.py +34 -0
  51. cognite/neat/rules/importers/_rdf/_imf2rules/_imf2properties.py +123 -0
  52. cognite/neat/rules/importers/{_owl2rules/_owl2rules.py → _rdf/_imf2rules/_imf2rules.py} +15 -11
  53. cognite/neat/rules/importers/{_inference2rules.py → _rdf/_inference2rules.py} +1 -1
  54. cognite/neat/rules/importers/_rdf/_owl2rules/_owl2classes.py +57 -0
  55. cognite/neat/rules/importers/_rdf/_owl2rules/_owl2metadata.py +68 -0
  56. cognite/neat/rules/importers/_rdf/_owl2rules/_owl2properties.py +59 -0
  57. cognite/neat/rules/importers/_rdf/_owl2rules/_owl2rules.py +76 -0
  58. cognite/neat/rules/importers/_rdf/_shared.py +586 -0
  59. cognite/neat/rules/importers/_spreadsheet2rules.py +31 -28
  60. cognite/neat/rules/importers/_yaml2rules.py +2 -1
  61. cognite/neat/rules/issues/__init__.py +1 -5
  62. cognite/neat/rules/issues/base.py +2 -21
  63. cognite/neat/rules/issues/dms.py +20 -134
  64. cognite/neat/rules/issues/ontology.py +298 -0
  65. cognite/neat/rules/issues/spreadsheet.py +51 -3
  66. cognite/neat/rules/issues/tables.py +72 -0
  67. cognite/neat/rules/models/_rdfpath.py +4 -4
  68. cognite/neat/rules/models/_types/_field.py +14 -21
  69. cognite/neat/rules/models/asset/_validation.py +1 -1
  70. cognite/neat/rules/models/dms/_schema.py +53 -30
  71. cognite/neat/rules/models/dms/_validation.py +2 -2
  72. cognite/neat/rules/models/entities.py +3 -0
  73. cognite/neat/rules/models/information/_rules.py +5 -4
  74. cognite/neat/rules/models/information/_validation.py +1 -1
  75. cognite/neat/utils/rdf_.py +17 -9
  76. cognite/neat/utils/regex_patterns.py +52 -0
  77. cognite/neat/workflows/steps/lib/current/rules_importer.py +73 -1
  78. cognite/neat/workflows/steps/lib/current/rules_validator.py +19 -7
  79. {cognite_neat-0.88.0.dist-info → cognite_neat-0.88.2.dist-info}/METADATA +2 -6
  80. {cognite_neat-0.88.0.dist-info → cognite_neat-0.88.2.dist-info}/RECORD +85 -72
  81. cognite/neat/app/ui/neat-app/build/static/css/main.38a62222.css.map +0 -1
  82. cognite/neat/app/ui/neat-app/build/static/js/main.ec7f72e2.js +0 -3
  83. cognite/neat/app/ui/neat-app/build/static/js/main.ec7f72e2.js.map +0 -1
  84. cognite/neat/graph/issues/loader.py +0 -104
  85. cognite/neat/graph/stores/_oxrdflib.py +0 -247
  86. cognite/neat/rules/exceptions.py +0 -2972
  87. cognite/neat/rules/importers/_owl2rules/_owl2classes.py +0 -215
  88. cognite/neat/rules/importers/_owl2rules/_owl2metadata.py +0 -213
  89. cognite/neat/rules/importers/_owl2rules/_owl2properties.py +0 -203
  90. cognite/neat/rules/issues/importing.py +0 -408
  91. cognite/neat/rules/models/_types/_base.py +0 -16
  92. cognite/neat/workflows/examples/Export_Rules_to_Ontology/workflow.yaml +0 -152
  93. cognite/neat/workflows/examples/Extract_DEXPI_Graph_and_Export_Rules/workflow.yaml +0 -139
  94. cognite/neat/workflows/examples/Ontology_to_Data_Model/workflow.yaml +0 -116
  95. /cognite/neat/{graph/issues → issues/errors}/__init__.py +0 -0
  96. /cognite/neat/rules/importers/{_owl2rules → _rdf/_owl2rules}/__init__.py +0 -0
  97. {cognite_neat-0.88.0.dist-info → cognite_neat-0.88.2.dist-info}/LICENSE +0 -0
  98. {cognite_neat-0.88.0.dist-info → cognite_neat-0.88.2.dist-info}/WHEEL +0 -0
  99. {cognite_neat-0.88.0.dist-info → cognite_neat-0.88.2.dist-info}/entry_points.txt +0 -0
@@ -1,215 +0,0 @@
1
- from typing import cast
2
-
3
- import numpy as np
4
- import pandas as pd
5
- from rdflib import OWL, Graph
6
-
7
- from cognite.neat.rules.models._base import MatchType
8
- from cognite.neat.utils.rdf_ import remove_namespace_from_uri
9
-
10
-
11
def parse_owl_classes(graph: Graph, language: str = "en") -> list[dict]:
    """Parse OWL classes from a graph into a list of row dictionaries.

    Args:
        graph: Graph containing owl classes
        language: Language tag used to select labels and comments, by default "en"

    Returns:
        List of dictionaries, one per class. An empty list is returned when
        the query yields no rows.

    !!! note "Compliant OWL classes"
        This makes the method very opinionated, but results in a compliant classes.
    """

    query = """
        SELECT ?class ?name ?description ?parentClass ?reference ?match ?comment
        WHERE {
            ?class a owl:Class .
            OPTIONAL {?class rdfs:subClassOf ?parentClass }.
            OPTIONAL {?class rdfs:label ?name }.
            OPTIONAL {?class rdfs:comment ?description} .
            FILTER (!isBlank(?class))
            FILTER (!bound(?parentClass) || !isBlank(?parentClass))
            FILTER (!bound(?name) || LANG(?name) = "" || LANGMATCHES(LANG(?name), "en"))
            FILTER (!bound(?description) || LANG(?description) = "" || LANGMATCHES(LANG(?description), "en"))
        }
    """

    # Substitute only the quoted language tag. Replacing the bare substring
    # "en" would also rewrite identifiers that merely contain those letters
    # (rdfs:comment, ?parentClass, ?reference) and corrupt the query for any
    # language other than "en".
    localized_query = query.replace('"en"', f'"{language}"')

    # create raw dataframe
    raw_df = _parse_raw_dataframe(cast(list[tuple], list(graph.query(localized_query))))
    if raw_df.empty:
        return []

    # group values and clean up
    processed_df = _clean_up_classes(raw_df)

    # make compliant
    processed_df = make_classes_compliant(processed_df)

    # Make Parent Class list elements into string joined with comma
    processed_df["Parent Class"] = processed_df["Parent Class"].apply(
        lambda x: ", ".join(x) if isinstance(x, list) and x else None
    )

    return processed_df.dropna(axis=0, how="all").replace(float("nan"), None).to_dict(orient="records")
57
-
58
-
59
- def _parse_raw_dataframe(query_results: list[tuple]) -> pd.DataFrame:
60
- df = pd.DataFrame(
61
- query_results,
62
- columns=["Class", "Name", "Description", "Parent Class", "Reference", "Match", "Comment"],
63
- )
64
- if df.empty:
65
- return df
66
-
67
- # # remove NaNs
68
- df.replace(np.nan, "", regex=True, inplace=True)
69
-
70
- df.Reference = df.Class
71
- df.Class = df.Class.apply(lambda x: remove_namespace_from_uri(x))
72
- df["Match Type"] = len(df) * [MatchType.exact]
73
- df["Comment"] = len(df) * [None]
74
- df["Parent Class"] = df["Parent Class"].apply(lambda x: remove_namespace_from_uri(x))
75
-
76
- return df
77
-
78
-
79
- def _clean_up_classes(df: pd.DataFrame) -> pd.DataFrame:
80
- clean_list = [
81
- {
82
- "Class": class_,
83
- "Name": group_df["Name"].unique()[0],
84
- "Description": "\n".join(list(group_df.Description.unique())),
85
- "Parent Class": ", ".join(list(group_df["Parent Class"].unique())),
86
- "Reference": group_df["Reference"].unique()[0],
87
- "Match Type": group_df["Match Type"].unique()[0],
88
- "Comment": group_df["Comment"].unique()[0],
89
- }
90
- for class_, group_df in df.groupby("Class")
91
- ]
92
-
93
- df = pd.DataFrame(clean_list)
94
-
95
- # bring NaNs back
96
- df.replace("", None, inplace=True)
97
-
98
- # split Parent Class column back into list
99
- df["Parent Class"] = df["Parent Class"].apply(lambda x: x.split(", ") if isinstance(x, str) else None)
100
-
101
- return df
102
-
103
-
104
def make_classes_compliant(classes: pd.DataFrame) -> pd.DataFrame:
    """Make classes compliant.

    Args:
        classes: Dataframe with one row per class; its "Match Type" and
            "Comment" columns are overwritten in place

    Returns:
        Dataframe containing compliant classes

    !!! note "About the compliant classes"
        The compliant classes are based on the OWL base ontology, but adapted to NEAT and use in CDF.
        One thing to note is that this method would not be able to fix issues with class ids which
        are not compliant with the CDF naming convention. For example, if a class id contains a space,
        starts with a number, etc. This will cause issues when trying to create the class in CDF.
    """
    default_comment = "Imported from Ontology by NEAT"

    def _match_type_or_exact(value):
        return value if isinstance(value, str) and len(value) > 0 else MatchType.exact

    def _comment_or_default(value):
        return value if isinstance(value, str) and len(value) > 0 else default_comment

    def _truncated_description(value):
        return value[:1024] if isinstance(value, str) else None

    # Empty or non-string match types fall back to "exact"
    classes["Match Type"] = classes["Match Type"].fillna(MatchType.exact).apply(_match_type_or_exact)

    # Empty or non-string comments fall back to the default comment
    classes["Comment"] = classes["Comment"].fillna(default_comment).apply(_comment_or_default)

    # Append the ObjectProperty, DatatypeProperty and Thing helper classes
    helper_rows = [_object_property_class(), _data_type_property_class(), _thing_class()]
    classes = pd.concat([classes, pd.DataFrame(helper_rows)], ignore_index=True)

    # Descriptions are capped at 1024 characters; non-strings become None
    classes["Description"] = classes["Description"].apply(_truncated_description)

    # Append rows for parent classes that are referenced but not defined
    missing_parents = pd.DataFrame(_add_parent_class(classes))
    classes = pd.concat([classes, missing_parents], ignore_index=True)

    return classes
145
-
146
-
147
def _object_property_class() -> dict:
    """Build the row describing the ``ObjectProperty`` class added by NEAT.

    Returns:
        Dictionary with the same keys as a class row; "Name" and
        "Parent Class" are None.
    """
    # fromkeys fixes the key order and leaves unset entries as None
    row = dict.fromkeys(("Class", "Name", "Description", "Parent Class", "Reference", "Match Type", "Comment"))
    row["Class"] = "ObjectProperty"
    row["Description"] = "The class of object properties."
    row["Reference"] = OWL.ObjectProperty
    row["Match Type"] = MatchType.exact
    row["Comment"] = "Added by NEAT based on owl:ObjectProperty but adapted to NEAT and use in CDF."
    return row
157
-
158
-
159
def _data_type_property_class() -> dict:
    """Build the row describing the ``DatatypeProperty`` class added by NEAT.

    Returns:
        Dictionary with the same keys as a class row; "Name" and
        "Parent Class" are None.
    """
    # fromkeys fixes the key order and leaves unset entries as None
    row = dict.fromkeys(("Class", "Name", "Description", "Parent Class", "Reference", "Match Type", "Comment"))
    row["Class"] = "DatatypeProperty"
    row["Description"] = "The class of data properties."
    row["Reference"] = OWL.DatatypeProperty
    row["Match Type"] = MatchType.exact
    row["Comment"] = "Added by NEAT based on owl:DatatypeProperty but adapted to NEAT and use in CDF."
    return row
169
-
170
-
171
def _thing_class() -> dict:
    """Build the row describing the ``Thing`` class added by NEAT.

    Returns:
        Dictionary with the same keys as a class row; "Name" and
        "Parent Class" are None.
    """
    comment = (
        "Added by NEAT. "
        "Imported from OWL base ontology, it is meant for use as a default"
        " value type for object properties which miss a declared range."
    )
    # fromkeys fixes the key order and leaves unset entries as None
    row = dict.fromkeys(("Class", "Name", "Description", "Parent Class", "Reference", "Match Type", "Comment"))
    row["Class"] = "Thing"
    row["Description"] = "The class of holding class individuals."
    row["Reference"] = OWL.Thing
    row["Match Type"] = MatchType.exact
    row["Comment"] = comment
    return row
185
-
186
-
187
- def _add_parent_class(df: pd.DataFrame) -> list[dict]:
188
- parent_set = {
189
- item
190
- for sublist in df["Parent Class"].tolist()
191
- if sublist
192
- for item in sublist
193
- if item != "" and item is not None
194
- }
195
- class_set = set(df["Class"].tolist())
196
-
197
- rows = []
198
- for missing_parent_class in parent_set.difference(class_set):
199
- rows += [
200
- {
201
- "Class": missing_parent_class,
202
- "Name": None,
203
- "Description": None,
204
- "Parent Class": None,
205
- "Reference": None,
206
- "Match Type": None,
207
- "Comment": (
208
- "Added by NEAT. "
209
- "This is a parent class that is missing in the ontology. "
210
- "It is added by NEAT to make the ontology compliant with CDF."
211
- ),
212
- }
213
- ]
214
-
215
- return rows
@@ -1,213 +0,0 @@
1
- import datetime
2
- import re
3
-
4
- from rdflib import Graph, Namespace
5
-
6
- from cognite.neat.constants import DEFAULT_NAMESPACE
7
- from cognite.neat.rules.models import RoleTypes, SchemaCompleteness
8
- from cognite.neat.rules.models._types._base import (
9
- PREFIX_COMPLIANCE_REGEX,
10
- VERSION_COMPLIANCE_REGEX,
11
- )
12
- from cognite.neat.utils.collection_ import remove_none_elements_from_set
13
- from cognite.neat.utils.rdf_ import convert_rdflib_content
14
-
15
-
16
def parse_owl_metadata(graph: Graph) -> dict:
    """Parse owl metadata from graph to dict.

    Args:
        graph: Graph containing owl metadata

    Returns:
        Dictionary containing owl metadata. When the query returns no rows,
        the dictionary holds only the defaults supplied by
        ``make_metadata_compliant``.

    !!! note "Compliant OWL metadata"
        This makes the method very opinionated, but results in a compliant metadata.
    """
    # TODO: Move dataframe to dict representation

    query = f"""SELECT ?namespace ?prefix ?version ?created ?updated ?title ?description ?creator ?rights ?license
    WHERE {{
        ?namespace a owl:Ontology .
        OPTIONAL {{?namespace owl:versionInfo ?version }}.
        OPTIONAL {{?namespace dcterms:creator ?creator }}.
        OPTIONAL {{?namespace <{DEFAULT_NAMESPACE.prefix}> ?prefix }}.
        OPTIONAL {{?namespace dcterms:title|rdfs:label|skos:prefLabel ?title }}.
        OPTIONAL {{?namespace dcterms:modified ?updated }}.
        OPTIONAL {{?namespace dcterms:created ?created }}.
        OPTIONAL {{?namespace dcterms:description ?description }}.
        OPTIONAL {{?namespace dcterms:rights|dc:rights ?rights }}.

        OPTIONAL {{?namespace dcterms:license|dc:license ?license }}.
        FILTER (!isBlank(?namespace))
        FILTER (!bound(?description) || LANG(?description) = "" || LANGMATCHES(LANG(?description), "en"))
        FILTER (!bound(?title) || LANG(?title) = "" || LANGMATCHES(LANG(?title), "en"))
    }}
    """

    # Transpose the result rows: one set of distinct values per selected variable
    results = [set(column) for column in zip(*graph.query(query), strict=True)]

    if not results:
        # Without any result row there is nothing to index into; fall back to
        # the defaults instead of raising IndexError.
        return make_metadata_compliant({"role": RoleTypes.information, "schema": SchemaCompleteness.partial})

    # Order follows the SELECT clause above
    namespaces, prefixes, versions, created, updated, titles, descriptions, creators, rights, licenses = results

    # Sorted so that the joined creator string is stable between runs
    known_creators = remove_none_elements_from_set(creators)

    raw_metadata = convert_rdflib_content(
        {
            "role": RoleTypes.information,
            "schema": SchemaCompleteness.partial,
            "prefix": prefixes.pop(),
            "namespace": Namespace(namespaces.pop()),
            "version": versions.pop(),
            "created": created.pop(),
            "updated": updated.pop(),
            "title": titles.pop(),
            "description": descriptions.pop(),
            "creator": ", ".join(sorted(known_creators, key=str)) if known_creators else None,
            "rights": rights.pop(),
            "license": licenses.pop(),
        }
    )

    return make_metadata_compliant(raw_metadata)
75
-
76
-
77
def make_metadata_compliant(metadata: dict) -> dict:
    """Attempts to fix errors in metadata, otherwise defaults to values that will pass validation.

    Args:
        metadata: Dictionary containing metadata

    Returns:
        Dictionary containing metadata with fixed errors
    """
    # Taken once so that defaulted "created" and "updated" values are identical
    now = datetime.datetime.now().replace(microsecond=0)

    metadata = fix_namespace(metadata, default=Namespace("http://purl.org/cognite/neat#"))
    metadata = fix_prefix(metadata)
    metadata = fix_version(metadata)
    metadata = fix_date(metadata, date_type="created", default=now)
    metadata = fix_date(metadata, date_type="updated", default=now)
    metadata = fix_title(metadata)
    metadata = fix_description(metadata)
    metadata = fix_author(metadata, "creator")
    metadata = fix_rights(metadata)
    metadata = fix_license(metadata)

    return metadata
99
-
100
-
101
def fix_license(metadata: dict, default: str = "Unknown license") -> dict:
    """Ensure ``metadata["license"]`` is a non-empty string.

    Args:
        metadata: Dictionary containing metadata; modified in place
        default: Value stored when the license is missing, empty or not a string

    Returns:
        The same dictionary, with "license" set
    """
    # Local name avoids shadowing the builtin ``license``
    license_text = metadata.get("license")
    if not isinstance(license_text, str) or not license_text:
        metadata["license"] = default
    return metadata
110
-
111
-
112
def fix_rights(metadata: dict, default: str = "Unknown rights") -> dict:
    """Ensure ``metadata["rights"]`` is a non-empty string.

    Args:
        metadata: Dictionary containing metadata; modified in place
        default: Value stored when the rights are missing, empty or not a string

    Returns:
        The same dictionary, with "rights" set
    """
    rights = metadata.get("rights")
    if not isinstance(rights, str) or not rights:
        metadata["rights"] = default
    return metadata
121
-
122
-
123
def fix_author(metadata: dict, author_type: str = "creator", default: str = "NEAT") -> dict:
    """Ensure ``metadata[author_type]`` is a non-empty string.

    Args:
        metadata: Dictionary containing metadata; modified in place
        author_type: Key holding the author value, by default "creator"
        default: Value stored when the author is missing, empty or not a string

    Returns:
        The same dictionary, with ``author_type`` set
    """
    author = metadata.get(author_type)
    # A list is not a string, so a single isinstance check covers it
    if not isinstance(author, str) or not author:
        metadata[author_type] = default
    return metadata
132
-
133
-
134
def fix_description(metadata: dict, default: str = "This model has been inferred from OWL ontology") -> dict:
    """Ensure ``metadata["description"]`` is a string of at most 1024 characters.

    Args:
        metadata: Dictionary containing metadata; modified in place
        default: Value stored when the description is missing, empty or not a string

    Returns:
        The same dictionary, with "description" set
    """
    text = metadata.get("description", None)

    # missing, empty or non-string values are replaced outright
    if not text or not isinstance(text, str):
        metadata["description"] = default
        return metadata

    # overly long descriptions are cut rather than replaced
    if len(text) > 1024:
        metadata["description"] = text[:1024]
    return metadata
143
-
144
-
145
def fix_prefix(metadata: dict, default: str = "neat") -> dict:
    """Ensure ``metadata["prefix"]`` is a string matching ``PREFIX_COMPLIANCE_REGEX``.

    Args:
        metadata: Dictionary containing metadata; modified in place
        default: Value stored when the prefix is missing, not a string or
            does not match the pattern

    Returns:
        The same dictionary, with "prefix" set
    """
    candidate = metadata.get("prefix", None)
    # the pattern is only consulted for non-empty strings
    is_valid = bool(candidate) and isinstance(candidate, str) and re.match(PREFIX_COMPLIANCE_REGEX, candidate)
    if not is_valid:
        metadata["prefix"] = default
    return metadata
152
-
153
-
154
def fix_namespace(metadata: dict, default: Namespace) -> dict:
    """Ensure ``metadata["namespace"]`` is a ``Namespace`` instance.

    Args:
        metadata: Dictionary containing metadata; modified in place
        default: Namespace stored when the value is missing or cannot be converted

    Returns:
        The same dictionary, with "namespace" set
    """
    current = metadata.get("namespace", None)

    if not current:
        metadata["namespace"] = default
    elif not isinstance(current, Namespace):
        # try to wrap the raw value; any failure falls back to the default
        try:
            metadata["namespace"] = Namespace(current)
        except Exception:
            metadata["namespace"] = default

    return metadata
165
-
166
-
167
def fix_date(
    metadata: dict,
    date_type: str,
    default: datetime.datetime,
) -> dict:
    """Ensure ``metadata[date_type]`` is a ``datetime.datetime``.

    Args:
        metadata: Dictionary containing metadata; modified in place
        date_type: Key holding the date value, e.g. "created" or "updated"
        default: Value stored when the date is missing or cannot be converted

    Returns:
        The same dictionary, with ``date_type`` set. A ``datetime.date`` is
        promoted to midnight of that day and a string is parsed with the
        format ``%Y-%m-%dT%H:%M:%SZ``.
    """
    value = metadata.get(date_type, None)

    if not value:
        metadata[date_type] = default
        return metadata

    # already the target type (checked before date, which it subclasses)
    if isinstance(value, datetime.datetime):
        return metadata

    try:
        if isinstance(value, datetime.date):
            metadata[date_type] = datetime.datetime.combine(value, datetime.datetime.min.time())
        elif isinstance(value, str):
            metadata[date_type] = datetime.datetime.strptime(value, "%Y-%m-%dT%H:%M:%SZ")
        else:
            metadata[date_type] = default
    except Exception:
        # any conversion failure falls back to the default
        metadata[date_type] = default

    return metadata
188
-
189
-
190
def fix_version(metadata: dict, default: str = "1.0.0") -> dict:
    """Ensure ``metadata["version"]`` is a string matching ``VERSION_COMPLIANCE_REGEX``.

    Args:
        metadata: Dictionary containing metadata; modified in place
        default: Value stored when the version is missing, not a string or
            does not match the pattern

    Returns:
        The same dictionary, with "version" set
    """
    version = metadata.get("version")
    # The type check comes first: re.match raises TypeError on non-string input
    if not version or not isinstance(version, str) or not re.match(VERSION_COMPLIANCE_REGEX, version):
        metadata["version"] = default

    return metadata
198
-
199
-
200
def fix_title(metadata: dict, default: str = "OWL Inferred Data Model") -> dict:
    """Ensure ``metadata["title"]`` is a non-empty string of at most 255 characters.

    Args:
        metadata: Dictionary containing metadata; modified in place
        default: Value stored when the title is missing, empty or not a string

    Returns:
        The same dictionary, with "title" set
    """
    title = metadata.get("title")
    if not isinstance(title, str) or not title:
        metadata["title"] = default
    elif len(title) > 255:
        # overly long titles are cut rather than replaced
        metadata["title"] = title[:255]

    return metadata
@@ -1,203 +0,0 @@
1
- from typing import cast
2
-
3
- import numpy as np
4
- import pandas as pd
5
- from rdflib import Graph
6
-
7
- from cognite.neat.rules.models._base import MatchType
8
- from cognite.neat.utils.rdf_ import remove_namespace_from_uri
9
-
10
- from ._owl2classes import _data_type_property_class, _object_property_class, _thing_class
11
-
12
-
13
def parse_owl_properties(graph: Graph, language: str = "en") -> list[dict]:
    """Parse OWL properties from a graph into a list of row dictionaries.

    Args:
        graph: Graph containing owl properties
        language: Language tag used to select labels and comments, by default "en"

    Returns:
        List of dictionaries containing owl properties. An empty list is
        returned when the query yields no rows.
    """

    query = """

        SELECT ?class ?property ?name ?description ?type ?minCount ?maxCount ?reference
         ?match ?propertyType
        WHERE {
            ?property a ?propertyType.
            FILTER (?propertyType IN (owl:ObjectProperty, owl:DatatypeProperty ) )
            OPTIONAL {?property rdfs:domain ?class }.
            OPTIONAL {?property rdfs:range ?type }.
            OPTIONAL {?property rdfs:label ?name }.
            OPTIONAL {?property rdfs:comment ?description} .
            OPTIONAL {?property owl:maxCardinality ?maxCount} .
            OPTIONAL {?property owl:minCardinality ?minCount} .
            FILTER (!isBlank(?property))
            FILTER (!bound(?type) || !isBlank(?type))
            FILTER (!bound(?class) || !isBlank(?class))
            FILTER (!bound(?name) || LANG(?name) = "" || LANGMATCHES(LANG(?name), "en"))
            FILTER (!bound(?description) || LANG(?description) = "" || LANGMATCHES(LANG(?description), "en"))
            BIND(IF(bound(?minCount), ?minCount, 0) AS ?minCount)
            BIND(IF(bound(?maxCount), ?maxCount, 1) AS ?maxCount)
        }
    """

    # Substitute only the quoted language tag. Replacing the bare substring
    # "en" would also rewrite identifiers that merely contain those letters
    # (rdfs:comment, ?reference) and corrupt the query for any language other
    # than "en".
    localized_query = query.replace('"en"', f'"{language}"')

    raw_df = _parse_raw_dataframe(cast(list[tuple], list(graph.query(localized_query))))
    if raw_df.empty:
        return []

    # group values and clean up
    processed_df = _clean_up_properties(raw_df)

    # make compliant
    processed_df = make_properties_compliant(processed_df)

    # drop column _property_type, which was a helper column:
    processed_df.drop(columns=["_property_type"], inplace=True)

    return processed_df.to_dict(orient="records")
61
-
62
-
63
- def _parse_raw_dataframe(query_results: list[tuple]) -> pd.DataFrame:
64
- df = pd.DataFrame(
65
- query_results,
66
- columns=[
67
- "Class",
68
- "Property",
69
- "Name",
70
- "Description",
71
- "Value Type",
72
- "Min Count",
73
- "Max Count",
74
- "Reference",
75
- "Match Type",
76
- "_property_type",
77
- ],
78
- )
79
- if df.empty:
80
- return df
81
-
82
- df.replace(np.nan, "", regex=True, inplace=True)
83
-
84
- df.Reference = df.Property
85
- df.Class = df.Class.apply(lambda x: remove_namespace_from_uri(x))
86
- df.Property = df.Property.apply(lambda x: remove_namespace_from_uri(x))
87
- df["Value Type"] = df["Value Type"].apply(lambda x: remove_namespace_from_uri(x))
88
- df["Match Type"] = len(df) * [MatchType.exact]
89
- df["Comment"] = len(df) * [None]
90
- df["_property_type"] = df["_property_type"].apply(lambda x: remove_namespace_from_uri(x))
91
-
92
- return df
93
-
94
-
95
- def _clean_up_properties(df: pd.DataFrame) -> pd.DataFrame:
96
- class_grouped_dfs = df.groupby("Class")
97
-
98
- clean_list = []
99
-
100
- for class_, class_grouped_df in class_grouped_dfs:
101
- property_grouped_dfs = class_grouped_df.groupby("Property")
102
- for property_, property_grouped_df in property_grouped_dfs:
103
- clean_list += [
104
- {
105
- "Class": class_,
106
- "Property": property_,
107
- "Name": property_grouped_df["Name"].unique()[0],
108
- "Description": "\n".join(list(property_grouped_df.Description.unique()))[:1024],
109
- "Value Type": property_grouped_df["Value Type"].unique()[0],
110
- "Min Count": property_grouped_df["Min Count"].unique()[0],
111
- "Max Count": property_grouped_df["Max Count"].unique()[0],
112
- "Reference": property_grouped_df["Reference"].unique()[0],
113
- "Match Type": property_grouped_df["Match Type"].unique()[0],
114
- "Comment": property_grouped_df["Comment"].unique()[0],
115
- "_property_type": property_grouped_df["_property_type"].unique()[0],
116
- }
117
- ]
118
-
119
- df = pd.DataFrame(clean_list)
120
- df.replace("", None, inplace=True)
121
-
122
- return df
123
-
124
-
125
def make_properties_compliant(properties: pd.DataFrame) -> pd.DataFrame:
    """Make properties compliant.

    Args:
        properties: Dataframe with one row per property; its columns are
            overwritten in place

    Returns:
        Dataframe containing compliant properties
    """
    default_comment = "Imported from Ontology by NEAT"

    def _int_or(fallback: int):
        # NOTE(review): only plain Python ints pass this check; other numeric
        # representations fall back to the default. Confirm this is intended.
        def _pick(value):
            return value if isinstance(value, int) and value != "" else fallback

        return _pick

    def _text_or(fallback: str):
        def _pick(value):
            return value if isinstance(value, str) and len(value) > 0 else fallback

        return _pick

    # "Min Count" defaults to 0 when it is not an int
    properties["Min Count"] = properties["Min Count"].apply(_int_or(0))

    # "Max Count" defaults to 1 when it is not an int
    properties["Max Count"] = properties["Max Count"].apply(_int_or(1))

    # Empty or non-string match types fall back to "exact"
    properties["Match Type"] = properties["Match Type"].fillna("exact").apply(_text_or("exact"))

    # Empty or non-string comments fall back to the default comment
    properties["Comment"] = properties["Comment"].fillna(default_comment).apply(_text_or(default_comment))

    # Descriptions are capped at 1024 characters; non-strings become None
    properties["Description"] = properties["Description"].apply(
        lambda value: value[:1024] if isinstance(value, str) else None
    )

    # fill in missing domains first, then missing value types
    properties = fix_dangling_properties(properties)
    properties = fix_missing_property_value_type(properties)

    return properties
152
-
153
-
154
def fix_dangling_properties(properties: pd.DataFrame) -> pd.DataFrame:
    """This method fixes properties which are missing a domain definition in the ontology.

    Args:
        properties: Dataframe containing properties; its "Class" column is
            overwritten in place

    Returns:
        Dataframe containing properties with fixed domain
    """
    fallback_domain = {
        "ObjectProperty": _object_property_class()["Class"],
        "DatatypeProperty": _data_type_property_class()["Class"],
    }

    def _resolve_domain(row):
        declared = row["Class"]
        if not pd.isna(declared):
            return declared
        # anything that is not an object property is treated as a datatype property
        if row["_property_type"] == "ObjectProperty":
            return fallback_domain[row["_property_type"]]
        return fallback_domain["DatatypeProperty"]

    # apply missing domain
    properties["Class"] = properties.apply(_resolve_domain, axis=1)
    return properties
180
-
181
-
182
def fix_missing_property_value_type(properties: pd.DataFrame) -> pd.DataFrame:
    """This method fixes properties which are missing a range definition in the ontology.

    Args:
        properties: Dataframe containing properties; its "Value Type" column
            is overwritten in place

    Returns:
        Dataframe containing properties with fixed range
    """

    def _resolve_value_type(row):
        declared = row["Value Type"]
        if not pd.isna(declared):
            return declared
        # object properties without a range point at the Thing class,
        # everything else is treated as a string
        if row["_property_type"] == "ObjectProperty":
            return _thing_class()["Class"]
        return "string"

    # apply missing range
    properties["Value Type"] = properties.apply(_resolve_value_type, axis=1)

    return properties