cognite-neat 0.87.4__py3-none-any.whl → 0.88.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cognite-neat might be problematic. Click here for more details.

Files changed (132)
  1. cognite/neat/_version.py +1 -1
  2. cognite/neat/app/api/data_classes/rest.py +0 -19
  3. cognite/neat/app/api/explorer.py +6 -4
  4. cognite/neat/app/api/routers/crud.py +11 -21
  5. cognite/neat/app/api/routers/workflows.py +24 -94
  6. cognite/neat/graph/extractors/_classic_cdf/_assets.py +8 -2
  7. cognite/neat/graph/extractors/_mock_graph_generator.py +2 -2
  8. cognite/neat/graph/loaders/_base.py +17 -12
  9. cognite/neat/graph/loaders/_rdf2asset.py +223 -58
  10. cognite/neat/graph/loaders/_rdf2dms.py +1 -1
  11. cognite/neat/graph/stores/_base.py +5 -0
  12. cognite/neat/rules/analysis/_asset.py +31 -1
  13. cognite/neat/rules/importers/_inference2rules.py +31 -35
  14. cognite/neat/rules/models/information/_rules.py +1 -1
  15. cognite/neat/workflows/steps/data_contracts.py +17 -43
  16. cognite/neat/workflows/steps/lib/current/graph_extractor.py +28 -24
  17. cognite/neat/workflows/steps/lib/current/graph_loader.py +4 -21
  18. cognite/neat/workflows/steps/lib/current/graph_store.py +18 -134
  19. cognite/neat/workflows/steps_registry.py +5 -7
  20. {cognite_neat-0.87.4.dist-info → cognite_neat-0.88.0.dist-info}/METADATA +1 -1
  21. {cognite_neat-0.87.4.dist-info → cognite_neat-0.88.0.dist-info}/RECORD +24 -132
  22. cognite/neat/app/api/routers/core.py +0 -91
  23. cognite/neat/app/api/routers/data_exploration.py +0 -336
  24. cognite/neat/app/api/routers/rules.py +0 -203
  25. cognite/neat/legacy/__init__.py +0 -0
  26. cognite/neat/legacy/graph/__init__.py +0 -3
  27. cognite/neat/legacy/graph/examples/Knowledge-Graph-Nordic44-dirty.xml +0 -20182
  28. cognite/neat/legacy/graph/examples/Knowledge-Graph-Nordic44.xml +0 -20163
  29. cognite/neat/legacy/graph/examples/__init__.py +0 -10
  30. cognite/neat/legacy/graph/examples/skos-capturing-sheet-wind-topics.xlsx +0 -0
  31. cognite/neat/legacy/graph/exceptions.py +0 -90
  32. cognite/neat/legacy/graph/extractors/__init__.py +0 -6
  33. cognite/neat/legacy/graph/extractors/_base.py +0 -14
  34. cognite/neat/legacy/graph/extractors/_dexpi.py +0 -44
  35. cognite/neat/legacy/graph/extractors/_graph_capturing_sheet.py +0 -403
  36. cognite/neat/legacy/graph/extractors/_mock_graph_generator.py +0 -361
  37. cognite/neat/legacy/graph/loaders/__init__.py +0 -23
  38. cognite/neat/legacy/graph/loaders/_asset_loader.py +0 -511
  39. cognite/neat/legacy/graph/loaders/_base.py +0 -67
  40. cognite/neat/legacy/graph/loaders/_exceptions.py +0 -85
  41. cognite/neat/legacy/graph/loaders/core/__init__.py +0 -0
  42. cognite/neat/legacy/graph/loaders/core/labels.py +0 -58
  43. cognite/neat/legacy/graph/loaders/core/models.py +0 -136
  44. cognite/neat/legacy/graph/loaders/core/rdf_to_assets.py +0 -1046
  45. cognite/neat/legacy/graph/loaders/core/rdf_to_relationships.py +0 -559
  46. cognite/neat/legacy/graph/loaders/rdf_to_dms.py +0 -309
  47. cognite/neat/legacy/graph/loaders/validator.py +0 -87
  48. cognite/neat/legacy/graph/models.py +0 -6
  49. cognite/neat/legacy/graph/stores/__init__.py +0 -13
  50. cognite/neat/legacy/graph/stores/_base.py +0 -400
  51. cognite/neat/legacy/graph/stores/_graphdb_store.py +0 -52
  52. cognite/neat/legacy/graph/stores/_memory_store.py +0 -43
  53. cognite/neat/legacy/graph/stores/_oxigraph_store.py +0 -151
  54. cognite/neat/legacy/graph/stores/_oxrdflib.py +0 -247
  55. cognite/neat/legacy/graph/stores/_rdf_to_graph.py +0 -42
  56. cognite/neat/legacy/graph/transformations/__init__.py +0 -0
  57. cognite/neat/legacy/graph/transformations/entity_matcher.py +0 -101
  58. cognite/neat/legacy/graph/transformations/query_generator/__init__.py +0 -3
  59. cognite/neat/legacy/graph/transformations/query_generator/sparql.py +0 -575
  60. cognite/neat/legacy/graph/transformations/transformer.py +0 -322
  61. cognite/neat/legacy/rules/__init__.py +0 -0
  62. cognite/neat/legacy/rules/analysis.py +0 -231
  63. cognite/neat/legacy/rules/examples/Rules-Nordic44-to-graphql.xlsx +0 -0
  64. cognite/neat/legacy/rules/examples/Rules-Nordic44.xlsx +0 -0
  65. cognite/neat/legacy/rules/examples/__init__.py +0 -18
  66. cognite/neat/legacy/rules/examples/power-grid-containers.yaml +0 -124
  67. cognite/neat/legacy/rules/examples/power-grid-example.xlsx +0 -0
  68. cognite/neat/legacy/rules/examples/power-grid-model.yaml +0 -224
  69. cognite/neat/legacy/rules/examples/rules-template.xlsx +0 -0
  70. cognite/neat/legacy/rules/examples/sheet2cdf-transformation-rules.xlsx +0 -0
  71. cognite/neat/legacy/rules/examples/skos-rules.xlsx +0 -0
  72. cognite/neat/legacy/rules/examples/source-to-solution-mapping-rules.xlsx +0 -0
  73. cognite/neat/legacy/rules/examples/wind-energy.owl +0 -1511
  74. cognite/neat/legacy/rules/exceptions.py +0 -2972
  75. cognite/neat/legacy/rules/exporters/__init__.py +0 -20
  76. cognite/neat/legacy/rules/exporters/_base.py +0 -45
  77. cognite/neat/legacy/rules/exporters/_core/__init__.py +0 -5
  78. cognite/neat/legacy/rules/exporters/_core/rules2labels.py +0 -24
  79. cognite/neat/legacy/rules/exporters/_rules2dms.py +0 -885
  80. cognite/neat/legacy/rules/exporters/_rules2excel.py +0 -213
  81. cognite/neat/legacy/rules/exporters/_rules2graphql.py +0 -183
  82. cognite/neat/legacy/rules/exporters/_rules2ontology.py +0 -524
  83. cognite/neat/legacy/rules/exporters/_rules2pydantic_models.py +0 -748
  84. cognite/neat/legacy/rules/exporters/_rules2rules.py +0 -105
  85. cognite/neat/legacy/rules/exporters/_rules2triples.py +0 -38
  86. cognite/neat/legacy/rules/exporters/_validation.py +0 -146
  87. cognite/neat/legacy/rules/importers/__init__.py +0 -22
  88. cognite/neat/legacy/rules/importers/_base.py +0 -66
  89. cognite/neat/legacy/rules/importers/_dict2rules.py +0 -158
  90. cognite/neat/legacy/rules/importers/_dms2rules.py +0 -194
  91. cognite/neat/legacy/rules/importers/_graph2rules.py +0 -308
  92. cognite/neat/legacy/rules/importers/_json2rules.py +0 -39
  93. cognite/neat/legacy/rules/importers/_owl2rules/__init__.py +0 -3
  94. cognite/neat/legacy/rules/importers/_owl2rules/_owl2classes.py +0 -239
  95. cognite/neat/legacy/rules/importers/_owl2rules/_owl2metadata.py +0 -260
  96. cognite/neat/legacy/rules/importers/_owl2rules/_owl2properties.py +0 -217
  97. cognite/neat/legacy/rules/importers/_owl2rules/_owl2rules.py +0 -290
  98. cognite/neat/legacy/rules/importers/_spreadsheet2rules.py +0 -45
  99. cognite/neat/legacy/rules/importers/_xsd2rules.py +0 -20
  100. cognite/neat/legacy/rules/importers/_yaml2rules.py +0 -39
  101. cognite/neat/legacy/rules/models/__init__.py +0 -5
  102. cognite/neat/legacy/rules/models/_base.py +0 -151
  103. cognite/neat/legacy/rules/models/raw_rules.py +0 -316
  104. cognite/neat/legacy/rules/models/rdfpath.py +0 -237
  105. cognite/neat/legacy/rules/models/rules.py +0 -1289
  106. cognite/neat/legacy/rules/models/tables.py +0 -9
  107. cognite/neat/legacy/rules/models/value_types.py +0 -118
  108. cognite/neat/legacy/workflows/examples/Export_DMS/workflow.yaml +0 -89
  109. cognite/neat/legacy/workflows/examples/Export_Rules_to_Ontology/workflow.yaml +0 -152
  110. cognite/neat/legacy/workflows/examples/Extract_DEXPI_Graph_and_Export_Rules/workflow.yaml +0 -139
  111. cognite/neat/legacy/workflows/examples/Extract_RDF_Graph_and_Generate_Assets/workflow.yaml +0 -270
  112. cognite/neat/legacy/workflows/examples/Import_DMS/workflow.yaml +0 -65
  113. cognite/neat/legacy/workflows/examples/Ontology_to_Data_Model/workflow.yaml +0 -116
  114. cognite/neat/legacy/workflows/examples/Validate_Rules/workflow.yaml +0 -67
  115. cognite/neat/legacy/workflows/examples/Validate_Solution_Model/workflow.yaml +0 -64
  116. cognite/neat/legacy/workflows/examples/Visualize_Data_Model_Using_Mock_Graph/workflow.yaml +0 -95
  117. cognite/neat/legacy/workflows/examples/Visualize_Semantic_Data_Model/workflow.yaml +0 -111
  118. cognite/neat/workflows/examples/Extract_RDF_Graph_and_Generate_Assets/workflow.yaml +0 -270
  119. cognite/neat/workflows/migration/__init__.py +0 -0
  120. cognite/neat/workflows/migration/steps.py +0 -91
  121. cognite/neat/workflows/migration/wf_manifests.py +0 -33
  122. cognite/neat/workflows/steps/lib/legacy/__init__.py +0 -7
  123. cognite/neat/workflows/steps/lib/legacy/graph_contextualization.py +0 -82
  124. cognite/neat/workflows/steps/lib/legacy/graph_extractor.py +0 -746
  125. cognite/neat/workflows/steps/lib/legacy/graph_loader.py +0 -606
  126. cognite/neat/workflows/steps/lib/legacy/graph_store.py +0 -307
  127. cognite/neat/workflows/steps/lib/legacy/graph_transformer.py +0 -58
  128. cognite/neat/workflows/steps/lib/legacy/rules_exporter.py +0 -511
  129. cognite/neat/workflows/steps/lib/legacy/rules_importer.py +0 -612
  130. {cognite_neat-0.87.4.dist-info → cognite_neat-0.88.0.dist-info}/LICENSE +0 -0
  131. {cognite_neat-0.87.4.dist-info → cognite_neat-0.88.0.dist-info}/WHEEL +0 -0
  132. {cognite_neat-0.87.4.dist-info → cognite_neat-0.88.0.dist-info}/entry_points.txt +0 -0
@@ -1,194 +0,0 @@
1
- import sys
2
- from collections.abc import Sequence
3
- from datetime import datetime
4
- from typing import Any, cast
5
-
6
- import pandas as pd
7
- from cognite.client import CogniteClient
8
- from cognite.client.data_classes.data_modeling import (
9
- DataModel,
10
- DirectRelation,
11
- EdgeConnection,
12
- MappedProperty,
13
- SingleHopConnectionDefinition,
14
- View,
15
- )
16
- from cognite.client.data_classes.data_modeling.data_types import ListablePropertyType
17
- from cognite.client.data_classes.data_modeling.ids import DataModelIdentifier, ViewId
18
-
19
- from cognite.neat.legacy.rules.models.tables import Tables
20
- from cognite.neat.legacy.rules.models.value_types import DMS_VALUE_TYPE_MAPPINGS, XSD_VALUE_TYPE_MAPPINGS
21
-
22
- from ._base import BaseImporter
23
-
24
- if sys.version_info >= (3, 11):
25
- from typing import Self
26
- else:
27
- from typing_extensions import Self
28
-
29
-
30
- class DMSImporter(BaseImporter):
31
- """
32
- Converts a Data Model Storage (DMS) data model to a set of transformation rules.
33
-
34
- Args:
35
- views: List of views to convert to transformation rules.
36
- """
37
-
38
- def __init__(self, views: Sequence[View] | DataModel[View], metadata: dict[str, str | float] | None = None):
39
- if isinstance(views, DataModel):
40
- self.views = views.views
41
- else:
42
- self.views = list(views)
43
-
44
- if metadata is None:
45
- self.metadata = self._default_metadata()
46
- if len(self.views) == 1:
47
- self.metadata["version"] = self.views[0].version
48
- self.metadata["prefix"] = self.views[0].space
49
- else:
50
- self.metadata = metadata
51
-
52
- if isinstance(views, DataModel):
53
- if views.name:
54
- self.metadata["title"] = views.name
55
- if views.description:
56
- self.metadata["description"] = views.description
57
- if views.space:
58
- self.metadata["prefix"] = views.space
59
- if views.external_id:
60
- self.metadata["suffix"] = views.external_id
61
- if views.version:
62
- self.metadata["version"] = views.version
63
-
64
- @classmethod
65
- def from_cdf(cls, client: CogniteClient, data_model: DataModelIdentifier) -> Self:
66
- """
67
- Converts a Data Model Storage (DMS) data model to a set of transformation rules.
68
-
69
- Args:
70
- client: Cognite client to use for fetching data models.
71
- data_model: List of data models to convert to transformation rules.
72
-
73
- !!! Note
74
- Beware that `DataModelIdentifier` is just type hint that you cannot instantiate
75
- directly, e.g. `id = DataModelIdentifier(space=, external_id, version)` will fail.
76
- Instead, provide `data_models` as a list of three element tuples,
77
- e.g. `[(space, external_id, version)]`, or two element tuples,
78
- e.g. `[(space, external_id)]`, where `space` represents CDF space name,
79
- `external_id` represents data model external ID, and `version`
80
- represents data model version. If `version` is not provided, whatever is
81
- the first version CDF returns it will give you that one.
82
-
83
- """
84
- data_model = client.data_modeling.data_models.retrieve(data_model, inline_views=True)[0]
85
-
86
- # Avoid duplicate views (same view can be used by multiple data models)
87
- views_by_id: dict[ViewId, View] = {}
88
- for view in data_model.views:
89
- views_by_id[view.as_id()] = view
90
-
91
- if metadata := cls._to_metadata(data_model):
92
- return cls(list(views_by_id.values()), metadata)
93
- else:
94
- return cls(list(views_by_id.values()))
95
-
96
- def to_tables(self) -> dict[str, pd.DataFrame]:
97
- classes: list[dict[str, str | float]] = []
98
- properties: list[dict[str, str | float]] = []
99
- for view in self.views:
100
- class_id = view.external_id
101
- classes.append(
102
- {
103
- "Class": class_id,
104
- "Name": view.name or float("nan"),
105
- "Description": view.description or float("nan"),
106
- }
107
- )
108
- for prop_id, prop in view.properties.items():
109
- if isinstance(prop, MappedProperty):
110
- # Edge 1-1
111
- if isinstance(prop.type, DirectRelation):
112
- type_ = cast(ViewId, prop.source).external_id
113
- else:
114
- type_ = cast(
115
- str, DMS_VALUE_TYPE_MAPPINGS.get(type(prop.type), XSD_VALUE_TYPE_MAPPINGS["string"]).xsd
116
- )
117
-
118
- default_value = prop.default_value
119
- name = prop.name or prop_id
120
- description = prop.description or float("nan")
121
-
122
- # Edge 1-many
123
- elif isinstance(prop, EdgeConnection):
124
- type_ = prop.source.external_id
125
- default_value = None
126
- name = prop.name or prop_id
127
- description = prop.description or float("nan")
128
- else:
129
- raise NotImplementedError(f"Property type {type(prop)} not supported")
130
-
131
- max_count: str | float = "1"
132
- if isinstance(prop, SingleHopConnectionDefinition) or (
133
- isinstance(prop, MappedProperty)
134
- and isinstance(prop.type, ListablePropertyType)
135
- and prop.type.is_list
136
- ):
137
- max_count = float("nan")
138
-
139
- min_count: str | float = "1"
140
- if isinstance(prop, SingleHopConnectionDefinition) or (
141
- isinstance(prop, MappedProperty) and prop.nullable
142
- ):
143
- min_count = "0"
144
-
145
- properties.append(
146
- {
147
- "Class": class_id,
148
- "Property": prop_id,
149
- "Name": name,
150
- "Description": description,
151
- "Type": type_,
152
- "Default": cast(Any, default_value), # fixes issues with mypy
153
- "Min Count": min_count,
154
- "Max Count": max_count,
155
- "Rule Type": "rdfpath",
156
- "Rule": f"cim:{class_id}(cim:{prop_id})",
157
- }
158
- )
159
-
160
- return {
161
- Tables.metadata: pd.Series(self.metadata).to_frame("value").reset_index(),
162
- Tables.classes: pd.DataFrame(classes),
163
- Tables.properties: pd.DataFrame(properties),
164
- }
165
-
166
- @staticmethod
167
- def _to_metadata(data_model: DataModel) -> dict:
168
- mapping = {
169
- "space": "cdf_space_name",
170
- "external_id": "data_model_name",
171
- "version": "version",
172
- "description": "description",
173
- "created_time": "created",
174
- "last_updated_time": "updated",
175
- "name": "title",
176
- }
177
-
178
- metadata = {mapping.get(k, k): v for k, v in data_model.to_pandas().value.to_dict().items() if k in mapping}
179
-
180
- metadata["prefix"] = metadata["data_model_name"]
181
- metadata["creator"] = "Unknown"
182
-
183
- if "created" in metadata:
184
- metadata["created"] = datetime.utcfromtimestamp(metadata["created"] / 1e3)
185
- if "updated" in metadata:
186
- metadata["updated"] = datetime.utcfromtimestamp(metadata["updated"] / 1e3)
187
-
188
- return metadata
189
-
190
- def _repr_html_(self) -> str:
191
- """Pretty display of the DMSImporter object in a Notebook"""
192
- dump = self.metadata
193
- dump["views_count"] = len(self.views)
194
- return pd.Series(dump).to_frame("value")._repr_html_() # type: ignore[operator]
@@ -1,308 +0,0 @@
1
- """This module performs importing of graph to TransformationRules pydantic class.
2
- In more details, it traverses the graph and abstracts class and properties, basically
3
- generating a list of rules based on which nodes that form the graph are made.
4
- """
5
-
6
- import warnings
7
- from datetime import datetime
8
- from typing import cast
9
-
10
- import pandas as pd
11
- from rdflib import Graph, Literal, Namespace, URIRef
12
-
13
- from cognite.neat.constants import get_default_prefixes
14
- from cognite.neat.legacy.rules import exceptions
15
- from cognite.neat.legacy.rules.exporters._rules2rules import to_dms_name
16
- from cognite.neat.legacy.rules.models.tables import Tables
17
- from cognite.neat.utils.rdf_ import get_namespace, remove_namespace_from_uri, uri_to_short_form
18
-
19
- from ._base import BaseImporter
20
-
21
-
22
- class GraphImporter(BaseImporter):
23
- """
24
- Convert RDF graph, containing nodes and edges, to tables/ transformation rules / Excel file.
25
-
26
- Args:
27
- graph: RDF graph to be imported
28
- max_number_of_instance: Max number of instances to be analyzed for each class in RDF graph
29
-
30
-
31
- !!! Note
32
- Due to high degree of flexibility of RDF graphs, the RDF graph is not guaranteed to be
33
- converted to a complete and/or valid `Rules` object. Therefore, it is recommended to
34
- call method `to_raw_rules` to get the raw rules which one should export to Excel file
35
- using `exporter.ExcelExporter` and then manually edit the Excel file by checking
36
- validation report file produced by the exporter.
37
-
38
- """
39
-
40
- def __init__(self, graph: Graph, max_number_of_instance: int = -1):
41
- self.graph = graph
42
- self.max_number_of_instance = max_number_of_instance
43
-
44
- def to_tables(self) -> dict[str, pd.DataFrame]:
45
- data_model, prefixes = _graph_to_data_model_dict(self.graph, self.max_number_of_instance)
46
-
47
- return {
48
- Tables.metadata: _parse_metadata_df(),
49
- Tables.classes: _parse_classes_df(data_model, prefixes),
50
- Tables.properties: _parse_properties_df(data_model, prefixes),
51
- Tables.prefixes: _parse_prefixes_df(prefixes),
52
- }
53
-
54
-
55
- def _create_default_properties_parsing_config() -> dict[str, tuple[str, ...]]:
56
- # TODO: these are to be read from Property pydantic model
57
- return {
58
- "header": (
59
- "Class",
60
- "Property",
61
- "Description",
62
- "Type",
63
- "Min Count",
64
- "Max Count",
65
- "Rule Type",
66
- "Rule",
67
- "Source",
68
- "Source Entity Name",
69
- "Match Type",
70
- "Comment",
71
- )
72
- }
73
-
74
-
75
- def _create_default_classes_parsing_config() -> dict[str, tuple[str, ...]]:
76
- # TODO: these are to be read from Class pydantic model
77
- return {"header": ("Class", "Description", "Parent Class", "Source", "Source Entity Name", "Match Type", "Comment")}
78
-
79
-
80
- def _parse_prefixes_df(prefixes: dict[str, Namespace]) -> pd.DataFrame:
81
- return pd.DataFrame.from_dict({"Prefix": list(prefixes.keys()), "URI": [str(uri) for uri in prefixes.values()]})
82
-
83
-
84
- def _parse_metadata_df() -> pd.DataFrame:
85
- clean_list = {
86
- "namespace": "http://purl.org/cognite/neat/",
87
- "prefix": "playground",
88
- "external_id": "neat",
89
- "version": "1.0.0",
90
- "isCurrentVersion": True,
91
- "created": datetime.utcnow(),
92
- "updated": datetime.utcnow(),
93
- "title": "RDF Graph Inferred Data Model",
94
- "description": "This data model has been inferred with NEAT",
95
- "creator": "NEAT",
96
- "contributor": "NEAT",
97
- "rights": "Unknown rights of usage",
98
- "license": "Unknown license",
99
- }
100
- return pd.DataFrame(list(clean_list.items()), columns=["Key", "Value"])
101
-
102
-
103
- def _parse_classes_df(data_model: dict, prefixes: dict, parsing_config: dict | None = None) -> pd.DataFrame:
104
- if parsing_config is None:
105
- parsing_config = _create_default_classes_parsing_config()
106
-
107
- class_rows = []
108
-
109
- for class_ in data_model:
110
- sanitized_class = to_dms_name(class_, "class")
111
- class_rows.append(
112
- [
113
- sanitized_class,
114
- None,
115
- None,
116
- str(prefixes[data_model[class_]["uri"].split(":")[0]]) + class_,
117
- class_,
118
- "exact",
119
- "Parsed from RDF graph",
120
- ]
121
- )
122
-
123
- return pd.DataFrame(class_rows, columns=parsing_config["header"])
124
-
125
-
126
- def _parse_properties_df(data_model: dict, prefixes: dict, parsing_config: dict | None = None) -> pd.DataFrame:
127
- if parsing_config is None:
128
- parsing_config = _create_default_properties_parsing_config()
129
-
130
- property_rows = []
131
-
132
- for class_ in data_model:
133
- sanitized_class = to_dms_name(class_, "class")
134
- for property_ in data_model[class_]["properties"]:
135
- for type_ in data_model[class_]["properties"][property_]["value_type"]:
136
- sanitized_property = to_dms_name(property_, "property")
137
-
138
- max_count = max(data_model[class_]["properties"][property_]["occurrence"])
139
-
140
- property_rows.append(
141
- [
142
- sanitized_class,
143
- sanitized_property,
144
- None,
145
- to_dms_name(type_, "value-type"),
146
- 0, # setting min count to 0 to be more flexible (all properties are optional)
147
- None if max_count > 1 else 1,
148
- "rdfpath",
149
- f'{data_model[class_]["uri"]}({data_model[class_]["properties"][property_]["uri"]})',
150
- str(prefixes[data_model[class_]["properties"][property_]["uri"].split(":")[0]]) + property_,
151
- property_,
152
- "exact",
153
- "Parsed from RDF graph",
154
- ]
155
- )
156
-
157
- return pd.DataFrame(property_rows, columns=parsing_config["header"])
158
-
159
-
160
- def _graph_to_data_model_dict(graph: Graph, max_number_of_instance: int = -1) -> tuple[dict, dict]:
161
- """Convert RDF graph to dictionary defining data model and prefixes of the graph
162
-
163
- Args:
164
- graph: RDF graph to be converted to TransformationRules object
165
- max_number_of_instance: Max number of instances to be considered for each class
166
-
167
- Returns:
168
- Tuple of data model and prefixes of the graph
169
- """
170
- data_model: dict[str, dict] = {}
171
-
172
- prefixes: dict[str, Namespace] = get_default_prefixes()
173
-
174
- for class_ in _get_class_ids(graph):
175
- _add_uri_namespace_to_prefixes(class_, prefixes)
176
- class_name = remove_namespace_from_uri(class_)
177
-
178
- if class_name in data_model:
179
- warnings.warn(
180
- exceptions.GraphClassNameCollision(class_name=class_name).message,
181
- category=exceptions.GraphClassNameCollision,
182
- stacklevel=2,
183
- )
184
- class_name = f"{class_name}_{len(data_model)+1}"
185
-
186
- data_model[class_name] = {"properties": {}, "uri": uri_to_short_form(class_, prefixes)}
187
-
188
- for instance in _get_class_instance_ids(graph, class_, max_number_of_instance):
189
- for property_, occurrence, data_type, object_type in _define_instance_properties(graph, instance):
190
- property_name = remove_namespace_from_uri(property_)
191
- _add_uri_namespace_to_prefixes(property_, prefixes)
192
-
193
- type_ = data_type if data_type else object_type
194
-
195
- # this is to skip rdf:type property
196
- if not type_:
197
- continue
198
-
199
- type_name = remove_namespace_from_uri(type_)
200
- _add_uri_namespace_to_prefixes(type_, prefixes)
201
-
202
- if property_name not in data_model[class_name]["properties"]:
203
- data_model[class_name]["properties"][property_name] = {
204
- "occurrence": {occurrence.value},
205
- "value_type": {type_name: {"uri": uri_to_short_form(type_, prefixes)}},
206
- "uri": uri_to_short_form(property_, prefixes),
207
- }
208
-
209
- elif type_name not in data_model[class_name]["properties"][property_name]["value_type"]:
210
- data_model[class_name]["properties"][property_name]["value_type"][type_name] = {
211
- "uri": uri_to_short_form(type_, prefixes)
212
- }
213
- warnings.warn(
214
- exceptions.GraphClassPropertyMultiValueTypes(
215
- class_name=class_name,
216
- property_name=property_name,
217
- types=list(data_model[class_name]["properties"][property_name]["value_type"].keys()),
218
- ).message,
219
- category=exceptions.GraphClassPropertyMultiValueTypes,
220
- stacklevel=3,
221
- )
222
-
223
- elif occurrence.value not in data_model[class_name]["properties"][property_name]["occurrence"]:
224
- data_model[class_name]["properties"][property_name]["occurrence"].add(occurrence.value)
225
-
226
- warnings.warn(
227
- exceptions.GraphClassPropertyMultiOccurrence(
228
- class_name=class_name, property_name=property_name
229
- ).message,
230
- category=exceptions.GraphClassPropertyMultiOccurrence,
231
- stacklevel=3,
232
- )
233
- else:
234
- continue
235
-
236
- return data_model, prefixes
237
-
238
-
239
- def _add_uri_namespace_to_prefixes(URI: URIRef, prefixes: dict[str, Namespace]):
240
- """Add URI to prefixes dict if not already present
241
-
242
- Args:
243
- URI: URI from which namespace is being extracted
244
- prefixes: Dict of prefixes and namespaces
245
- """
246
- if Namespace(get_namespace(URI)) not in prefixes.values():
247
- prefixes[f"prefix-{len(prefixes)+1}"] = Namespace(get_namespace(URI))
248
-
249
-
250
- def _get_class_ids(graph: Graph) -> list[URIRef]:
251
- """Get instances ids for a given class
252
-
253
- Args:
254
- graph: Graph containing class instances
255
- class_: Class for which instances are to be found
256
- namespace: Namespace of given class (to avoid writing long URIs)
257
- limit: Max number of instances to return, by default -1 meaning all instances
258
-
259
- Returns:
260
- List of class instance URIs
261
- """
262
-
263
- query_statement = """SELECT ?class (count(?s) as ?instances )
264
- WHERE { ?s a ?class . }
265
- group by ?class order by DESC(?instances)"""
266
-
267
- return [cast(tuple[URIRef, int], res)[0] for res in list(graph.query(query_statement))]
268
-
269
-
270
- def _get_class_instance_ids(graph: Graph, class_id: URIRef, max_number_of_instance: int = -1) -> list[URIRef]:
271
- """Get instances ids for a given class
272
-
273
- Args:
274
- graph: Graph containing class instances
275
- class_id: Class id for which instances are to be found
276
-
277
- Returns:
278
- List of class instance URIs
279
- """
280
-
281
- query_statement = "SELECT DISTINCT ?subject WHERE { ?subject a <class> .}".replace("class", class_id)
282
- if max_number_of_instance > 0:
283
- query_statement += f" LIMIT {max_number_of_instance}"
284
- return [cast(tuple[URIRef], res)[0] for res in list(graph.query(query_statement))]
285
-
286
-
287
- def _define_instance_properties(
288
- graph: Graph, instance_id: URIRef
289
- ) -> list[tuple[URIRef, Literal, URIRef | None, None | URIRef]]:
290
- """Get properties of a given instance
291
-
292
- Args:
293
- graph: Graph containing class instances
294
- instance_id: Instance id for which properties are to be found and defined
295
-
296
- Returns:
297
- List of properties of a given instance
298
- """
299
- query_statement = """SELECT ?property (count(?property) as ?occurrence) ?dataType ?objectType
300
- WHERE {<instance_id> ?property ?value .
301
- BIND(datatype(?value) AS ?dataType)
302
- OPTIONAL {?value rdf:type ?objectType .}
303
- }
304
- GROUP BY ?property ?dataType ?objectType"""
305
-
306
- results = graph.query(query_statement.replace("instance_id", instance_id))
307
-
308
- return [cast(tuple[URIRef, Literal, URIRef | None, None | URIRef], res) for res in list(results)]
@@ -1,39 +0,0 @@
1
- import json
2
- from pathlib import Path
3
- from typing import Literal
4
-
5
- from ._dict2rules import ArbitraryDictImporter
6
-
7
-
8
- class ArbitraryJSONImporter(ArbitraryDictImporter):
9
- """
10
- Importer for data given in a JSON file or string.
11
-
12
- This importer infers the data model from the JSON string based on the shape of the data.
13
-
14
- Args:
15
- json_path_or_str: Path to file with JSON or a JSON string.
16
- relationship_direction: Direction of relationships, either "parent-to-child" or "child-to-parent". JSON
17
- files are nested with children nested inside parents. This option determines whether the resulting rules
18
- will have an edge from parents to children or from children to parents.
19
-
20
- """
21
-
22
- def __init__(
23
- self,
24
- json_path_or_str: Path,
25
- relationship_direction: Literal["parent-to-child", "child-to-parent"] = "parent-to-child",
26
- ):
27
- if isinstance(json_path_or_str, str):
28
- data = json.loads(json_path_or_str)
29
- super().__init__(data, relationship_direction)
30
- elif isinstance(json_path_or_str, Path):
31
- if not json_path_or_str.exists():
32
- raise ValueError(f"File {json_path_or_str} does not exist")
33
- if json_path_or_str.suffix != ".json":
34
- raise ValueError(f"File {json_path_or_str} is not a JSON file")
35
- self.json_path = json_path_or_str
36
- data = json.loads(json_path_or_str.read_text())
37
- super().__init__(data, relationship_direction)
38
- else:
39
- raise TypeError(f"Expected Path or str, got {type(json_path_or_str)}")
@@ -1,3 +0,0 @@
1
- from ._owl2rules import OWLImporter
2
-
3
- __all__ = ["OWLImporter"]