cognite-neat 0.87.4__py3-none-any.whl → 0.88.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cognite-neat might be problematic. Click here for more details.

Files changed (132)
  1. cognite/neat/_version.py +1 -1
  2. cognite/neat/app/api/data_classes/rest.py +0 -19
  3. cognite/neat/app/api/explorer.py +6 -4
  4. cognite/neat/app/api/routers/crud.py +11 -21
  5. cognite/neat/app/api/routers/workflows.py +24 -94
  6. cognite/neat/graph/extractors/_classic_cdf/_assets.py +8 -2
  7. cognite/neat/graph/extractors/_mock_graph_generator.py +2 -2
  8. cognite/neat/graph/loaders/_base.py +17 -12
  9. cognite/neat/graph/loaders/_rdf2asset.py +223 -58
  10. cognite/neat/graph/loaders/_rdf2dms.py +1 -1
  11. cognite/neat/graph/stores/_base.py +5 -0
  12. cognite/neat/rules/analysis/_asset.py +31 -1
  13. cognite/neat/rules/importers/_inference2rules.py +31 -35
  14. cognite/neat/rules/models/information/_rules.py +1 -1
  15. cognite/neat/workflows/steps/data_contracts.py +17 -43
  16. cognite/neat/workflows/steps/lib/current/graph_extractor.py +28 -24
  17. cognite/neat/workflows/steps/lib/current/graph_loader.py +4 -21
  18. cognite/neat/workflows/steps/lib/current/graph_store.py +18 -134
  19. cognite/neat/workflows/steps_registry.py +5 -7
  20. {cognite_neat-0.87.4.dist-info → cognite_neat-0.88.0.dist-info}/METADATA +1 -1
  21. {cognite_neat-0.87.4.dist-info → cognite_neat-0.88.0.dist-info}/RECORD +24 -132
  22. cognite/neat/app/api/routers/core.py +0 -91
  23. cognite/neat/app/api/routers/data_exploration.py +0 -336
  24. cognite/neat/app/api/routers/rules.py +0 -203
  25. cognite/neat/legacy/__init__.py +0 -0
  26. cognite/neat/legacy/graph/__init__.py +0 -3
  27. cognite/neat/legacy/graph/examples/Knowledge-Graph-Nordic44-dirty.xml +0 -20182
  28. cognite/neat/legacy/graph/examples/Knowledge-Graph-Nordic44.xml +0 -20163
  29. cognite/neat/legacy/graph/examples/__init__.py +0 -10
  30. cognite/neat/legacy/graph/examples/skos-capturing-sheet-wind-topics.xlsx +0 -0
  31. cognite/neat/legacy/graph/exceptions.py +0 -90
  32. cognite/neat/legacy/graph/extractors/__init__.py +0 -6
  33. cognite/neat/legacy/graph/extractors/_base.py +0 -14
  34. cognite/neat/legacy/graph/extractors/_dexpi.py +0 -44
  35. cognite/neat/legacy/graph/extractors/_graph_capturing_sheet.py +0 -403
  36. cognite/neat/legacy/graph/extractors/_mock_graph_generator.py +0 -361
  37. cognite/neat/legacy/graph/loaders/__init__.py +0 -23
  38. cognite/neat/legacy/graph/loaders/_asset_loader.py +0 -511
  39. cognite/neat/legacy/graph/loaders/_base.py +0 -67
  40. cognite/neat/legacy/graph/loaders/_exceptions.py +0 -85
  41. cognite/neat/legacy/graph/loaders/core/__init__.py +0 -0
  42. cognite/neat/legacy/graph/loaders/core/labels.py +0 -58
  43. cognite/neat/legacy/graph/loaders/core/models.py +0 -136
  44. cognite/neat/legacy/graph/loaders/core/rdf_to_assets.py +0 -1046
  45. cognite/neat/legacy/graph/loaders/core/rdf_to_relationships.py +0 -559
  46. cognite/neat/legacy/graph/loaders/rdf_to_dms.py +0 -309
  47. cognite/neat/legacy/graph/loaders/validator.py +0 -87
  48. cognite/neat/legacy/graph/models.py +0 -6
  49. cognite/neat/legacy/graph/stores/__init__.py +0 -13
  50. cognite/neat/legacy/graph/stores/_base.py +0 -400
  51. cognite/neat/legacy/graph/stores/_graphdb_store.py +0 -52
  52. cognite/neat/legacy/graph/stores/_memory_store.py +0 -43
  53. cognite/neat/legacy/graph/stores/_oxigraph_store.py +0 -151
  54. cognite/neat/legacy/graph/stores/_oxrdflib.py +0 -247
  55. cognite/neat/legacy/graph/stores/_rdf_to_graph.py +0 -42
  56. cognite/neat/legacy/graph/transformations/__init__.py +0 -0
  57. cognite/neat/legacy/graph/transformations/entity_matcher.py +0 -101
  58. cognite/neat/legacy/graph/transformations/query_generator/__init__.py +0 -3
  59. cognite/neat/legacy/graph/transformations/query_generator/sparql.py +0 -575
  60. cognite/neat/legacy/graph/transformations/transformer.py +0 -322
  61. cognite/neat/legacy/rules/__init__.py +0 -0
  62. cognite/neat/legacy/rules/analysis.py +0 -231
  63. cognite/neat/legacy/rules/examples/Rules-Nordic44-to-graphql.xlsx +0 -0
  64. cognite/neat/legacy/rules/examples/Rules-Nordic44.xlsx +0 -0
  65. cognite/neat/legacy/rules/examples/__init__.py +0 -18
  66. cognite/neat/legacy/rules/examples/power-grid-containers.yaml +0 -124
  67. cognite/neat/legacy/rules/examples/power-grid-example.xlsx +0 -0
  68. cognite/neat/legacy/rules/examples/power-grid-model.yaml +0 -224
  69. cognite/neat/legacy/rules/examples/rules-template.xlsx +0 -0
  70. cognite/neat/legacy/rules/examples/sheet2cdf-transformation-rules.xlsx +0 -0
  71. cognite/neat/legacy/rules/examples/skos-rules.xlsx +0 -0
  72. cognite/neat/legacy/rules/examples/source-to-solution-mapping-rules.xlsx +0 -0
  73. cognite/neat/legacy/rules/examples/wind-energy.owl +0 -1511
  74. cognite/neat/legacy/rules/exceptions.py +0 -2972
  75. cognite/neat/legacy/rules/exporters/__init__.py +0 -20
  76. cognite/neat/legacy/rules/exporters/_base.py +0 -45
  77. cognite/neat/legacy/rules/exporters/_core/__init__.py +0 -5
  78. cognite/neat/legacy/rules/exporters/_core/rules2labels.py +0 -24
  79. cognite/neat/legacy/rules/exporters/_rules2dms.py +0 -885
  80. cognite/neat/legacy/rules/exporters/_rules2excel.py +0 -213
  81. cognite/neat/legacy/rules/exporters/_rules2graphql.py +0 -183
  82. cognite/neat/legacy/rules/exporters/_rules2ontology.py +0 -524
  83. cognite/neat/legacy/rules/exporters/_rules2pydantic_models.py +0 -748
  84. cognite/neat/legacy/rules/exporters/_rules2rules.py +0 -105
  85. cognite/neat/legacy/rules/exporters/_rules2triples.py +0 -38
  86. cognite/neat/legacy/rules/exporters/_validation.py +0 -146
  87. cognite/neat/legacy/rules/importers/__init__.py +0 -22
  88. cognite/neat/legacy/rules/importers/_base.py +0 -66
  89. cognite/neat/legacy/rules/importers/_dict2rules.py +0 -158
  90. cognite/neat/legacy/rules/importers/_dms2rules.py +0 -194
  91. cognite/neat/legacy/rules/importers/_graph2rules.py +0 -308
  92. cognite/neat/legacy/rules/importers/_json2rules.py +0 -39
  93. cognite/neat/legacy/rules/importers/_owl2rules/__init__.py +0 -3
  94. cognite/neat/legacy/rules/importers/_owl2rules/_owl2classes.py +0 -239
  95. cognite/neat/legacy/rules/importers/_owl2rules/_owl2metadata.py +0 -260
  96. cognite/neat/legacy/rules/importers/_owl2rules/_owl2properties.py +0 -217
  97. cognite/neat/legacy/rules/importers/_owl2rules/_owl2rules.py +0 -290
  98. cognite/neat/legacy/rules/importers/_spreadsheet2rules.py +0 -45
  99. cognite/neat/legacy/rules/importers/_xsd2rules.py +0 -20
  100. cognite/neat/legacy/rules/importers/_yaml2rules.py +0 -39
  101. cognite/neat/legacy/rules/models/__init__.py +0 -5
  102. cognite/neat/legacy/rules/models/_base.py +0 -151
  103. cognite/neat/legacy/rules/models/raw_rules.py +0 -316
  104. cognite/neat/legacy/rules/models/rdfpath.py +0 -237
  105. cognite/neat/legacy/rules/models/rules.py +0 -1289
  106. cognite/neat/legacy/rules/models/tables.py +0 -9
  107. cognite/neat/legacy/rules/models/value_types.py +0 -118
  108. cognite/neat/legacy/workflows/examples/Export_DMS/workflow.yaml +0 -89
  109. cognite/neat/legacy/workflows/examples/Export_Rules_to_Ontology/workflow.yaml +0 -152
  110. cognite/neat/legacy/workflows/examples/Extract_DEXPI_Graph_and_Export_Rules/workflow.yaml +0 -139
  111. cognite/neat/legacy/workflows/examples/Extract_RDF_Graph_and_Generate_Assets/workflow.yaml +0 -270
  112. cognite/neat/legacy/workflows/examples/Import_DMS/workflow.yaml +0 -65
  113. cognite/neat/legacy/workflows/examples/Ontology_to_Data_Model/workflow.yaml +0 -116
  114. cognite/neat/legacy/workflows/examples/Validate_Rules/workflow.yaml +0 -67
  115. cognite/neat/legacy/workflows/examples/Validate_Solution_Model/workflow.yaml +0 -64
  116. cognite/neat/legacy/workflows/examples/Visualize_Data_Model_Using_Mock_Graph/workflow.yaml +0 -95
  117. cognite/neat/legacy/workflows/examples/Visualize_Semantic_Data_Model/workflow.yaml +0 -111
  118. cognite/neat/workflows/examples/Extract_RDF_Graph_and_Generate_Assets/workflow.yaml +0 -270
  119. cognite/neat/workflows/migration/__init__.py +0 -0
  120. cognite/neat/workflows/migration/steps.py +0 -91
  121. cognite/neat/workflows/migration/wf_manifests.py +0 -33
  122. cognite/neat/workflows/steps/lib/legacy/__init__.py +0 -7
  123. cognite/neat/workflows/steps/lib/legacy/graph_contextualization.py +0 -82
  124. cognite/neat/workflows/steps/lib/legacy/graph_extractor.py +0 -746
  125. cognite/neat/workflows/steps/lib/legacy/graph_loader.py +0 -606
  126. cognite/neat/workflows/steps/lib/legacy/graph_store.py +0 -307
  127. cognite/neat/workflows/steps/lib/legacy/graph_transformer.py +0 -58
  128. cognite/neat/workflows/steps/lib/legacy/rules_exporter.py +0 -511
  129. cognite/neat/workflows/steps/lib/legacy/rules_importer.py +0 -612
  130. {cognite_neat-0.87.4.dist-info → cognite_neat-0.88.0.dist-info}/LICENSE +0 -0
  131. {cognite_neat-0.87.4.dist-info → cognite_neat-0.88.0.dist-info}/WHEEL +0 -0
  132. {cognite_neat-0.87.4.dist-info → cognite_neat-0.88.0.dist-info}/entry_points.txt +0 -0
@@ -1,10 +0,0 @@
1
- from pathlib import Path
2
-
3
- # we should make this a proper package that loads examples
4
- # similar to how they handle it in xarray:
5
- # https://github.com/pydata/xarray/blob/main/xarray/tutorial.py
6
- # Currently there are simple paths to the examples which are then easily loaded in the notebooks
7
# Paths to the bundled Nordic44 example knowledge graphs. Each path is this
# module's own path with the file name swapped for the example's file name.
nordic44_knowledge_graph = Path(__file__).with_name("Knowledge-Graph-Nordic44.xml")
nordic44_knowledge_graph_dirty = Path(__file__).with_name("Knowledge-Graph-Nordic44-dirty.xml")
@@ -1,90 +0,0 @@
1
- """This module contains the definition of validation errors and warnings raised during graph methods"""
2
-
3
- from cognite.neat.constants import DEFAULT_DOCS_URL
4
- from cognite.neat.exceptions import NeatException
5
-
6
- DOCS_BASE_URL = f"{DEFAULT_DOCS_URL}api/exceptions.html#{__name__}"
7
-
8
-
9
class UnsupportedPropertyType(NeatException):
    """Unsupported property type encountered while processing the graph capturing sheet.

    Args:
        property_type: property type that is not supported
        verbose: when True, the description, example and fix are appended to the
            exception message. Defaults to False.
    """

    type_: str = "UnsupportedPropertyType"
    code: int = 1000
    description: str = "Unsupported property type when processing the graph capturing sheet."
    example: str = ""
    fix: str = ""

    def __init__(self, property_type: str, verbose: bool = False):
        self.property_type = property_type

        # Collect the message fragments first and join them once at the end.
        fragments = [
            f"Property type {self.property_type} is not supported. ",
            " Only the following property types are supported: DatatypeProperty and ObjectProperty",
            f"\nFor more information visit: {DOCS_BASE_URL}.{type(self).__name__}",
        ]
        if verbose:
            fragments += [
                f"\nDescription: {self.description}",
                f"\nExample: {self.example}",
                f"\nFix: {self.fix}",
            ]

        self.message = "".join(fragments)
        super().__init__(self.message)
37
-
38
-
39
class NamespaceRequired(NeatException):
    """The functionality requires namespace in the TransformationRules.

    Args:
        functionality: functionality that requires namespace
        verbose: flag that indicates whether to provide enhanced exception message, by default False
    """

    type_ = "NamespaceRequired"
    description: str = "The functionality requires namespace in the TransformationRules."
    example: str = ""
    fix: str = ""

    def __init__(self, functionality: str, verbose: bool = False):
        self.message = (
            # The trailing space keeps "rules" and "to use" from running together
            # when the adjacent literals are concatenated.
            "Namespace is required to be set in the Transformation rules "
            f"to use {functionality}."
            f"\nFor more information visit: {DOCS_BASE_URL}.{self.__class__.__name__}"
        )

        if verbose:
            self.message += f"\nDescription: {self.description}"
            self.message += f"\nExample: {self.example}"
            self.message += f"\nFix: {self.fix}"
        super().__init__(self.message)
64
-
65
-
66
class DatasetIdRequired(NeatException):
    """The functionality requires data_set_id in the TransformationRules.

    Args:
        functionality: functionality that requires data_set_id
        verbose: flag that indicates whether to provide enhanced exception message, by default False
    """

    type_ = "DatasetIdRequired"
    description: str = "The functionality requires data_set_id in the TransformationRules."
    example: str = ""
    fix: str = ""

    def __init__(self, functionality: str, verbose: bool = False):
        self.message = (
            # The trailing space keeps "rules" and "to use" from running together
            # when the adjacent literals are concatenated.
            "DataSetId is required to be set in the Transformation rules "
            f"to use {functionality}."
            f"\nFor more information visit: {DOCS_BASE_URL}.{self.__class__.__name__}"
        )

        if verbose:
            self.message += f"\nDescription: {self.description}"
            self.message += f"\nExample: {self.example}"
            self.message += f"\nFix: {self.fix}"
        super().__init__(self.message)
@@ -1,6 +0,0 @@
1
- from ._base import BaseExtractor
2
- from ._dexpi import DexpiXML
3
- from ._graph_capturing_sheet import GraphCapturingSheet
4
- from ._mock_graph_generator import MockGraphGenerator
5
-
6
- __all__ = ["BaseExtractor", "MockGraphGenerator", "GraphCapturingSheet", "DexpiXML"]
@@ -1,14 +0,0 @@
1
- from abc import abstractmethod
2
- from collections.abc import Iterable
3
-
4
- from cognite.neat.legacy.graph.models import Triple
5
-
6
-
7
class BaseExtractor:
    """Common interface shared by all extractors.

    Subclasses override `extract` to produce triples. The class does not derive
    from `abc.ABC`, so the `@abstractmethod` marker is not enforced when the class
    is instantiated; calling the base implementation raises `NotImplementedError`.
    """

    @abstractmethod
    def extract(self) -> Iterable[Triple]:
        """Return the extracted triples; to be overridden by subclasses."""
        raise NotImplementedError
@@ -1,44 +0,0 @@
1
- import xml.etree.ElementTree as ET
2
- from pathlib import Path
3
-
4
- from rdflib import Namespace
5
-
6
- from cognite.neat.constants import DEFAULT_NAMESPACE
7
- from cognite.neat.graph.extractors._dexpi import DexpiExtractor
8
- from cognite.neat.legacy.graph.models import Triple
9
-
10
- from ._base import BaseExtractor
11
-
12
-
13
class DexpiXML(BaseExtractor):
    """
    DEXPI-XML extractor of RDF triples

    Args:
        filepath: File path to DEXPI XML file.
        base_namespace: Optional custom namespace to use for extracted triples that define data
                    model instances. Defaults to None, in which case DEFAULT_NAMESPACE is used.
    """

    def __init__(
        self,
        filepath: Path | str,
        base_namespace: str | None = None,
    ):
        self.filepath = Path(filepath)
        # Anything that is not a string (or Namespace) falls back to the default namespace.
        self.namespace = Namespace(base_namespace) if isinstance(base_namespace, str | Namespace) else DEFAULT_NAMESPACE

    def extract(self) -> set[Triple]:
        """
        Extracts RDF triples from the DEXPI XML file.

        Returns:
            Set of RDF triples, represented as tuples `(subject, predicate, object)`, that define data model instances

        Raises:
            ValueError: If `filepath` has been reset to None after construction.
        """
        if self.filepath is None:
            raise ValueError("File path to the DEXPI XML file is not provided!")

        # NOTE(review): xml.etree.ElementTree is not hardened against maliciously
        # constructed XML; confirm the file comes from a trusted source.
        root = ET.parse(self.filepath).getroot()

        # removing legacy code by reusing the maintained version of DexpiExtractor
        return set(DexpiExtractor(root, self.namespace).extract())
@@ -1,403 +0,0 @@
1
- import logging
2
- import uuid
3
- import warnings
4
- from pathlib import Path
5
- from typing import cast
6
-
7
- import numpy as np
8
- import pandas as pd
9
- from openpyxl import Workbook, load_workbook
10
- from openpyxl.cell import Cell
11
- from openpyxl.styles import Alignment, Border, Font, NamedStyle, PatternFill, Side
12
- from openpyxl.utils import get_column_letter
13
- from openpyxl.worksheet.datavalidation import DataValidation
14
- from openpyxl.worksheet.worksheet import Worksheet
15
- from rdflib import RDF, XSD, Literal, Namespace, URIRef
16
-
17
- from cognite.neat.legacy.graph import exceptions
18
- from cognite.neat.legacy.graph.exceptions import NamespaceRequired
19
- from cognite.neat.legacy.graph.models import Triple
20
- from cognite.neat.legacy.rules.analysis import get_defined_classes, to_class_property_pairs
21
- from cognite.neat.legacy.rules.exporters._rules2rules import to_dms_name
22
- from cognite.neat.legacy.rules.models.rules import Rules
23
-
24
- from ._base import BaseExtractor
25
-
26
-
27
class GraphCapturingSheet(BaseExtractor):
    """
    Graph capturing sheet class that provides methods for creating a graph capturing sheet and extracting RDF triples.

    Args:
        rules: Transformation rules which holds data model that is used to validate
               the graph capturing sheet and extract data model instances from it (i.e. RDF triples)
        filepath: File path to save the sheet to. Defaults to None.
        separator: Multi value separator at cell level. Defaults to ",".
        namespace: Optional custom namespace to use for extracted triples that define data
                    model instances. Defaults to None, meaning namespace of rules will be used.
        store_graph_capturing_sheet: Whether to store the graph capturing sheet in the object. Will be stored in the
                                     `sheet` attribute. Defaults to False.
        use_source_ids : Whether to use source ids for properties and classes stored in Source column if they exist.
                         Defaults to False, meaning that the source ids will be ignored.

    """

    def __init__(
        self,
        rules: Rules,
        filepath: Path | str | None = None,
        separator: str = ",",
        namespace: str | None = None,
        store_graph_capturing_sheet: bool = False,
        use_source_ids: bool = False,
    ):
        self.rules = rules
        self.filepath = Path(filepath) if isinstance(filepath, str | Path) else None
        self.separator = separator
        self.namespace = namespace
        self.store_graph_capturing_sheet = store_graph_capturing_sheet
        self.use_source_ids = use_source_ids
        # Populated by `extract` only when `store_graph_capturing_sheet` is True.
        self.sheet: dict[str, pd.DataFrame] = {}

    def create_template(self, filepath: Path | str | None = None, overwrite: bool = False) -> None:
        """
        Creates a graph capturing sheet template based on the transformation rules.

        Args:
            filepath: File path to save the sheet to. Defaults to None, meaning the
                      file path given at construction is used.
            overwrite: Overwrite existing file. Defaults to False.

        Raises:
            ValueError: If no file path is given here or at construction.
            FileExistsError: If the file exists and `overwrite` is False.
        """
        # Accept plain strings as well, mirroring what the constructor accepts.
        target = self.filepath if filepath is None else Path(filepath)
        if target is None:
            raise ValueError("File path to the graph capturing sheet is not provided!")
        if target.exists() and not overwrite:
            raise FileExistsError(f"File {target} already exists! Set overwrite to True to overwrite it!")
        rules2graph_capturing_sheet(self.rules, target)

    def extract(self) -> list[Triple]:
        """
        Extracts RDF triples from the graph capturing sheet.

        Returns:
            List of RDF triples, represented as tuples `(subject, predicate, object)`, that define data model instances

        Raises:
            ValueError: If no file path was given at construction.
        """
        if self.filepath is None:
            raise ValueError("File path to the graph capturing sheet is not provided!")
        graph_capturing_sheet = read_graph_excel_file_to_table_by_name(self.filepath)
        if self.store_graph_capturing_sheet:
            self.sheet = graph_capturing_sheet

        # Diagnostic output goes through logging instead of being printed to stdout.
        logging.debug("Extracting triples using namespace %s", self.namespace)

        return sheet2triples(graph_capturing_sheet, self.rules, self.separator, self.namespace, self.use_source_ids)
94
-
95
-
96
def extract_graph_from_sheet(
    filepath: Path, transformation_rule: Rules, separator: str = ",", namespace: str | None = None
) -> list[Triple]:
    """Reads a graph capturing sheet from disk and converts it to RDF triples

    Args:
        filepath : Path to the graph capturing sheet
        transformation_rule : Transformation rules which holds data model that is used to validate
                              the graph capturing sheet and extract data model instances from it (i.e. RDF triples)
        separator : Multi value separator at cell level. Defaults to ",".
        namespace : Optional custom namespace to use for extracted triples that define data
                    model instances. Defaults to None, meaning namespace of rules will be used.

    Returns:
        List of RDF triples, represented as tuples `(subject, predicate, object)`, that define data model instances
    """
    return sheet2triples(
        read_graph_excel_file_to_table_by_name(filepath),
        transformation_rule,
        separator,
        namespace,
    )
116
-
117
-
118
def sheet2triples(
    graph_capturing_sheet: dict[str, pd.DataFrame],
    rules: Rules,
    separator: str = ",",
    namespace: str | None = None,
    use_source_ids: bool = False,
) -> list[Triple]:
    """Converts a graph capturing sheet represented as dictionary of dataframes to rdf triples

    Args:
        graph_capturing_sheet : Graph capturing sheet provided as dictionary of dataframes
        rules : Transformation rules which holds data model that is used to validate
                the graph capturing sheet and extract data model instances from it (i.e. RDF triples)
        separator : Multi value separator at cell level. Defaults to ",".
        namespace : Optional custom namespace to use for extracted triples that define
                    data model instances. Defaults to None, meaning namespace of rules will be used.
        use_source_ids : Whether to use source ids for properties and classes stored in Source column if they exist.
                         Defaults to False, meaning that the source ids will be ignored.

    Returns:
        List of RDF triples, represented as tuples `(subject, predicate, object)`, that define data model instances

    Raises:
        ValueError: If the sheet is empty or shares no class with the rules.
        NamespaceRequired: If no usable namespace is available.
        UnsupportedPropertyType: If a property is neither an ObjectProperty nor a DatatypeProperty.
    """

    # Validation that everything is in order before proceeding
    validate_if_graph_capturing_sheet_empty(graph_capturing_sheet)
    validate_rules_graph_pair(graph_capturing_sheet, rules)

    # get class property pairs
    class_property_pairs = to_class_property_pairs(rules)

    # namespace selection
    if namespace is None and rules.metadata.namespace is not None:
        instance_namespace = rules.metadata.namespace
    elif namespace:
        instance_namespace = Namespace(namespace)
    else:
        raise NamespaceRequired("Extract instances from sheet")

    if rules.metadata.namespace is not None:
        model_namespace = Namespace(rules.metadata.namespace)
    else:
        raise NamespaceRequired("Extract instances from sheet")

    triples: list[Triple] = []

    for sheet_name, df in graph_capturing_sheet.items():
        class_uri = (
            URIRef(str(rules.classes[sheet_name].source))
            if use_source_ids and rules.classes[sheet_name].source
            else model_namespace[sheet_name]
        )

        for _, row in df.iterrows():
            if row.identifier is None:
                msg = f"Missing identifier in sheet {sheet_name} at row {row.name}! Skipping..."
                logging.warning(msg)
                warnings.warn(msg, stacklevel=2)
                continue

            subject = instance_namespace[row.identifier]

            for property_name, value in row.to_dict().items():
                # Setting RDF type of the instance
                if property_name == "identifier":
                    triples.append((subject, RDF.type, class_uri))
                    continue
                # NOTE(review): this also skips falsy values such as 0 or False,
                # not only empty cells - confirm that is intended.
                if not value:
                    continue

                # Look the property definition up once and reuse it below.
                property_ = class_property_pairs[sheet_name][property_name]

                property_uri = (
                    URIRef(str(property_.source))
                    if use_source_ids and property_.source
                    else model_namespace[property_name]
                )

                is_one_to_many = separator and (
                    (property_.max_count and property_.max_count > 1) or not property_.max_count
                )

                # Only a string cell can hold several separator-delimited values;
                # a numeric or boolean cell is treated as one value instead of
                # failing on the missing `split` method.
                values = value.split(separator) if is_one_to_many and isinstance(value, str) else [value]

                # Adding object properties
                if property_.property_type == "ObjectProperty":
                    # str() lets non-string references (e.g. numeric identifiers)
                    # be stripped and resolved like string ones.
                    triples.extend((subject, property_uri, instance_namespace[str(v).strip()]) for v in values)
                # Adding data properties
                elif property_.property_type == "DatatypeProperty":
                    datatype = XSD[property_.expected_value_type.suffix]
                    for v in values:
                        try:
                            literal_value = v.strip()
                        except AttributeError:
                            # non-string values are stored as they are
                            literal_value = v

                        triples.append((subject, property_uri, Literal(literal_value, datatype=datatype)))

                else:
                    raise exceptions.UnsupportedPropertyType(property_.property_type)
    return triples
234
-
235
-
236
def validate_if_graph_capturing_sheet_empty(graph_capturing_sheet: dict[str, pd.DataFrame]):
    """Ensure that at least one sheet of the graph capturing sheet holds data

    Args:
        graph_capturing_sheet : Graph capturing sheet

    Raises:
        ValueError: If every sheet is empty (or there are no sheets at all).
    """
    # A single non-empty sheet is enough for the whole workbook to pass.
    for df in graph_capturing_sheet.values():
        if not df.empty:
            return

    msg = "Graph capturing sheet is empty! Aborting!"
    logging.error(msg)
    raise ValueError(msg)
246
-
247
-
248
def validate_rules_graph_pair(graph_capturing_sheet: dict[str, pd.DataFrame], transformation_rule: Rules):
    """Validate if the graph capturing sheet is based on the transformation rules

    Logs and warns about classes present on only one side. The two mismatch
    directions are checked independently, so both warnings can be emitted.

    Args:
        graph_capturing_sheet : Graph capturing sheet
        transformation_rule : Transformation rules

    Raises:
        ValueError: If the sheet and the rules have no class in common.
    """
    sheet_classes = set(graph_capturing_sheet)
    defined_classes = set(get_defined_classes(transformation_rule))
    intersection = sheet_classes & defined_classes

    if not intersection:
        msg = "Graph capturing sheet is not based on transformation rules! Aborting!"
        logging.error(msg)
        raise ValueError(msg)

    if len(intersection) == len(sheet_classes):
        logging.info("All classes in the graph capturing sheet are defined in the transformation rules!")
    else:
        msg = "Graph capturing sheet contains classes that are not defined in the transformation rules! Proceeding..."
        logging.warning(msg)
        warnings.warn(msg, stacklevel=2)

    # Checked separately: as the last arm of one elif chain this branch could
    # never run, because the intersection is never larger than the sheet's classes.
    if len(intersection) < len(defined_classes):
        msg = "Transformation rules contain classes that are not present in the graph capturing sheet! Proceeding..."
        logging.warning(msg)
        warnings.warn(msg, stacklevel=2)
274
-
275
-
276
def read_graph_excel_file_to_table_by_name(filepath: Path) -> dict[str, pd.DataFrame]:
    """Reads every sheet of an Excel workbook into a dataframe keyed by sheet name

    Rows whose identifier equals 0 are dropped (presumably placeholders for
    unfilled template rows - TODO confirm) and NaN cells are replaced with None.

    Args:
        filepath: Path to the Excel file

    Returns:
        Dictionary mapping sheet name to the cleaned dataframe of that sheet

    Raises:
        ValueError: If a sheet has no `identifier` column.
    """
    workbook: Workbook = load_workbook(filepath)

    parsed_sheets: dict[str, pd.DataFrame] = {}
    for sheetname in workbook.sheetnames:
        df = pd.read_excel(filepath, sheet_name=sheetname, header=0)

        if "identifier" not in df.columns:
            logging.error(f"Sheet {sheetname} does not have an identifier column")
            raise ValueError(f"Sheet {sheetname} does not have an identifier column")

        # Chain both clean-up steps so the NaN replacement works on the frame
        # with rows already dropped; replacing on the original frame would
        # discard the result of the drop.
        parsed_sheets[sheetname] = df.drop(df[df.identifier == 0].index).replace({np.nan: None})

    return parsed_sheets
292
-
293
-
294
def rules2graph_capturing_sheet(
    rules: Rules,
    file_path: Path,
    no_rows: int = 1000,
    auto_identifier_type: str = "index-based",
    add_drop_down_list: bool = True,
):
    """
    Builds a graph capturing sheet (Excel workbook) from a TransformationRules object and saves it

    One worksheet is created per class, with an `identifier` column followed by
    one column per property of that class.

    Args:
        rules: The TransformationRules object to convert to the graph capturing sheet
        file_path: File path to save the sheet to
        no_rows: Number of rows for processing, by default 1000
        auto_identifier_type: Type of automatic identifier: "index-based" (default) or "uuid-based";
                              any other value leaves the identifier column without automatic identifiers
        add_drop_down_list: Add drop down selection for columns that contain linking properties, by default True

    !!! note "no_rows parameter"
        no_rows should be set to the maximum expected number of instances of any of the classes.
        By default, it is set to 1000, increase it accordingly if you have more instances.

    """

    workbook = Workbook()
    # Remove default sheet named "Sheet"
    workbook.remove(workbook["Sheet"])

    for class_, properties in to_class_property_pairs(rules).items():
        workbook.create_sheet(title=class_)

        # Header row: identifier first, then the property names
        header = ["identifier", *properties]
        cast(Worksheet, workbook[class_]).append(header)

        if auto_identifier_type == "index-based":  # default, easy to read
            logging.debug(f"Configuring index-based automatic identifiers for sheet {class_}")
            _add_index_identifiers(workbook, class_, no_rows)
        elif auto_identifier_type == "uuid-based":
            logging.debug(f"Configuring UUID-based automatic identifiers for sheet {class_}")
            _add_uuid_identifiers(workbook, class_, no_rows)
        else:
            logging.debug(f"No automatic identifier set for sheet {class_}!")

        # Property columns start at column 2, since column 1 holds the identifier
        for column_index, property_ in enumerate(properties.values(), start=2):
            if property_.property_type != "ObjectProperty" or not add_drop_down_list:
                continue
            _add_drop_down_list(
                workbook,
                class_,
                get_column_letter(column_index),
                no_rows,
                property_.expected_value_type.suffix,
                "A",
            )

    _adjust_column_width(workbook)
    _set_header_style(workbook)

    logging.info(f"Graph capturing sheet generated and stored at {file_path}!")
    workbook.save(file_path)
    workbook.close()
348
-
349
-
350
def _add_index_identifiers(workbook: Workbook, sheet: str, no_rows: int):
    """Adds index-based auto identifier to a sheet identifier column

    Each cell in A2..A{no_rows + 1} gets a formula that yields "<prefix>-<index>"
    once the neighbouring cell in column B is filled, and an empty string otherwise.
    """
    # The prefix and the worksheet do not change per row, so resolve them once.
    prefix = to_dms_name(sheet, "class", True)
    worksheet = workbook[sheet]
    for row in range(2, no_rows + 2):
        worksheet[f"A{row}"] = f'=IF(ISBLANK(B{row}), "","{prefix}-{row - 1}")'  # type: ignore[index]
355
-
356
-
357
def _add_uuid_identifiers(workbook: Workbook, sheet: str, no_rows: int):
    """Adds UUID-based auto identifier to a sheet identifier column

    Each cell in A2..A{no_rows + 1} gets a formula that yields "<prefix>-<uuid>"
    once the neighbouring cell in column B is filled, and an empty string otherwise.
    A fresh UUID is generated for every row when the formulas are written.
    """
    # The prefix and the worksheet do not change per row, so resolve them once.
    prefix = to_dms_name(sheet, "class", True)
    worksheet = workbook[sheet]
    for row in range(2, no_rows + 2):
        worksheet[f"A{row}"] = f'=IF(ISBLANK(B{row}), "","{prefix}-{uuid.uuid4()}")'  # type: ignore[index]
362
-
363
-
364
def _add_drop_down_list(workbook: Workbook, sheet: str, column: str, no_rows: int, value_sheet: str, value_column: str):
    """Adds a drop down list to a column

    Args:
        workbook: Workbook that holds both the target sheet and the value sheet
        sheet: Name of the sheet whose column gets the drop down list
        column: Letter of the column that gets the drop down list
        no_rows: Number of data rows to cover, starting at row 2
        value_sheet: Name of the sheet that provides the selectable values
        value_column: Letter of the column in `value_sheet` that provides the selectable values
    """
    # Data rows span 2..no_rows + 1 (row 1 is the header), so the source range
    # must end at no_rows + 1 or the last row's value could never be selected.
    last_row = no_rows + 1
    drop_down_list = DataValidation(type="list", formula1=f"={value_sheet}!{value_column}$2:{value_column}${last_row}")

    worksheet = cast(Worksheet, workbook[sheet])
    worksheet.add_data_validation(drop_down_list)

    for row in range(2, last_row + 1):
        drop_down_list.add(worksheet[f"{column}{row}"])  # type: ignore[index, misc]
372
-
373
-
374
def _adjust_column_width(workbook: Workbook):
    """Sizes every column of every sheet from the length of its first-row cell value"""
    for sheet in workbook.sheetnames:
        worksheet = cast(Worksheet, workbook[sheet])
        for cell_tuple in worksheet.columns:
            # Wrong type annotation in openpyxl
            header_cell = cast(Cell, cell_tuple[0])  # type: ignore[index]
            if not header_cell.value:
                # columns without a first-row value keep their current width
                continue
            adjusted_width = (len(str(header_cell.value)) + 5) * 1.2
            worksheet.column_dimensions[header_cell.column_letter].width = adjusted_width
383
-
384
-
385
def _set_header_style(workbook: Workbook):
    """Applies the header style to the first row of every sheet in the workbook

    The header cell in column A gets a different fill colour than the other header cells.
    """
    thin_black = Side(style="thin", color="000000")
    style = NamedStyle(name="header style")
    style.font = Font(bold=True, size=16)
    style.border = Border(left=thin_black, right=thin_black, top=thin_black, bottom=thin_black)
    workbook.add_named_style(style)

    for sheet in workbook.sheetnames:
        for cell_tuple in cast(Worksheet, workbook[sheet]).columns:
            # Wrong type annotation in openpyxl
            cell = cast(Cell, cell_tuple[0])  # type: ignore[index]
            worksheet = cast(Worksheet, workbook[sheet])
            coordinate = f"{cell.column_letter}1"
            header_cell = worksheet[coordinate]

            header_cell.style = style
            fill_colour = "2FB5F2" if coordinate == "A1" else "FFB202"
            header_cell.fill = PatternFill("solid", start_color=fill_colour)
            header_cell.alignment = Alignment(horizontal="center", vertical="center")