ose-plugin-hierarchical-spreadsheets 0.2.5__py3-none-any.whl → 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ose_plugin_hierarchical_spreadsheets/GenerateHierarchicalSpreadsheetReleaseStep.py +104 -69
- ose_plugin_hierarchical_spreadsheets/__init__.py +9 -3
- {ose_plugin_hierarchical_spreadsheets-0.2.5.dist-info → ose_plugin_hierarchical_spreadsheets-0.3.1.dist-info}/METADATA +4 -2
- ose_plugin_hierarchical_spreadsheets-0.3.1.dist-info/RECORD +7 -0
- {ose_plugin_hierarchical_spreadsheets-0.2.5.dist-info → ose_plugin_hierarchical_spreadsheets-0.3.1.dist-info}/WHEEL +1 -1
- ose_plugin_hierarchical_spreadsheets-0.2.5.dist-info/RECORD +0 -7
- {ose_plugin_hierarchical_spreadsheets-0.2.5.dist-info → ose_plugin_hierarchical_spreadsheets-0.3.1.dist-info}/entry_points.txt +0 -0
- {ose_plugin_hierarchical_spreadsheets-0.2.5.dist-info → ose_plugin_hierarchical_spreadsheets-0.3.1.dist-info}/top_level.txt +0 -0
|
@@ -2,6 +2,8 @@ import dataclasses
|
|
|
2
2
|
import re
|
|
3
3
|
from typing import List, Optional, Dict, Callable, Any, Tuple
|
|
4
4
|
|
|
5
|
+
import networkx as nx
|
|
6
|
+
|
|
5
7
|
import openpyxl
|
|
6
8
|
import pyhornedowl
|
|
7
9
|
from flask_github import GitHub
|
|
@@ -12,8 +14,9 @@ from ose.release.ReleaseStep import ReleaseStep
|
|
|
12
14
|
from ose.model.ReleaseScript import ReleaseScript, ReleaseScriptFile
|
|
13
15
|
from ose.model.Result import Result
|
|
14
16
|
from ose.services.ConfigurationService import ConfigurationService
|
|
15
|
-
|
|
16
|
-
from ose.
|
|
17
|
+
|
|
18
|
+
from ose.model.ExcelOntology import ExcelOntology
|
|
19
|
+
from ose.model.TermIdentifier import TermIdentifier
|
|
17
20
|
|
|
18
21
|
|
|
19
22
|
@dataclasses.dataclass
|
|
@@ -43,7 +46,10 @@ class Node:
|
|
|
43
46
|
|
|
44
47
|
def form_tree(edges: List[Tuple[Tuple[str, str | None, str | None], Optional[str]]]) -> List[Node]:
|
|
45
48
|
all_nodes = set(n for n, _ in edges)
|
|
46
|
-
item_to_node = dict(
|
|
49
|
+
item_to_node = dict(
|
|
50
|
+
(c, Node(item=c, label=lbl if lbl is not None else c, definition=d or "<no definition>"))
|
|
51
|
+
for (c, lbl, d) in all_nodes
|
|
52
|
+
)
|
|
47
53
|
|
|
48
54
|
for (child, _, _), parent in edges:
|
|
49
55
|
if parent is None:
|
|
@@ -62,9 +68,17 @@ def form_tree(edges: List[Tuple[Tuple[str, str | None, str | None], Optional[str
|
|
|
62
68
|
|
|
63
69
|
|
|
64
70
|
class GenerateHierarchicalSpreadsheetReleaseStep(ReleaseStep):
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
71
|
+
def __init__(
|
|
72
|
+
self,
|
|
73
|
+
db: SQLAlchemy,
|
|
74
|
+
gh: GitHub,
|
|
75
|
+
release_script: ReleaseScript,
|
|
76
|
+
release_id: int,
|
|
77
|
+
tmp: str,
|
|
78
|
+
config: ConfigurationService,
|
|
79
|
+
*,
|
|
80
|
+
included_files: Dict[str, str],
|
|
81
|
+
) -> None:
|
|
68
82
|
super().__init__(db, gh, release_script, release_id, tmp, config)
|
|
69
83
|
|
|
70
84
|
self._included_files = included_files
|
|
@@ -78,29 +92,42 @@ class GenerateHierarchicalSpreadsheetReleaseStep(ReleaseStep):
|
|
|
78
92
|
for file in files:
|
|
79
93
|
self._next_item(item=file.target.file, message="Generating hierarchical spreadsheet for")
|
|
80
94
|
|
|
81
|
-
|
|
95
|
+
G, ontology = self.build_hierarchy(file)
|
|
82
96
|
|
|
83
97
|
wb = openpyxl.Workbook()
|
|
84
98
|
assert wb.active is not None
|
|
85
99
|
sheet = wb.active
|
|
86
100
|
|
|
87
|
-
height =
|
|
88
|
-
|
|
101
|
+
height = nx.dag_longest_path_length(G) + 1
|
|
102
|
+
# height = max(h.height() for h in hierarchies)
|
|
103
|
+
annotations = list(
|
|
104
|
+
{
|
|
105
|
+
k
|
|
106
|
+
for n, d in G.nodes(data=True)
|
|
107
|
+
for k in d.keys() if k not in {"label", "definition"}
|
|
108
|
+
}
|
|
109
|
+
)
|
|
110
|
+
# annotations = list({k for h in hierarchies for k in h.annotations.keys()})
|
|
89
111
|
|
|
90
112
|
sheet.append(["ID", "Label"] + [""] * (height - 1) + ["Definition"] + annotations)
|
|
91
113
|
|
|
92
|
-
def write_line(n:
|
|
93
|
-
sheet.append(
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
for
|
|
103
|
-
|
|
114
|
+
def write_line(n: str, d: dict, depth: int) -> None:
|
|
115
|
+
sheet.append(
|
|
116
|
+
[ontology.get_id_for_iri(n)]
|
|
117
|
+
+ [""] * depth
|
|
118
|
+
+ [d.get("label", n)]
|
|
119
|
+
+ [""] * (height - depth - 1)
|
|
120
|
+
+ [d.get("definition", "")]
|
|
121
|
+
+ [d.get(a, None) for a in annotations]
|
|
122
|
+
)
|
|
123
|
+
|
|
124
|
+
roots = [(n, d) for n, d in G.nodes(data=True) if G.in_degree(n) == 0]
|
|
125
|
+
for root, root_data in roots:
|
|
126
|
+
write_line(root, root_data, 0)
|
|
127
|
+
successors = nx.dfs_preorder_nodes(G, root)
|
|
128
|
+
for c in successors:
|
|
129
|
+
d = G.nodes[c]
|
|
130
|
+
write_line(c, d, nx.shortest_path_length(G, root, c))
|
|
104
131
|
|
|
105
132
|
[path, name] = file.target.file.rsplit("/", 1)
|
|
106
133
|
sub_name = name.rsplit(".", 1)[0]
|
|
@@ -111,33 +138,40 @@ class GenerateHierarchicalSpreadsheetReleaseStep(ReleaseStep):
|
|
|
111
138
|
wb.save(self._local_name(file_name))
|
|
112
139
|
|
|
113
140
|
self._store_artifact(self._local_name(file_name), f"{path}/{file_name}")
|
|
114
|
-
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
result.value = None
|
|
115
144
|
result.warnings = []
|
|
116
145
|
self._set_release_result(result)
|
|
117
146
|
return result.ok()
|
|
147
|
+
|
|
148
|
+
|
|
118
149
|
|
|
119
150
|
@classmethod
|
|
120
151
|
def name(cls) -> str:
|
|
121
152
|
return "HIERARCHICAL_SPREADSHEETS"
|
|
122
153
|
|
|
123
|
-
def build_hierarchy(self, file: ReleaseScriptFile) -> Tuple[
|
|
124
|
-
# Excel files to extract annotations
|
|
125
|
-
excel_files: List[str]
|
|
154
|
+
def build_hierarchy(self, file: ReleaseScriptFile) -> Tuple[nx.DiGraph, pyhornedowl.PyIndexedOntology]:
|
|
126
155
|
release_file: str
|
|
127
156
|
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
157
|
+
release_file = next(
|
|
158
|
+
(a.local_path for a in self._artifacts() if a.target_path == file.target.file and a.kind == "final"),
|
|
159
|
+
self._local_name(file.target.file),
|
|
160
|
+
)
|
|
132
161
|
|
|
133
162
|
ontology = pyhornedowl.open_ontology_from_file(release_file)
|
|
134
163
|
|
|
135
164
|
for p, d in self._repo_config.prefixes.items():
|
|
136
165
|
ontology.prefix_mapping.add_prefix(p, d)
|
|
137
166
|
|
|
138
|
-
classes = [
|
|
139
|
-
|
|
140
|
-
|
|
167
|
+
classes = [
|
|
168
|
+
(
|
|
169
|
+
c,
|
|
170
|
+
ontology.get_annotation(c, "http://www.w3.org/2000/01/rdf-schema#label"),
|
|
171
|
+
ontology.get_annotation(c, "http://purl.obolibrary.org/obo/IAO_0000115"),
|
|
172
|
+
)
|
|
173
|
+
for c in ontology.get_classes()
|
|
174
|
+
]
|
|
141
175
|
child_parent: List[Tuple[Tuple[str, Optional[str], Optional[str]], Optional[str]]] = []
|
|
142
176
|
for c in classes:
|
|
143
177
|
for p in ontology.get_superclasses(c[0]):
|
|
@@ -145,41 +179,42 @@ class GenerateHierarchicalSpreadsheetReleaseStep(ReleaseStep):
|
|
|
145
179
|
else:
|
|
146
180
|
child_parent.append((c, None))
|
|
147
181
|
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
182
|
+
excel_ontology = ExcelOntology(file.target.iri)
|
|
183
|
+
for s in file.sources:
|
|
184
|
+
if s.type == "classes":
|
|
185
|
+
excel_ontology.add_terms_from_excel(s.file, self._local_name(s.file)).ok_or_raise()
|
|
186
|
+
elif s.type == "relations":
|
|
187
|
+
excel_ontology.add_relations_from_excel(s.file, self._local_name(s.file)).ok_or_raise()
|
|
188
|
+
|
|
189
|
+
def node_annotations(iri: str) -> dict[str, str | None]:
|
|
190
|
+
id = ontology.get_id_for_iri(iri)
|
|
191
|
+
if id is None:
|
|
192
|
+
return dict()
|
|
193
|
+
|
|
194
|
+
term = excel_ontology._raw_term_by_id(id)
|
|
195
|
+
if term is None or isinstance(term, TermIdentifier):
|
|
196
|
+
return dict()
|
|
197
|
+
|
|
198
|
+
return {
|
|
199
|
+
"comment": term.get_relation_value(TermIdentifier(id="rdfs:comment")),
|
|
200
|
+
"subontology": term.get_relation_value(TermIdentifier(label="lowerLevelOntology")),
|
|
201
|
+
"examples": "; ".join(term.get_relation_values(TermIdentifier(id="IAO:0000112"))),
|
|
202
|
+
"synonyms": "; ".join(term.get_relation_values(TermIdentifier(id="IAO:0000118"))),
|
|
203
|
+
"crossreference": "; ".join(term.get_relation_values(TermIdentifier(label="crossReference"))),
|
|
204
|
+
"informaldefinition": term.get_relation_value(TermIdentifier(label="informalDefinition")),
|
|
205
|
+
}
|
|
167
206
|
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
for h in hierarchies:
|
|
183
|
-
h.recurse(annotate)
|
|
184
|
-
|
|
185
|
-
return hierarchies, ontology
|
|
207
|
+
G = nx.DiGraph()
|
|
208
|
+
G.add_nodes_from([(iri, {**node_annotations(iri), "label": label, "definition": definition}) for iri, label, definition in classes])
|
|
209
|
+
G.add_edges_from([(p, c) for (c, _, _), p in child_parent if p is not None])
|
|
210
|
+
|
|
211
|
+
# The only terms that should be included. All other may only be included if they are on a path from root to one of these terms.
|
|
212
|
+
required_leaf_terms = {ontology.get_iri_for_id(t.id) for t in excel_ontology._terms if t.id is not None}
|
|
213
|
+
|
|
214
|
+
# Remove all other nodes (e.g. external or from a higher level ontology)
|
|
215
|
+
for root in [n for n in G if G.in_degree(n) == 0]:
|
|
216
|
+
for n in list(nx.dfs_postorder_nodes(G, root)):
|
|
217
|
+
if G.out_degree(n) == 0 and n not in required_leaf_terms:
|
|
218
|
+
G.remove_node(n)
|
|
219
|
+
|
|
220
|
+
return G, ontology
|
|
@@ -1,14 +1,20 @@
|
|
|
1
|
+
from importlib.metadata import version, PackageNotFoundError
|
|
2
|
+
|
|
3
|
+
try:
|
|
4
|
+
__version__ = version("ose-plugin-hierarchical-spreadsheets")
|
|
5
|
+
except PackageNotFoundError:
|
|
6
|
+
__version__ = "unknown"
|
|
7
|
+
|
|
1
8
|
from ose.model.Plugin import Plugin
|
|
2
9
|
from .GenerateHierarchicalSpreadsheetReleaseStep import GenerateHierarchicalSpreadsheetReleaseStep
|
|
3
10
|
|
|
4
11
|
|
|
5
|
-
|
|
6
12
|
HierarchicalSpreadsheetsPlugin = Plugin(
|
|
7
13
|
id="org.bssofoundry.hierarchicalspreadsheets",
|
|
8
14
|
name="HierarchicalSpreadsheet Plugin",
|
|
9
15
|
version="0.1.0",
|
|
10
16
|
description="Plugin to generate hierarchical spreadsheets during release.",
|
|
11
17
|
contents=[
|
|
12
|
-
|
|
18
|
+
GenerateHierarchicalSpreadsheetReleaseStep,
|
|
13
19
|
],
|
|
14
|
-
)
|
|
20
|
+
)
|
|
@@ -1,10 +1,12 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ose-plugin-hierarchical-spreadsheets
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.3.1
|
|
4
4
|
Summary: OntoSpreadEd plugin for hierarchical spreadsheet generation
|
|
5
5
|
Requires-Python: >=3.12
|
|
6
6
|
Description-Content-Type: text/markdown
|
|
7
|
-
Requires-Dist:
|
|
7
|
+
Requires-Dist: networkx>=3.6.1
|
|
8
|
+
Requires-Dist: ose-core==0.3.1
|
|
9
|
+
Requires-Dist: scipy>=1.17.0
|
|
8
10
|
|
|
9
11
|
# OSE Plugin: Hierarchical Spreadsheets
|
|
10
12
|
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
ose_plugin_hierarchical_spreadsheets/GenerateHierarchicalSpreadsheetReleaseStep.py,sha256=yEjYsJD0xNnBK03la0NAwuUUn8GhFdflzmPCxUW9-WI,8318
|
|
2
|
+
ose_plugin_hierarchical_spreadsheets/__init__.py,sha256=SJfQCWp7Jzg6N3-VZ5JDCUOsjKEZ5yPabevF2S1Me2Q,640
|
|
3
|
+
ose_plugin_hierarchical_spreadsheets-0.3.1.dist-info/METADATA,sha256=oGNDPkARV5e4ZraiB15ET4CnzKpyJFLMWnSQ2xGiNEs,1008
|
|
4
|
+
ose_plugin_hierarchical_spreadsheets-0.3.1.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
5
|
+
ose_plugin_hierarchical_spreadsheets-0.3.1.dist-info/entry_points.txt,sha256=a6pod_osiwl4-QPdRnl2wk4HQpNGYS_7jQ6_C6aJgrc,110
|
|
6
|
+
ose_plugin_hierarchical_spreadsheets-0.3.1.dist-info/top_level.txt,sha256=aYtBQC3URKw1ZKCmEvBWYGsITjMnjmbKIrkevgzQcI4,37
|
|
7
|
+
ose_plugin_hierarchical_spreadsheets-0.3.1.dist-info/RECORD,,
|
|
@@ -1,7 +0,0 @@
|
|
|
1
|
-
ose_plugin_hierarchical_spreadsheets/GenerateHierarchicalSpreadsheetReleaseStep.py,sha256=5HFAlpDKykhtH6WeAr9asasqHfjwtVfxS_3QrkeDWrA,7158
|
|
2
|
-
ose_plugin_hierarchical_spreadsheets/__init__.py,sha256=4X14epIVlHO_3MY9uA1qK5LWXLqDaMqX2lEp3dywUp4,447
|
|
3
|
-
ose_plugin_hierarchical_spreadsheets-0.2.5.dist-info/METADATA,sha256=MI2CTDzDdW9tpQSH1xQ0Kcbr5GJ7hOs0KeF_D4sRIFE,948
|
|
4
|
-
ose_plugin_hierarchical_spreadsheets-0.2.5.dist-info/WHEEL,sha256=qELbo2s1Yzl39ZmrAibXA2jjPLUYfnVhUNTlyF1rq0Y,92
|
|
5
|
-
ose_plugin_hierarchical_spreadsheets-0.2.5.dist-info/entry_points.txt,sha256=a6pod_osiwl4-QPdRnl2wk4HQpNGYS_7jQ6_C6aJgrc,110
|
|
6
|
-
ose_plugin_hierarchical_spreadsheets-0.2.5.dist-info/top_level.txt,sha256=aYtBQC3URKw1ZKCmEvBWYGsITjMnjmbKIrkevgzQcI4,37
|
|
7
|
-
ose_plugin_hierarchical_spreadsheets-0.2.5.dist-info/RECORD,,
|
|
File without changes
|