ose-plugin-hierarchical-spreadsheets 0.2.5__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,6 +2,8 @@ import dataclasses
2
2
  import re
3
3
  from typing import List, Optional, Dict, Callable, Any, Tuple
4
4
 
5
+ import networkx as nx
6
+
5
7
  import openpyxl
6
8
  import pyhornedowl
7
9
  from flask_github import GitHub
@@ -12,8 +14,9 @@ from ose.release.ReleaseStep import ReleaseStep
12
14
  from ose.model.ReleaseScript import ReleaseScript, ReleaseScriptFile
13
15
  from ose.model.Result import Result
14
16
  from ose.services.ConfigurationService import ConfigurationService
15
- from ose.utils import letters
16
- from ose.utils.github import parse_spreadsheet
17
+
18
+ from ose.model.ExcelOntology import ExcelOntology
19
+ from ose.model.TermIdentifier import TermIdentifier
17
20
 
18
21
 
19
22
  @dataclasses.dataclass
@@ -43,7 +46,10 @@ class Node:
43
46
 
44
47
  def form_tree(edges: List[Tuple[Tuple[str, str | None, str | None], Optional[str]]]) -> List[Node]:
45
48
  all_nodes = set(n for n, _ in edges)
46
- item_to_node = dict((c, Node(item=c, label=lbl if lbl is not None else c, definition=d or "<no definition>")) for (c, lbl, d) in all_nodes)
49
+ item_to_node = dict(
50
+ (c, Node(item=c, label=lbl if lbl is not None else c, definition=d or "<no definition>"))
51
+ for (c, lbl, d) in all_nodes
52
+ )
47
53
 
48
54
  for (child, _, _), parent in edges:
49
55
  if parent is None:
@@ -62,9 +68,17 @@ def form_tree(edges: List[Tuple[Tuple[str, str | None, str | None], Optional[str
62
68
 
63
69
 
64
70
  class GenerateHierarchicalSpreadsheetReleaseStep(ReleaseStep):
65
-
66
- def __init__(self, db: SQLAlchemy, gh: GitHub, release_script: ReleaseScript, release_id: int, tmp: str,
67
- config: ConfigurationService, *, included_files: Dict[str, str]) -> None:
71
+ def __init__(
72
+ self,
73
+ db: SQLAlchemy,
74
+ gh: GitHub,
75
+ release_script: ReleaseScript,
76
+ release_id: int,
77
+ tmp: str,
78
+ config: ConfigurationService,
79
+ *,
80
+ included_files: Dict[str, str],
81
+ ) -> None:
68
82
  super().__init__(db, gh, release_script, release_id, tmp, config)
69
83
 
70
84
  self._included_files = included_files
@@ -78,29 +92,42 @@ class GenerateHierarchicalSpreadsheetReleaseStep(ReleaseStep):
78
92
  for file in files:
79
93
  self._next_item(item=file.target.file, message="Generating hierarchical spreadsheet for")
80
94
 
81
- hierarchies, ontology = self.build_hierarchy(file)
95
+ G, ontology = self.build_hierarchy(file)
82
96
 
83
97
  wb = openpyxl.Workbook()
84
98
  assert wb.active is not None
85
99
  sheet = wb.active
86
100
 
87
- height = max(h.height() for h in hierarchies)
88
- annotations = list({k for h in hierarchies for k in h.annotations.keys()})
101
+ height = nx.dag_longest_path_length(G) + 1
102
+ # height = max(h.height() for h in hierarchies)
103
+ annotations = list(
104
+ {
105
+ k
106
+ for n, d in G.nodes(data=True)
107
+ for k in d.keys() if k not in {"label", "definition"}
108
+ }
109
+ )
110
+ # annotations = list({k for h in hierarchies for k in h.annotations.keys()})
89
111
 
90
112
  sheet.append(["ID", "Label"] + [""] * (height - 1) + ["Definition"] + annotations)
91
113
 
92
- def write_line(n: Node, depth: int) -> None:
93
- sheet.append([ontology.get_id_for_iri(n.item)] +
94
- [""] * depth +
95
- [n.label] + [""] * (height - depth - 1) +
96
- [n.definition] +
97
- [n.annotations.get(a, None) for a in annotations])
98
-
99
- for child in n.children:
100
- write_line(child, depth + 1)
101
-
102
- for hierarchy in hierarchies:
103
- write_line(hierarchy, 0)
114
+ def write_line(n: str, d: dict, depth: int) -> None:
115
+ sheet.append(
116
+ [ontology.get_id_for_iri(n)]
117
+ + [""] * depth
118
+ + [d.get("label", n)]
119
+ + [""] * (height - depth - 1)
120
+ + [d.get("definition", "")]
121
+ + [d.get(a, None) for a in annotations]
122
+ )
123
+
124
+ roots = [(n, d) for n, d in G.nodes(data=True) if G.in_degree(n) == 0]
125
+ for root, root_data in roots:
126
+ write_line(root, root_data, 0)
127
+ successors = nx.dfs_preorder_nodes(G, root)
128
+ for c in successors:
129
+ d = G.nodes[c]
130
+ write_line(c, d, nx.shortest_path_length(G, root, c))
104
131
 
105
132
  [path, name] = file.target.file.rsplit("/", 1)
106
133
  sub_name = name.rsplit(".", 1)[0]
@@ -111,33 +138,40 @@ class GenerateHierarchicalSpreadsheetReleaseStep(ReleaseStep):
111
138
  wb.save(self._local_name(file_name))
112
139
 
113
140
  self._store_artifact(self._local_name(file_name), f"{path}/{file_name}")
114
-
141
+
142
+
143
+ result.value = None
115
144
  result.warnings = []
116
145
  self._set_release_result(result)
117
146
  return result.ok()
147
+
148
+
118
149
 
119
150
  @classmethod
120
151
  def name(cls) -> str:
121
152
  return "HIERARCHICAL_SPREADSHEETS"
122
153
 
123
- def build_hierarchy(self, file: ReleaseScriptFile) -> Tuple[List[Node], pyhornedowl.PyIndexedOntology]:
124
- # Excel files to extract annotations
125
- excel_files: List[str]
154
+ def build_hierarchy(self, file: ReleaseScriptFile) -> Tuple[nx.DiGraph, pyhornedowl.PyIndexedOntology]:
126
155
  release_file: str
127
156
 
128
- excel_files = [self._local_name(s.file) for s in file.sources]
129
- release_file = next((a.local_path for a in self._artifacts()
130
- if a.target_path == file.target.file and a.kind == 'final'),
131
- self._local_name(file.target.file))
157
+ release_file = next(
158
+ (a.local_path for a in self._artifacts() if a.target_path == file.target.file and a.kind == "final"),
159
+ self._local_name(file.target.file),
160
+ )
132
161
 
133
162
  ontology = pyhornedowl.open_ontology_from_file(release_file)
134
163
 
135
164
  for p, d in self._repo_config.prefixes.items():
136
165
  ontology.prefix_mapping.add_prefix(p, d)
137
166
 
138
- classes = [(c, ontology.get_annotation(c, "http://www.w3.org/2000/01/rdf-schema#label"),
139
- ontology.get_annotation(c, "http://purl.obolibrary.org/obo/IAO_0000115")) for c in
140
- ontology.get_classes()]
167
+ classes = [
168
+ (
169
+ c,
170
+ ontology.get_annotation(c, "http://www.w3.org/2000/01/rdf-schema#label"),
171
+ ontology.get_annotation(c, "http://purl.obolibrary.org/obo/IAO_0000115"),
172
+ )
173
+ for c in ontology.get_classes()
174
+ ]
141
175
  child_parent: List[Tuple[Tuple[str, Optional[str], Optional[str]], Optional[str]]] = []
142
176
  for c in classes:
143
177
  for p in ontology.get_superclasses(c[0]):
@@ -145,41 +179,42 @@ class GenerateHierarchicalSpreadsheetReleaseStep(ReleaseStep):
145
179
  else:
146
180
  child_parent.append((c, None))
147
181
 
148
- # child_parent = [(c, p) for c in classes for p in ontology.get_superclasses(c[0])]
149
- hierarchies = form_tree(child_parent)
150
-
151
- # If we do not collapse (or do not import) imported ontologies a root will always contain no label
152
- # We remove these roots as they only indicate where the subontology should be mounted in the overall ontology
153
- hierarchies = [c for h in hierarchies for c in h.children]
154
-
155
- for excel_file in excel_files:
156
- with open(excel_file, "rb") as f:
157
- file_data = f.read()
158
-
159
- rows, header = parse_spreadsheet(file_data)
160
-
161
- data = dict((r["ID"], r) for r in rows if "ID" in r)
162
-
163
- def annotate(n: Node):
164
- id = ontology.get_id_for_iri(n.item)
165
- if id is None:
166
- return
182
+ excel_ontology = ExcelOntology(file.target.iri)
183
+ for s in file.sources:
184
+ if s.type == "classes":
185
+ excel_ontology.add_terms_from_excel(s.file, self._local_name(s.file)).ok_or_raise()
186
+ elif s.type == "relations":
187
+ excel_ontology.add_relations_from_excel(s.file, self._local_name(s.file)).ok_or_raise()
188
+
189
+ def node_annotations(iri: str) -> dict[str, str | None]:
190
+ id = ontology.get_id_for_iri(iri)
191
+ if id is None:
192
+ return dict()
193
+
194
+ term = excel_ontology._raw_term_by_id(id)
195
+ if term is None or isinstance(term, TermIdentifier):
196
+ return dict()
197
+
198
+ return {
199
+ "comment": term.get_relation_value(TermIdentifier(id="rdfs:comment")),
200
+ "subontology": term.get_relation_value(TermIdentifier(label="lowerLevelOntology")),
201
+ "examples": "; ".join(term.get_relation_values(TermIdentifier(id="IAO:0000112"))),
202
+ "synonyms": "; ".join(term.get_relation_values(TermIdentifier(id="IAO:0000118"))),
203
+ "crossreference": "; ".join(term.get_relation_values(TermIdentifier(label="crossReference"))),
204
+ "informaldefinition": term.get_relation_value(TermIdentifier(label="informalDefinition")),
205
+ }
167
206
 
168
- fields = {
169
- "comment": "Comment",
170
- "subontology": "Sub-ontology",
171
- "examples": "Examples",
172
- "synonyms": "Synonyms",
173
- "crossreference": "Cross reference",
174
- "informaldefinition": "Informal definition",
175
- }
176
- for field_key, field in fields.items():
177
- node_data = data.get(id, dict())
178
- key = next((k for k in node_data.keys() if letters(k) == field_key), None)
179
- if key is not None and n.annotations.get(field, None) is None:
180
- n.annotations[field] = node_data[key]
181
-
182
- for h in hierarchies:
183
- h.recurse(annotate)
184
-
185
- return hierarchies, ontology
207
+ G = nx.DiGraph()
208
+ G.add_nodes_from([(iri, {**node_annotations(iri), "label": label, "definition": definition}) for iri, label, definition in classes])
209
+ G.add_edges_from([(p, c) for (c, _, _), p in child_parent if p is not None])
210
+
211
+ # The only terms that should be included. All others may only be included if they are on a path from a root to one of these terms.
212
+ required_leaf_terms = {ontology.get_iri_for_id(t.id) for t in excel_ontology._terms if t.id is not None}
213
+
214
+ # Remove all other nodes (e.g. external or from a higher-level ontology)
215
+ for root in [n for n in G if G.in_degree(n) == 0]:
216
+ for n in list(nx.dfs_postorder_nodes(G, root)):
217
+ if G.out_degree(n) == 0 and n not in required_leaf_terms:
218
+ G.remove_node(n)
219
+
220
+ return G, ontology
@@ -1,14 +1,20 @@
1
+ from importlib.metadata import version, PackageNotFoundError
2
+
3
+ try:
4
+ __version__ = version("ose-plugin-hierarchical-spreadsheets")
5
+ except PackageNotFoundError:
6
+ __version__ = "unknown"
7
+
1
8
  from ose.model.Plugin import Plugin
2
9
  from .GenerateHierarchicalSpreadsheetReleaseStep import GenerateHierarchicalSpreadsheetReleaseStep
3
10
 
4
11
 
5
-
6
12
  HierarchicalSpreadsheetsPlugin = Plugin(
7
13
  id="org.bssofoundry.hierarchicalspreadsheets",
8
14
  name="HierarchicalSpreadsheet Plugin",
9
15
  version="0.1.0",
10
16
  description="Plugin to generate hierarchical spreadsheets during release.",
11
17
  contents=[
12
- GenerateHierarchicalSpreadsheetReleaseStep,
18
+ GenerateHierarchicalSpreadsheetReleaseStep,
13
19
  ],
14
- )
20
+ )
@@ -1,10 +1,12 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ose-plugin-hierarchical-spreadsheets
3
- Version: 0.2.5
3
+ Version: 0.3.1
4
4
  Summary: OntoSpreadEd plugin for hierarchical spreadsheet generation
5
5
  Requires-Python: >=3.12
6
6
  Description-Content-Type: text/markdown
7
- Requires-Dist: ose-core==0.2.5
7
+ Requires-Dist: networkx>=3.6.1
8
+ Requires-Dist: ose-core==0.3.1
9
+ Requires-Dist: scipy>=1.17.0
8
10
 
9
11
  # OSE Plugin: Hierarchical Spreadsheets
10
12
 
@@ -0,0 +1,7 @@
1
+ ose_plugin_hierarchical_spreadsheets/GenerateHierarchicalSpreadsheetReleaseStep.py,sha256=yEjYsJD0xNnBK03la0NAwuUUn8GhFdflzmPCxUW9-WI,8318
2
+ ose_plugin_hierarchical_spreadsheets/__init__.py,sha256=SJfQCWp7Jzg6N3-VZ5JDCUOsjKEZ5yPabevF2S1Me2Q,640
3
+ ose_plugin_hierarchical_spreadsheets-0.3.1.dist-info/METADATA,sha256=oGNDPkARV5e4ZraiB15ET4CnzKpyJFLMWnSQ2xGiNEs,1008
4
+ ose_plugin_hierarchical_spreadsheets-0.3.1.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
5
+ ose_plugin_hierarchical_spreadsheets-0.3.1.dist-info/entry_points.txt,sha256=a6pod_osiwl4-QPdRnl2wk4HQpNGYS_7jQ6_C6aJgrc,110
6
+ ose_plugin_hierarchical_spreadsheets-0.3.1.dist-info/top_level.txt,sha256=aYtBQC3URKw1ZKCmEvBWYGsITjMnjmbKIrkevgzQcI4,37
7
+ ose_plugin_hierarchical_spreadsheets-0.3.1.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.10.1)
2
+ Generator: setuptools (80.10.2)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,7 +0,0 @@
1
- ose_plugin_hierarchical_spreadsheets/GenerateHierarchicalSpreadsheetReleaseStep.py,sha256=5HFAlpDKykhtH6WeAr9asasqHfjwtVfxS_3QrkeDWrA,7158
2
- ose_plugin_hierarchical_spreadsheets/__init__.py,sha256=4X14epIVlHO_3MY9uA1qK5LWXLqDaMqX2lEp3dywUp4,447
3
- ose_plugin_hierarchical_spreadsheets-0.2.5.dist-info/METADATA,sha256=MI2CTDzDdW9tpQSH1xQ0Kcbr5GJ7hOs0KeF_D4sRIFE,948
4
- ose_plugin_hierarchical_spreadsheets-0.2.5.dist-info/WHEEL,sha256=qELbo2s1Yzl39ZmrAibXA2jjPLUYfnVhUNTlyF1rq0Y,92
5
- ose_plugin_hierarchical_spreadsheets-0.2.5.dist-info/entry_points.txt,sha256=a6pod_osiwl4-QPdRnl2wk4HQpNGYS_7jQ6_C6aJgrc,110
6
- ose_plugin_hierarchical_spreadsheets-0.2.5.dist-info/top_level.txt,sha256=aYtBQC3URKw1ZKCmEvBWYGsITjMnjmbKIrkevgzQcI4,37
7
- ose_plugin_hierarchical_spreadsheets-0.2.5.dist-info/RECORD,,