glam4cm 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. glam4cm/__init__.py +9 -0
  2. glam4cm/data_loading/__init__.py +0 -0
  3. glam4cm/data_loading/data.py +631 -0
  4. glam4cm/data_loading/encoding.py +76 -0
  5. glam4cm/data_loading/graph_dataset.py +940 -0
  6. glam4cm/data_loading/metadata.py +84 -0
  7. glam4cm/data_loading/models_dataset.py +361 -0
  8. glam4cm/data_loading/utils.py +20 -0
  9. glam4cm/downstream_tasks/__init__.py +0 -0
  10. glam4cm/downstream_tasks/bert_edge_classification.py +144 -0
  11. glam4cm/downstream_tasks/bert_graph_classification.py +137 -0
  12. glam4cm/downstream_tasks/bert_graph_classification_comp.py +156 -0
  13. glam4cm/downstream_tasks/bert_link_prediction.py +145 -0
  14. glam4cm/downstream_tasks/bert_node_classification.py +164 -0
  15. glam4cm/downstream_tasks/cm_gpt_edge_classification.py +73 -0
  16. glam4cm/downstream_tasks/cm_gpt_node_classification.py +76 -0
  17. glam4cm/downstream_tasks/cm_gpt_pretraining.py +64 -0
  18. glam4cm/downstream_tasks/common_args.py +160 -0
  19. glam4cm/downstream_tasks/create_dataset.py +51 -0
  20. glam4cm/downstream_tasks/gnn_edge_classification.py +106 -0
  21. glam4cm/downstream_tasks/gnn_graph_cls.py +101 -0
  22. glam4cm/downstream_tasks/gnn_link_prediction.py +109 -0
  23. glam4cm/downstream_tasks/gnn_node_classification.py +103 -0
  24. glam4cm/downstream_tasks/tf_idf_text_classification.py +22 -0
  25. glam4cm/downstream_tasks/utils.py +35 -0
  26. glam4cm/downstream_tasks/word2vec_text_classification.py +108 -0
  27. glam4cm/embeddings/__init__.py +0 -0
  28. glam4cm/embeddings/bert.py +72 -0
  29. glam4cm/embeddings/common.py +43 -0
  30. glam4cm/embeddings/fasttext.py +0 -0
  31. glam4cm/embeddings/tfidf.py +25 -0
  32. glam4cm/embeddings/w2v.py +41 -0
  33. glam4cm/encoding/__init__.py +0 -0
  34. glam4cm/encoding/common.py +0 -0
  35. glam4cm/encoding/encoders.py +100 -0
  36. glam4cm/graph2str/__init__.py +0 -0
  37. glam4cm/graph2str/common.py +34 -0
  38. glam4cm/graph2str/constants.py +15 -0
  39. glam4cm/graph2str/ontouml.py +141 -0
  40. glam4cm/graph2str/uml.py +0 -0
  41. glam4cm/lang2graph/__init__.py +0 -0
  42. glam4cm/lang2graph/archimate.py +31 -0
  43. glam4cm/lang2graph/bpmn.py +0 -0
  44. glam4cm/lang2graph/common.py +416 -0
  45. glam4cm/lang2graph/ecore.py +221 -0
  46. glam4cm/lang2graph/ontouml.py +169 -0
  47. glam4cm/lang2graph/utils.py +80 -0
  48. glam4cm/models/cmgpt.py +352 -0
  49. glam4cm/models/gnn_layers.py +273 -0
  50. glam4cm/models/hf.py +10 -0
  51. glam4cm/run.py +99 -0
  52. glam4cm/run_configs.py +126 -0
  53. glam4cm/settings.py +54 -0
  54. glam4cm/tokenization/__init__.py +0 -0
  55. glam4cm/tokenization/special_tokens.py +4 -0
  56. glam4cm/tokenization/utils.py +37 -0
  57. glam4cm/trainers/__init__.py +0 -0
  58. glam4cm/trainers/bert_classifier.py +105 -0
  59. glam4cm/trainers/cm_gpt_trainer.py +153 -0
  60. glam4cm/trainers/gnn_edge_classifier.py +126 -0
  61. glam4cm/trainers/gnn_graph_classifier.py +123 -0
  62. glam4cm/trainers/gnn_link_predictor.py +144 -0
  63. glam4cm/trainers/gnn_node_classifier.py +135 -0
  64. glam4cm/trainers/gnn_trainer.py +129 -0
  65. glam4cm/trainers/metrics.py +55 -0
  66. glam4cm/utils.py +194 -0
  67. glam4cm-0.1.0.dist-info/LICENSE +21 -0
  68. glam4cm-0.1.0.dist-info/METADATA +86 -0
  69. glam4cm-0.1.0.dist-info/RECORD +72 -0
  70. glam4cm-0.1.0.dist-info/WHEEL +5 -0
  71. glam4cm-0.1.0.dist-info/entry_points.txt +2 -0
  72. glam4cm-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,221 @@
1
+ import xmltodict
2
+ from glam4cm.lang2graph.common import LangGraph
3
+ import json
4
+ from glam4cm.tokenization.utils import doc_tokenizer
5
+ from glam4cm.settings import logger
6
+
7
+
8
+
9
+ REFERENCE = 'reference'
10
+ SUPERTYPE = 'supertype'
11
+ CONTAINMENT = 'containment'
12
+
13
+ EGenericType = 'EGenericType'
14
+ EPackage = 'EPackage'
15
+ EClass = 'EClass'
16
+ EAttribute = 'EAttribute'
17
+ EReference = 'EReference'
18
+ EEnum = 'EEnum'
19
+ EEnumLiteral = 'EEnumLiteral'
20
+ EOperation = 'EOperation'
21
+ EParameter = 'EParameter'
22
+ EDataType = 'EDataType'
23
+ GenericNodes = [EGenericType, EPackage]
24
+
25
+
26
+
27
+ class EcoreNxG(LangGraph):
28
+ def __init__(self, json_obj: dict):
29
+ super().__init__()
30
+ self.xmi = json_obj.get('xmi')
31
+ self.graph_id = json_obj.get('ids')
32
+ self.json_obj = json_obj
33
+ self.graph_type = json_obj.get('model_type')
34
+ self.label = json_obj.get('labels')
35
+ self.is_duplicated = json_obj.get('is_duplicated')
36
+ self.directed = json.loads(json_obj.get('graph')).get('directed')
37
+ # self.text = doc_tokenizer(json_obj.get('txt'))
38
+
39
+ self.__create_graph()
40
+ self.set_numbered_labels()
41
+
42
+
43
+ def __create_graph(self):
44
+ model = xmltodict.parse(self.xmi)
45
+ eclassifiers, _ = get_eclassifiers(model)
46
+ classifier_nodes = dict()
47
+ for eclassifier in eclassifiers:
48
+ eclassifier_info = get_eclassifier_info(eclassifier)
49
+ classifier_nodes[eclassifier_info['name']] = eclassifier_info
50
+
51
+ references = get_connections(classifier_nodes)
52
+
53
+ for classifier_name, classifier_info in classifier_nodes.items():
54
+ # if classifier_info['type'] != 'class':
55
+ # continue
56
+ structural_features = classifier_info.get('structural_features', [])
57
+ attributes = list()
58
+ for f in structural_features:
59
+ if f['type'] == 'ecore:EAttribute':
60
+ name = f['name']
61
+ attr_type = f['ref'] if f['ref'] else ''
62
+ attributes.append((name, attr_type))
63
+
64
+ self.add_node(
65
+ classifier_name,
66
+ name=classifier_name,
67
+ attributes=attributes,
68
+ abstract=classifier_info['abstract']
69
+ )
70
+
71
+ for edge in references:
72
+ src, dest = edge['source'], edge['target']
73
+ name = edge['name'] if 'name' in edge else ''
74
+ self.add_edge(src, dest, name=name, type=edge['type'])
75
+
76
+ for node in self.nodes:
77
+ self.nodes[node]['abstract'] = self.nodes[node]['abstract'] if 'abstract' in self.nodes[node] and self.nodes[node]['abstract'] is not None else False
78
+
79
+ logger.info(f'Graph {self.graph_id} created with {self.number_of_nodes()} nodes and {self.number_of_edges()} edges')
80
+
81
+ def __str__(self):
82
+ return self.__repr__()
83
+
84
+
85
+
86
+ def __repr__(self):
87
+ reference_edges = [edge for edge in self.edges if self.edges[edge]['type'] == REFERENCE]
88
+ containment_edges = [edge for edge in self.edges if self.edges[edge]['type'] == CONTAINMENT]
89
+ supertype_edges = [edge for edge in self.edges if self.edges[edge]['type'] == SUPERTYPE]
90
+ return f'EcoreNxG({self.graph_id}, nodes={self.number_of_nodes()}, edges={self.number_of_edges()}, references={len(reference_edges)}, containment={len(containment_edges)}, supertypes={len(supertype_edges)})'
91
+
92
+
93
+
94
+ def get_eclassifiers(json_obj):
95
+ def get_eclassifiers_util(json_obj, classifiers: list):
96
+ for key, value in json_obj.items():
97
+ if key == 'eClassifiers':
98
+ if isinstance(value, dict):
99
+ value = [value]
100
+ classifiers.extend(value)
101
+ elif isinstance(value, dict):
102
+ get_eclassifiers_util(value, classifiers)
103
+ elif isinstance(value, list):
104
+ for item in value:
105
+ if isinstance(item, dict):
106
+ get_eclassifiers_util(item, classifiers)
107
+ classifiers = list()
108
+ get_eclassifiers_util(json_obj, classifiers)
109
+ names = [c['@name'] for c in classifiers]
110
+ return classifiers, len(names) - len(set(names))
111
+
112
+
113
+ def get_connections(nodes):
114
+ links = list()
115
+ for source_class, classifier_info in nodes.items():
116
+ if classifier_info['type'] != 'class':
117
+ continue
118
+ super_types = classifier_info['super_types']
119
+ for super_type in super_types:
120
+ if super_type in nodes:
121
+ links.append({
122
+ 'source': source_class,
123
+ 'target': super_type,
124
+ 'type': SUPERTYPE,
125
+ })
126
+ nodes[super_type]['abstract'] = True
127
+
128
+ for feature in classifier_info['structural_features']:
129
+ ref = feature['ref']
130
+ if ref and ref in nodes:
131
+ links.append({
132
+ 'name': feature['name'],
133
+ 'source': source_class,
134
+ 'target': ref,
135
+ 'type': REFERENCE if not feature['containment'] else CONTAINMENT
136
+ })
137
+
138
+ for node in nodes:
139
+ abstract = nodes[node].get('abstract', '')
140
+ if abstract:
141
+ nodes[node]['abstract'] = True
142
+ else:
143
+ nodes[node]['abstract'] = False
144
+
145
+ return links
146
+
147
+
148
+ def get_estructural_feature(structural_feat):
149
+ feat_type = '@xsi:type' if '@xsi:type' in structural_feat else '@xmi:type'
150
+ structural_feat_type = structural_feat[feat_type]
151
+ name = structural_feat['@name']
152
+ eType = structural_feat['@eType'] if '@eType' in structural_feat else False
153
+
154
+ return {
155
+ 'name': name,
156
+ 'ref': eType.split('/')[-1] if eType else None,
157
+ 'type': structural_feat_type,
158
+ 'containment': structural_feat['@containment'] if '@containment' in structural_feat else None,
159
+ }
160
+
161
+
162
+ def get_eclassifier_info_eclass(eclass):
163
+ name = eclass['@name']
164
+ super_types = eclass['@eSuperTypes'] if '@eSuperTypes' in eclass else ""
165
+ super_types = [s.split('/')[-1] for s in super_types.split(' ')] if super_types else []
166
+ structural_features = eclass['eStructuralFeatures'] if 'eStructuralFeatures' in eclass else []
167
+ if not isinstance(structural_features, list):
168
+ structural_features = [structural_features]
169
+
170
+ structural_features_info = list()
171
+ for feature in structural_features:
172
+ structural_features_info.append(get_estructural_feature(feature))
173
+
174
+ return {
175
+ 'name': name,
176
+ 'type': 'class',
177
+ 'super_types': super_types,
178
+ 'structural_features': structural_features_info,
179
+ 'abstract': '@abstract' in eclass and eclass['@abstract']
180
+ }
181
+
182
+ def get_eclassifier_info_eenum(eenum):
183
+ name = eenum['@name']
184
+ literals = eenum['eLiterals'] if 'eLiterals' in eenum else []
185
+ if not isinstance(literals, list):
186
+ literals = [literals]
187
+
188
+ literals_info = list()
189
+ for literal in literals:
190
+ literal_label = '@literal' if '@literal' in literal else '@value'
191
+ name = literal['@name']
192
+ value = literal[literal_label] if literal_label in literal else ""
193
+ literals_info.append((name, value))
194
+
195
+ return {
196
+ 'name': name,
197
+ 'type': 'enum',
198
+ 'literals': literals_info
199
+ }
200
+
201
+ def get_eclassifier_info_edatatype(edatatype):
202
+ name = edatatype['@name']
203
+ return {
204
+ 'type': 'datatype',
205
+ 'name': name,
206
+ }
207
+
208
+
209
+ def get_eclassifier_info(eclassifier):
210
+ classifier_type = '@xsi:type' if '@xsi:type' in eclassifier else '@xmi:type'
211
+ if classifier_type not in eclassifier:
212
+ raise ValueError(f"Classifier has no type: {eclassifier}")
213
+ if eclassifier[classifier_type] in ['ecore:EClass', 'EClass']:
214
+ return get_eclassifier_info_eclass(eclassifier)
215
+ elif eclassifier[classifier_type] in ['ecore:EEnum', 'EEnum']:
216
+ return get_eclassifier_info_eenum(eclassifier)
217
+ elif eclassifier[classifier_type] in ['ecore:EDataType', 'EDataType']:
218
+ return get_eclassifier_info_edatatype(eclassifier)
219
+ else:
220
+ logger.log(eclassifier)
221
+ raise ValueError(f"Unknown classifier type: {eclassifier[classifier_type]}")
@@ -0,0 +1,169 @@
1
+ import json
2
+ from tqdm.auto import tqdm
3
+ from glam4cm.lang2graph.common import LangGraph
4
+ from glam4cm.utils import find_files_with_extension
5
+ from glam4cm.settings import logger
6
+
7
+
8
+ ONTOUML_ELEMENT_ID = 'id'
9
+ ONTOUML_ELEMENT_TYPE = 'type'
10
+ ONTOUML_ELEMENT_NAME = 'name'
11
+ ONTOUML_ELEMENT_DESCRIPTION = 'description'
12
+
13
+ ONTOUML_GENERALIZATION = "Generalization"
14
+ ONTOUML_GENERALIZATION_GENERAL = "general"
15
+ ONTOUML_GENERALIZATION_SPECIFIC = "specific"
16
+ ONTOUML_GENERALIZATION_SET = "GeneralizationSet"
17
+ ONTOUML_GENERALIZATION_SET_GENERALIZATIONS = "generalizations"
18
+ ONTOUML_GENERALIZATION_SET_IS_DISJOINT = "isDisjoint"
19
+ ONTOUML_GENERALIZATION_SET_IS_COMPLETE = "isComplete"
20
+
21
+ ONTOUML_PROJECT = "Project"
22
+ ONTOUML_PROJECT_MODEL = "model"
23
+ ONTOUML_PROJECT_MODEL_CONTENTS = "contents"
24
+ ONTOUML_RELATION = "Relation"
25
+ ONTOUML_PROPERTIES = "properties"
26
+ ONTOUML_RELATION_PROPERTY_TYPE = "propertyType"
27
+ ONTOUML_STEREOTYPE = "stereotype"
28
+ ONTOUML_CLASS = "Class"
29
+ ONTOUML_ENUMERATION = "enumeration"
30
+ ONTOUML_CLASS_LITERALS = 'literals'
31
+ ONTOUML_PACKAGE = "Package"
32
+ ONTOUML_LITERAL = "Literal"
33
+
34
+
35
+ extra_properties = [
36
+ "isAbstract",
37
+ "isDerived",
38
+ "isDisjoint",
39
+ "type",
40
+ "isComplete",
41
+ "isPowertype",
42
+ "isExtensional",
43
+ "isOrdered",
44
+ "aggregationKind",
45
+ ]
46
+
47
+
48
+ class OntoUMLNxG(LangGraph):
49
+ def __init__(self, json_obj: dict, rel_as_node=True):
50
+ super().__init__()
51
+ self.json_obj = json_obj
52
+ self.rel_as_node = rel_as_node
53
+ self.__create_graph()
54
+ self.set_numbered_labels()
55
+
56
+ self.text = " ".join([
57
+ self.nodes[node]['name'] if 'name' in self.nodes[node] else ''
58
+ for node in self.nodes
59
+ ])
60
+
61
+ def __create_graph(self):
62
+
63
+ def ontouml_id2obj(obj):
64
+ assert isinstance(obj, dict)
65
+ for key in obj:
66
+ if key == ONTOUML_ELEMENT_ID and ONTOUML_ELEMENT_TYPE in obj and obj[ONTOUML_ELEMENT_TYPE]\
67
+ in [ONTOUML_CLASS, ONTOUML_RELATION, ONTOUML_GENERALIZATION_SET, ONTOUML_GENERALIZATION]\
68
+ and ONTOUML_ELEMENT_DESCRIPTION in obj:
69
+ id2obj_map[obj[ONTOUML_ELEMENT_ID]] = obj
70
+ elif isinstance(obj[key], dict):
71
+ ontouml_id2obj(obj[key])
72
+ elif isinstance(obj[key], list):
73
+ for item in obj[key]:
74
+ assert not isinstance(item, list)
75
+ if isinstance(item, dict):
76
+ ontouml_id2obj(item)
77
+
78
+ def create_nxg():
79
+ for k, v in id2obj_map.items():
80
+ node_name = v.get('name', '')
81
+
82
+ if v[ONTOUML_ELEMENT_TYPE] in [ONTOUML_CLASS, ONTOUML_RELATION]:
83
+ self.add_node(k, name=node_name, type=v[ONTOUML_ELEMENT_TYPE], description='')
84
+ for prop in extra_properties:
85
+ self.nodes[k][prop] = v[prop] if prop in v else False
86
+
87
+ logger.info(f"Node: {node_name} type: {v[ONTOUML_ELEMENT_TYPE]}")
88
+
89
+
90
+ logger.info(f"Node: {node_name} type: {v[ONTOUML_ELEMENT_TYPE]}")
91
+ if ONTOUML_STEREOTYPE in v and v[ONTOUML_STEREOTYPE] is not None:
92
+ self.nodes[k][ONTOUML_STEREOTYPE] = v[ONTOUML_STEREOTYPE].lower()
93
+ logger.info(f"Stereotype: {v[ONTOUML_STEREOTYPE].lower()}")
94
+
95
+
96
+ if ONTOUML_ELEMENT_DESCRIPTION in v and v[ONTOUML_ELEMENT_DESCRIPTION] is not None:
97
+ self.nodes[k][ONTOUML_ELEMENT_DESCRIPTION] = v[ONTOUML_ELEMENT_DESCRIPTION]
98
+ logger.info(f"Description: {v[ONTOUML_ELEMENT_DESCRIPTION]}")
99
+
100
+
101
+ if v[ONTOUML_ELEMENT_TYPE] == ONTOUML_CLASS:
102
+ if ONTOUML_CLASS_LITERALS in v and v[ONTOUML_CLASS_LITERALS] is not None:
103
+ literals = v[ONTOUML_CLASS_LITERALS] if isinstance(v[ONTOUML_CLASS_LITERALS], list) else [v[ONTOUML_CLASS_LITERALS]]
104
+ literals_str = ", ".join([literal[ONTOUML_ELEMENT_NAME] for literal in literals])
105
+ self.nodes[k][ONTOUML_PROPERTIES] = literals_str
106
+
107
+ logger.info(f"Literals: {literals_str}")
108
+
109
+ elif ONTOUML_PROPERTIES in v and v[ONTOUML_PROPERTIES] is not None:
110
+ properties = v[ONTOUML_PROPERTIES] if isinstance(v[ONTOUML_PROPERTIES], list) else [v[ONTOUML_PROPERTIES]]
111
+ properties_str = ", ".join([property[ONTOUML_ELEMENT_NAME] for property in properties])
112
+ self.nodes[k][ONTOUML_PROPERTIES] = properties_str
113
+ logger.info(f"Properties: {properties_str}")
114
+
115
+
116
+ elif v[ONTOUML_ELEMENT_TYPE] == ONTOUML_RELATION:
117
+ properties = v[ONTOUML_PROPERTIES] if isinstance(v[ONTOUML_PROPERTIES], list) else [v[ONTOUML_PROPERTIES]]
118
+ assert len(properties) == 2
119
+ try:
120
+ x_id = properties[0][ONTOUML_RELATION_PROPERTY_TYPE][ONTOUML_ELEMENT_ID]
121
+ y_id = properties[1][ONTOUML_RELATION_PROPERTY_TYPE][ONTOUML_ELEMENT_ID]
122
+ x_name = id2obj_map[x_id][ONTOUML_ELEMENT_NAME] if ONTOUML_ELEMENT_NAME is not None else ''
123
+ y_name = id2obj_map[y_id][ONTOUML_ELEMENT_NAME] if ONTOUML_ELEMENT_NAME is not None else ''
124
+
125
+ self.add_edge(x_id, v[ONTOUML_ELEMENT_ID], type='rel')
126
+ self.add_edge(v[ONTOUML_ELEMENT_ID], y_id, type='rel')
127
+
128
+ logger.info(f"\tRelationship:, {x_name} --> {y_name}\n")
129
+ except TypeError as e:
130
+ # print(f"Error in {v[ONTOUML_ELEMENT_TYPE]}, {v[ONTOUML_ELEMENT_NAME]}")
131
+ pass
132
+
133
+
134
+ elif v[ONTOUML_ELEMENT_TYPE] == ONTOUML_GENERALIZATION:
135
+ general = v[ONTOUML_GENERALIZATION_GENERAL][ONTOUML_ELEMENT_ID]
136
+ specific = v[ONTOUML_GENERALIZATION_SPECIFIC][ONTOUML_ELEMENT_ID]
137
+ general_name = id2obj_map[general][ONTOUML_ELEMENT_NAME]\
138
+ if ONTOUML_ELEMENT_NAME in id2obj_map[general] else ''
139
+ specific_name = id2obj_map[specific][ONTOUML_ELEMENT_NAME] \
140
+ if ONTOUML_ELEMENT_NAME in id2obj_map[specific] else ''
141
+
142
+ logger.info(f"\tGeneralization:, {specific_name} -->> {general_name}\n")
143
+ self.add_edge(specific, general, type='gen')
144
+
145
+ def create_nxg_rel_as_edge():
146
+ # TODO: To be implemented
147
+ pass
148
+
149
+
150
+ id2obj_map = dict()
151
+ ontouml_id2obj(self.json_obj)
152
+ if self.rel_as_node:
153
+ create_nxg()
154
+ else:
155
+ create_nxg_rel_as_edge()
156
+
157
+
158
+ def get_ontouml_to_nx(data_dir, min_stereotypes=10):
159
+ ontouml_graphs = list()
160
+ models = find_files_with_extension(data_dir, "json")
161
+ for mfp in tqdm(models, desc=f"Reading {len(models)} OntoUML models"):
162
+ if mfp.endswith(".ecore") or mfp.endswith(".json"):
163
+ json_obj = json.loads(open(mfp, 'r', encoding='iso-8859-1').read())
164
+ g = OntoUMLNxG(json_obj)
165
+ stereotype_nodes = [node for node, stereotype in g.nodes(data=ONTOUML_STEREOTYPE) if stereotype is not None]
166
+ if len(stereotype_nodes) >= min_stereotypes:
167
+ ontouml_graphs.append((g, mfp))
168
+
169
+ return ontouml_graphs
@@ -0,0 +1,80 @@
1
+ import signal
2
+ import networkx as nx
3
+
4
+
5
+ class TimeoutException(Exception):
6
+ pass
7
+
8
+ def timeout_handler(signum, frame):
9
+ raise TimeoutException("Took too long")
10
+
11
+ def run_with_timeout(func, args=(), kwargs={}, timeout_duration=5):
12
+ # Set the signal handler and a timeout alarm
13
+ signal.signal(signal.SIGALRM, timeout_handler)
14
+ signal.alarm(timeout_duration)
15
+ try:
16
+ result = func(*args, **kwargs)
17
+ except TimeoutException:
18
+ result = None
19
+ finally:
20
+ # Disable the alarm
21
+ signal.alarm(0)
22
+ return result
23
+
24
+
25
+ def get_triple_text(node, edge_data, neighbour):
26
+ src = f'Class {node}'
27
+ dest = f'Class {neighbour}'
28
+
29
+ if edge_data is None:
30
+ return f'{src} -> {dest}'
31
+
32
+ if edge_data['type'] == 'reference' and 'name' in edge_data:
33
+ return f'{src} -> ({edge_data["name"]}) -> {dest}'
34
+
35
+ return f'{src} -> {dest}'
36
+
37
+
38
+ def find_node_str_upto_distance(node, distance=1):
39
+ nodes_with_distance = find_nodes_within_distance(
40
+ node,
41
+ distance=distance
42
+ )
43
+ if distance == 0:
44
+ return f'Class {node}'
45
+
46
+ d2n = {dd[0]: set() for _, dd in nodes_with_distance}
47
+ for neighbour, dis_data in nodes_with_distance:
48
+ d, edge_data = dis_data
49
+ if d == 0:
50
+ continue
51
+
52
+ node_text = get_triple_text(
53
+ node, edge_data, neighbour
54
+ )
55
+ if node_text:
56
+ d2n[d].add(node_text)
57
+
58
+
59
+ d2n = sorted(d2n.items(), key=lambda x: x[0])
60
+ node_buckets = [f" ".join(nbs) for _, nbs in d2n]
61
+ path_str = " | ".join(node_buckets)
62
+
63
+ return path_str
64
+
65
+
66
+ def find_nodes_within_distance(g: nx.DiGraph, n, distance=1):
67
+ visited = {n: (0, None)}
68
+ queue = [(n, 0)]
69
+
70
+ while queue:
71
+ node, d = queue.pop(0)
72
+ if d == distance:
73
+ continue
74
+ for neighbor in g.neighbors(node):
75
+ if neighbor not in visited:
76
+ visited[neighbor] = (d+1, g.edges[node, neighbor])
77
+ queue.append((neighbor, d+1))
78
+
79
+ visited = sorted(visited.items(), key=lambda x: x[1][0])
80
+ return visited