nlptoolkit-amr 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,61 @@
1
+ from __future__ import annotations
2
+ from abc import abstractmethod
3
+ from typing import List
4
+
5
+ from AnnotatedSentence.AnnotatedSentence import AnnotatedSentence
6
+
7
class AmrConstructionAlgorithm(object):
    """
    Base class for algorithms that turn an annotated sentence into a
    tab-indented AMR listing and for serialising such a listing to XML.

    NOTE: the class derives from ``object`` rather than ``abc.ABC``, so
    ``@abstractmethod`` is not enforced at instantiation time; subclasses are
    still expected to override ``constructExcelAmr``.
    """

    @abstractmethod
    def constructExcelAmr(self, sentence: AnnotatedSentence) -> list[str]:
        """
        Builds the AMR listing for the given sentence.

        :param sentence: Sentence to convert.
        :return: One string per AMR line; nesting is expressed with leading tabs.
        """
        pass

    def toString(self, sentence: AnnotatedSentence):
        """
        Returns the AMR listing of the sentence as a single string.

        :param sentence: Sentence to convert.
        :return: Every line of ``constructExcelAmr`` followed by a newline.
        """
        return "".join(item + "\n" for item in self.constructExcelAmr(sentence))

    def saveAmr(self, items: List[str], fileName: str):
        """
        Writes a tab-indented AMR listing as an ``<Amr>`` XML document.

        ``items[0]`` is the root node. Every following item is indented with
        one tab per nesting level and has the form ``name`` or
        ``name:relation`` (the relation is the text after the last colon).
        Each distinct name is emitted once as a ``<Word>`` element, and every
        item produces a ``<Connection>`` from its parent to itself.

        :param items: AMR lines; an empty list produces an empty document.
        :param fileName: Path of the XML file to create (UTF-8).
        """
        from xml.sax.saxutils import escape

        def attr(value: str) -> str:
            # Names go into double-quoted attributes, so quotes are escaped too.
            return escape(value, {'"': "&quot;"})

        def word_tag(name: str, x: int, y: int) -> str:
            return f'<Word name="{attr(name)}" positionX="{x}" positionY="{y}"/>\n'

        start_x = 750
        start_y = 100
        # Per-depth bookkeeping, keyed by tab count. Dictionaries replace the
        # former fixed 50-slot lists so arbitrarily deep trees are supported.
        last_parent = {}
        last_x = {}
        child_count = {}
        words = set()
        # The context manager guarantees the file is closed on any error.
        with open(fileName, "w", encoding="utf8") as out_file:
            out_file.write("<Amr>\n")
            if items:
                root = items[0]
                out_file.write(word_tag(root, start_x, start_y))
                words.add(root)
                last_parent[0] = root
                child_count[0] = 0
                last_x[0] = start_x
            for item in items[1:]:
                line = item.lstrip("\t")
                # Safe for empty or all-tab lines, unlike indexing char by char.
                depth = len(item) - len(line)
                has_relation = ":" in line
                if has_relation:
                    last_parent[depth] = line[:line.index(":")]
                    split_at = line.rindex(":")
                    name = line[:split_at]
                    relation = line[split_at + 1:]
                else:
                    last_parent[depth] = line
                    name = line
                    relation = ""
                child_count[depth] = 0
                siblings_so_far = child_count.get(depth - 1, 0)
                # Children are laid out left to right, 100 units apart,
                # starting 100 units to the left of their parent.
                last_x[depth] = last_x.get(depth - 1, 0) + (siblings_so_far - 1) * 100
                if name not in words:
                    out_file.write(word_tag(name, last_x[depth], start_y + 100 * depth))
                    words.add(name)
                parent = attr(last_parent.get(depth - 1, ""))
                if has_relation:
                    out_file.write(f'<Connection from="{parent}" to="{attr(name)}" with="{attr(relation)}"/>\n')
                else:
                    out_file.write(f'<Connection from="{parent}" to="{attr(name)}"/>\n')
                child_count[depth - 1] = siblings_so_far + 1
            out_file.write("</Amr>\n")
@@ -0,0 +1,355 @@
1
+ from typing import List
2
+
3
+ from AnnotatedSentence.AnnotatedSentence import AnnotatedSentence
4
+ from AnnotatedSentence.AnnotatedWord import AnnotatedWord
5
+ from MorphologicalAnalysis import MorphologicalTag
6
+ from WordNet import SemanticRelation
7
+ from WordNet.SynSet import SynSet
8
+ from WordNet.WordNet import WordNet
9
+ from WordNet.SemanticRelation import SemanticRelation
10
+
11
+ from AmrConstructionAlgorithm import AmrConstructionAlgorithm
12
+ from MorphologicalAnalysis.MorphologicalTag import MorphologicalTag
13
+
14
class RuleBasedConstructionAlgorithm(AmrConstructionAlgorithm):
    """
    Rule based AMR construction. Starting from the ROOT word of the dependency
    annotation, words are emitted depth-first as tab-indented lines of the form
    ``<index>/<root word>:<relation>``, using morphological tags, argument
    lists, universal dependency relations and WordNet look-ups.
    """

    __word_net: WordNet
    __sentence: AnnotatedSentence

    def __init__(self, word_net: WordNet) -> None:
        """
        :param word_net: WordNet used to classify proper nouns.
        """
        self.__word_net = word_net

    def __with_tabs(self, tabCount: int, string: str) -> str:
        """
        Prefixes the string with ``tabCount`` tab characters.
        """
        return "\t" * tabCount + string

    def __only_word(self, word: AnnotatedWord, i: int) -> str:
        """
        Returns ``<1-based index>/<root form>`` for the word at position ``i``.
        """
        return str(i + 1) + "/" + word.getParse().getWord().getName()

    def __contains_arg0(self, semantic: str) -> bool:
        """
        Checks whether any word of the sentence is annotated as ARG0 of the
        predicate identified by ``semantic``.
        """
        for i in range(self.__sentence.wordCount()):
            word = self.__sentence.getWord(i)
            if isinstance(word, AnnotatedWord) and word.getArgumentList() \
                    and word.getArgumentList().containsArgument("ARG0", semantic):
                return True
        return False

    def __extra_args(self, output: List[str], word: AnnotatedWord, tabCount: int):
        """
        Adds an implicit pronoun ARG0 child for verb-rooted words whose subject
        is only expressed through agreement or possessive morphology.

        :param output: AMR lines collected so far; new lines are appended.
        :param word: Word being inspected.
        :param tabCount: Depth of ``word``; children are written one level deeper.
        """
        parse = word.getParse()
        if parse.getRootPos() != "VERB":
            return
        first_and_second = (("ben", MorphologicalTag.A1SG, MorphologicalTag.P1SG),
                            ("biz", MorphologicalTag.A1PL, MorphologicalTag.P1PL),
                            ("sen", MorphologicalTag.A2SG, MorphologicalTag.P2SG),
                            ("siz", MorphologicalTag.A2PL, MorphologicalTag.P2PL))
        for pronoun, agreement, possessive in first_and_second:
            # Agreement only implies the pronoun when it is not already in the sentence.
            if parse.containsTag(agreement) and pronoun + " " not in self.__sentence.toStems():
                output.append(self.__with_tabs(tabCount + 1, pronoun + ":ARG0"))
            if parse.getPos() == "NOUN" and parse.containsTag(possessive):
                output.append(self.__with_tabs(tabCount + 1, pronoun + ":ARG0"))
        for pronoun, agreement in (("o", MorphologicalTag.A3SG), ("onlar", MorphologicalTag.A3PL)):
            if parse.containsTag(agreement) and pronoun + " " not in self.__sentence.toStems():
                if not self.__contains_arg0(word.getSemantic()):
                    # A verbal noun with 1st/2nd person possessive already got its ARG0 above.
                    has_explicit_possessor = parse.getPos() == "NOUN" and (
                        parse.containsTag(MorphologicalTag.P1SG)
                        or parse.containsTag(MorphologicalTag.P1PL)
                        or parse.containsTag(MorphologicalTag.P2SG)
                        or parse.containsTag(MorphologicalTag.P2PL))
                    if not has_explicit_possessor:
                        output.append(self.__with_tabs(tabCount + 1, pronoun + ":ARG0"))

    def __contains_mode(self, index: int) -> bool:
        """
        Checks whether the word at ``index`` has a dependent attached with an
        AMOD or NMOD relation.
        """
        for i in range(self.__sentence.wordCount()):
            word = self.__sentence.getWord(i)
            if isinstance(word, AnnotatedWord):
                dependency = word.getUniversalDependency()
                if dependency is not None and dependency.to() == index + 1 \
                        and str(dependency) in ("AMOD", "NMOD"):
                    return True
        return False

    def __extra_possessive(self, output: List[str], word: AnnotatedWord, wordIndex: int, tabCount: int):
        """
        Adds an implicit possessor (``:poss``) child derived from possessive
        morphology.

        :param output: AMR lines collected so far; new lines are appended.
        :param word: Word being inspected.
        :param wordIndex: Position of ``word`` in the sentence.
        :param tabCount: Depth of ``word``; children are written one level deeper.
        """
        parse = word.getParse()
        # A verb-rooted word used as a noun gets its possessor as ARG0 in
        # __extra_args. The previous test compared the root POS against both
        # "VERB" and "NOUN" with `or`, which is always true.
        is_verbal_noun = parse.getRootPos() == "VERB" and parse.getPos() == "NOUN"
        for pronoun, tag in (("ben", MorphologicalTag.P1SG), ("biz", MorphologicalTag.P1PL),
                             ("sen", MorphologicalTag.P2SG), ("siz", MorphologicalTag.P2PL)):
            if parse.containsTag(tag) and not is_verbal_noun:
                output.append(self.__with_tabs(tabCount + 1, pronoun + ":poss"))
        for pronoun, tag in (("o", MorphologicalTag.P3SG), ("onlar", MorphologicalTag.P3PL)):
            # Third person possessive is skipped when a modifier is attached.
            if parse.containsTag(tag) and not self.__contains_mode(wordIndex):
                output.append(self.__with_tabs(tabCount + 1, pronoun + ":poss"))

    def __is_month(self, word_name: str) -> bool:
        """
        Checks whether the given root form is a Turkish month name.
        """
        return word_name in ["ocak", "şubat", "mart", "nisan", "mayıs", "haziran", "temmuz", "ağustos",
                             "eylül", "ekim", "kasım", "aralık"]

    def __is_weekday(self, word_name: str) -> bool:
        """
        Checks whether the given root form is a Turkish weekday name.
        """
        return word_name in ["pazartesi", "salı", "çarşamba", "perşembe", "cuma", "cumartesi", "pazar"]

    def __is_ordinal(self, word_name: str) -> int:
        """
        Returns the value (1-9) of a Turkish ordinal word, or 0 if the word is
        not one of the recognised ordinals.
        """
        ordinals = {"birinci": 1, "ikinci": 2, "üçüncü": 3, "dördüncü": 4, "beşinci": 5,
                    "altıncı": 6, "yedinci": 7, "sekizinci": 8, "dokuzuncu": 9}
        return ordinals.get(word_name, 0)

    def __add_argument_list(self, output: List[str], current: AnnotatedWord, semantic: str, currentText: str) -> bool:
        """
        Emits ``currentText`` with the first of ARG0/ARG1/ARG2 that the word
        carries for the predicate ``semantic``.

        :return: True if a line was appended, False otherwise.
        """
        argument_list = current.getArgumentList()
        if argument_list is not None:
            for argument in ("ARG0", "ARG1", "ARG2"):
                if argument_list.containsArgument(argument, semantic):
                    output.append(currentText + ":" + argument)
                    return True
        return False

    def __add_details(self, tabCount: int, output: List[str], current: AnnotatedWord, wordIndex: int) -> None:
        """
        Appends the morphology-derived children of a word: polarity, implicit
        arguments, implicit possessors and imperative mode.
        """
        if current.getParse().containsTag(MorphologicalTag.NEGATIVE):
            output.append(self.__with_tabs(tabCount + 1, ":polarity"))
        self.__extra_args(output, current, tabCount)
        self.__extra_possessive(output, current, wordIndex, tabCount)
        if current.getParse().containsTag(MorphologicalTag.IMPERATIVE):
            output.append(self.__with_tabs(tabCount + 1, ":imperative:mode"))

    def __get_preliminary_extra(self, current: AnnotatedWord, index: int, added: List[str]) -> int:
        """
        Determines a relation imposed by a conditional suffix or by a
        postposition depending on the word.

        :param current: Word being inspected.
        :param index: Position of ``current`` in the sentence.
        :param added: Output parameter; ``added[0]`` receives the relation
            (``":cond"``, ``":extent"``, ``":concession"``, ``":cause"``) or
            stays unchanged.
        :return: Index of the postposition that supplied the relation, or -1.
        """
        if not added:
            added.append("")
        if current.getParse().containsTag(MorphologicalTag.CONDITIONAL):
            added[0] = ":cond"
        for i in range(self.__sentence.wordCount()):
            word = self.__sentence.getWord(i)
            if isinstance(word, AnnotatedWord) and word.getUniversalDependency() is not None \
                    and word.getUniversalDependency().to() == index + 1:
                dependent_name = word.getParse().getWord().getName()
                if dependent_name == "kadar":
                    added[0] = ":extent"
                    return i
                elif dependent_name in ["rağmen", "karşın", "karşılık"]:
                    added[0] = ":concession"
                    return i
                elif dependent_name in ["için", "sayesinde", "dolayı"]:
                    added[0] = ":cause"
                    return i
        return -1

    def __add_default_case(self, done: List[bool], tabCount: int, output: List[str], defaultString: str, extraAdded: str, added: str, addedIndex: int):
        """
        Emits ``defaultString`` (indented here by ``tabCount``) followed by
        ``extraAdded`` when given, otherwise by ``added``. In the latter case
        the postposition at ``addedIndex`` is marked as done.
        """
        if extraAdded != "":
            output.append(self.__with_tabs(tabCount, defaultString + extraAdded))
        else:
            output.append(self.__with_tabs(tabCount, defaultString + added))
            if addedIndex != -1:
                done[addedIndex] = True

    def __print_amr_recursively(self, done: List[bool], index: int, tabCount: int, output: List[str], relation: str, semantic: str, wordNet: WordNet, extraAdded: str):
        """
        Emits the AMR line(s) of the word at ``index`` and then recurses into
        the words that depend on it.

        :param done: Per-word flags of already emitted words; updated in place.
        :param index: Position of the word to emit.
        :param tabCount: Depth at which the word is written.
        :param output: AMR lines collected so far; new lines are appended.
        :param relation: Universal dependency relation linking the word to its head.
        :param semantic: Semantic id of the governing predicate (for argument look-up).
        :param wordNet: WordNet used to classify proper nouns.
        :param extraAdded: Relation suffix forced by the caller (e.g. ``":op1"``), or "".
        """
        current_word_index = index
        if done[index]:
            return
        done[index] = True
        current = self.__sentence.getWord(index)
        if not isinstance(current, AnnotatedWord):
            return
        name = current.getParse().getWord().getName()
        if relation == "DET" and name == "bir":
            return
        if name in ["ve", "veya", "hem", "ama"]:
            return
        if name == "değil":
            output.append(self.__with_tabs(tabCount, "-:polarity"))
            return
        if current.isPunctuation():
            return
        added = [""]
        added_index = self.__get_preliminary_extra(current, index, added)
        word_count = self.__sentence.wordCount()
        if current.getParse().isCardinal() and index + 1 < word_count:
            next_word = self.__sentence.getWord(index + 1)
            if isinstance(next_word, AnnotatedWord):
                next_name = next_word.getParse().getWord().getName()
                if self.__is_month(next_name):
                    # "<number> <month>" is written as a single date entity.
                    output.append(self.__with_tabs(tabCount, "date-entity:date"))
                    output.append(self.__with_tabs(tabCount + 1, self.__only_word(current, index) + ":day"))
                    output.append(self.__with_tabs(tabCount + 1, self.__only_word(next_word, index + 1) + ":month"))
                else:
                    self.__add_default_case(done, tabCount, output, self.__only_word(current, index), extraAdded, added[0], added_index)
        elif current.getParse().isProperNoun():
            is_city = any(isinstance(synset, SynSet)
                          and synset.containsRelation(SemanticRelation("TUR10-0820020", "INSTANCE_HYPERNYM"))
                          for synset in wordNet.getSynSetsWithLiteral(name))
            wiki_type = "city" if is_city else "person"
            argument_added = self.__add_argument_list(output, current, semantic, self.__with_tabs(tabCount, wiki_type))
            if not argument_added:
                if current.getParse().getRootPos() != "VERB" and current.getParse().containsTag(MorphologicalTag.INSTRUMENTAL):
                    output.append(self.__with_tabs(tabCount, wiki_type) + ":instrument")
                elif current.getParse().getRootPos() != "VERB" and current.getParse().containsTag(MorphologicalTag.LOCATIVE):
                    output.append(self.__with_tabs(tabCount, wiki_type) + ":location")
                else:
                    # __add_default_case indents by itself; pass the bare text
                    # so the line is not indented twice.
                    self.__add_default_case(done, tabCount, output, wiki_type, extraAdded, added[0], added_index)
            output.append(self.__with_tabs(tabCount + 1, "name:name"))
            output.append(self.__with_tabs(tabCount + 2, self.__only_word(current, index) + ":op1"))
            # Up to two following words sharing the semantic id belong to the same name.
            for i in range(1, 3):
                if index + i < word_count:
                    check_word = self.__sentence.getWord(index + i)
                    if isinstance(check_word, AnnotatedWord):
                        if check_word.getSemantic() is not None and check_word.getSemantic() == current.getSemantic():
                            output.append(self.__with_tabs(tabCount + 2, self.__only_word(check_word, index + i) + ":op" + str(1 + i)))
                            done[index + i] = True
                        else:
                            break
                else:
                    break
            if wiki_type == "person":
                output.append(self.__with_tabs(tabCount + 1, "-:wiki"))
            else:
                output.append(self.__with_tabs(tabCount + 1, name + "-:wiki"))
        elif self.__is_month(name):
            output.append(self.__with_tabs(tabCount, "date-entity:date"))
            output.append(self.__with_tabs(tabCount + 1, self.__only_word(current, index) + ":month"))
        elif self.__is_weekday(name):
            output.append(self.__with_tabs(tabCount, "date-entity:date"))
            output.append(self.__with_tabs(tabCount + 1, self.__only_word(current, index) + ":weekday"))
        else:
            current_word = self.__only_word(current, index)
            # Merge up to two preceding words of the same multi-word expression.
            for i in range(1, 3):
                if index > i - 1 and index - i < word_count and not done[index - i]:
                    check_word = self.__sentence.getWord(index - i)
                    if isinstance(check_word, AnnotatedWord):
                        if check_word.getSemantic() is not None and check_word.getSemantic() == current.getSemantic():
                            current_word = self.__only_word(check_word, index - i) + " " + current_word
                            done[index - i] = True
                        else:
                            break
                else:
                    break
            # Merge up to two following words; the last merged word becomes
            # the one whose morphology is inspected below.
            for i in range(1, 3):
                if index + i < word_count:
                    check_word = self.__sentence.getWord(index + i)
                    if isinstance(check_word, AnnotatedWord):
                        if check_word.getSemantic() is not None and check_word.getSemantic() == current.getSemantic():
                            current_word += " " + self.__only_word(check_word, index + i)
                            done[index + i] = True
                            current = check_word
                            current_word_index = index + i
                        else:
                            break
                else:
                    break
            head_name = current.getParse().getWord().getName()
            if head_name in ["çok", "gayet", "tam", "bayağı", "fazla", "hiç"]:
                output.append(self.__with_tabs(tabCount, current_word) + ":degree")
            elif head_name in ["hep", "sürekli"]:
                output.append(self.__with_tabs(tabCount, current_word) + ":frequency")
            else:
                argument_added = self.__add_argument_list(output, current, semantic, self.__with_tabs(tabCount, current_word))
                if argument_added:
                    self.__add_details(tabCount, output, current, current_word_index)
                elif current.getParse().containsTag(MorphologicalTag.ORDINAL) or self.__is_ordinal(head_name) > 0:
                    output.append(self.__with_tabs(tabCount, "ordinal-entity:ord"))
                    value = self.__is_ordinal(head_name)
                    if value > 0:
                        output.append(self.__with_tabs(tabCount + 1, str(value) + ":value"))
                    else:
                        output.append(self.__with_tabs(tabCount + 1, head_name + ":value"))
                else:
                    if relation == "AMOD" or relation == "NMOD":
                        output.append(self.__with_tabs(tabCount, current_word) + ":mod")
                    elif relation == "NUMMOD":
                        output.append(self.__with_tabs(tabCount, current_word) + ":quant")
                    elif relation == "ADVMOD":
                        output.append(self.__with_tabs(tabCount, current_word) + ":manner")
                    elif current.getParse().getRootPos() != "VERB" and current.getParse().containsTag(MorphologicalTag.INSTRUMENTAL):
                        output.append(self.__with_tabs(tabCount, current_word) + ":instrument")
                    elif current.getParse().getRootPos() != "VERB" and current.getParse().containsTag(MorphologicalTag.LOCATIVE):
                        output.append(self.__with_tabs(tabCount, current_word) + ":location")
                    else:
                        # Bare text: __add_default_case applies the indentation.
                        self.__add_default_case(done, tabCount, output, current_word, extraAdded, added[0], added_index)
                    self.__add_details(tabCount, output, current, current_word_index)
        # Recurse into every word (or multi-word group) attached to this word.
        i = 0
        while i < word_count:
            word = self.__sentence.getWord(i)
            if isinstance(word, AnnotatedWord):
                if word.getParse().isCardinal() and i + 1 < word_count:
                    next_word = self.__sentence.getWord(i + 1)
                    if isinstance(next_word, AnnotatedWord):
                        if self.__is_month(next_word.getParse().getWord().getName()):
                            # A date is attached through its month word and
                            # emitted starting from the day number.
                            month_dependency = next_word.getUniversalDependency()
                            if month_dependency is not None and month_dependency.to() == index + 1:
                                self.__meta_verb_tags(word, done, i, tabCount + 1, output, str(word.getUniversalDependency()), current.getSemantic(), wordNet)
                            i = i + 2
                            continue
                j = i
                # Skip to the last word of a run sharing the same semantic id.
                while i < word_count - 1 and self.__sentence.getWord(i + 1).getSemantic() is not None \
                        and self.__sentence.getWord(i + 1).getSemantic() == word.getSemantic():
                    i = i + 1
                if word.getUniversalDependency() is not None and word.getUniversalDependency().to() == index + 1:
                    self.__meta_verb_tags(word, done, j, tabCount + 1, output, str(word.getUniversalDependency()), current.getSemantic(), wordNet)
                elif j != i and self.__sentence.getWord(i).getUniversalDependency() is not None \
                        and self.__sentence.getWord(i).getUniversalDependency().to() == index + 1:
                    self.__meta_verb_tags(word, done, j, tabCount + 1, output, str(word.getUniversalDependency()), current.getSemantic(), wordNet)
            i = i + 1

    def __meta_verb_tags(self, word: AnnotatedWord, done: List[bool], index: int, tabCount: int, output: List[str], relation: str, semantic: str, wordNet: WordNet):
        """
        Emits the word at ``index``, wrapping it first in an ``and`` node when
        it has PARATAXIS/CONJ dependents, or otherwise in the modality nodes
        implied by its NECESSITY, ABLE and CAUSATIVE tags.

        Parameters have the same meaning as in ``__print_amr_recursively``.
        """
        conjuncts = []
        for i in range(self.__sentence.wordCount()):
            connected_word = self.__sentence.getWord(i)
            if isinstance(connected_word, AnnotatedWord):
                dependency = connected_word.getUniversalDependency()
                if dependency is not None and dependency.to() == index + 1 \
                        and str(dependency) in ["PARATAXIS", "CONJ"]:
                    conjuncts.append(i)
        if conjuncts:
            output.append(self.__with_tabs(tabCount, "and"))
            count = 1
            for i in conjuncts:
                self.__print_amr_recursively(done, i, tabCount + 1, output, relation, semantic, wordNet, ":op" + str(count))
                count = count + 1
            self.__print_amr_recursively(done, index, tabCount + 1, output, relation, semantic, wordNet, ":op" + str(count))
        else:
            # Each modality adds a wrapper node and pushes the verb one level deeper.
            for tag, wrapper in ((MorphologicalTag.NECESSITY, "öner"),
                                 (MorphologicalTag.ABLE, "mümkün"),
                                 (MorphologicalTag.CAUSATIVE, "yap")):
                if word.getParse().containsTag(tag):
                    output.append(self.__with_tabs(tabCount, wrapper))
                    tabCount = tabCount + 1
            self.__print_amr_recursively(done, index, tabCount, output, relation, semantic, wordNet, "")

    def constructExcelAmr(self, sentence: AnnotatedSentence) -> list[str]:
        """
        Builds the AMR listing of a sentence.

        :param sentence: Sentence to convert.
        :return: Lines of the listing. The first line is the sentence file name
            and text separated by a tab; the rest is the AMR tree rooted at
            every word whose dependency relation is ROOT.
        """
        self.__sentence = sentence
        done = [False] * sentence.wordCount()
        output = [sentence.getFileName() + "\t" + sentence.toString()]
        for i in range(sentence.wordCount()):
            word = sentence.getWord(i)
            if isinstance(word, AnnotatedWord):
                dependency = word.getUniversalDependency()
                if dependency is not None and str(dependency) == "ROOT":
                    self.__meta_verb_tags(word, done, i, 0, output, "ROOT", word.getSemantic(), self.__word_net)
        return output
@@ -0,0 +1,21 @@
1
+ from AmrWord import AmrWord
2
+
3
class AmrConnection:
    """
    A directed, labelled edge between two words of an AMR graph.
    """

    __from: AmrWord
    __to: AmrWord
    __with: str

    def __init__(self, _from: AmrWord, _to: AmrWord, _with: str):
        """
        :param _from: Word the edge starts from.
        :param _to: Word the edge points to.
        :param _with: Label of the edge.
        """
        self.__from, self.__to, self.__with = _from, _to, _with

    def getFrom(self) -> AmrWord:
        """
        Returns the word the edge starts from.
        """
        return self.__from

    def getTo(self) -> AmrWord:
        """
        Returns the word the edge points to.
        """
        return self.__to

    def getWith(self) -> str:
        """
        Returns the label of the edge.
        """
        return self.__with
@@ -0,0 +1,32 @@
1
+ import os
2
+ import re
3
+
4
+ from Corpus.Corpus import Corpus
5
+
6
+ from AmrSentence import AmrSentence
7
+
8
+
9
class AmrCorpus(Corpus):
    """
    A corpus whose sentences are AMR graphs loaded from a folder tree.
    """

    def __init__(self,
                 folder: str,
                 pattern: str = None):
        """
        A constructor of AmrCorpus class which reads all AmrSentence files with the file
        name satisfying the given pattern inside the given folder. For each file inside that folder, the constructor
        creates an AmrSentence and puts it inside the list sentences.

        PARAMETERS
        ----------
        folder : str
            Folder where all sentences reside; sub-folders are visited too.
        pattern : str
            File pattern such as "." ".train" ".test". It is matched as a
            substring of the full path. None accepts every file.
        """
        self.sentences = []
        # Only files whose name starts with digits followed by a dot are sentences.
        numbered_file = re.compile(r"\d+\.")
        for root, dirs, files in os.walk(folder):
            for file in files:
                file_name = os.path.join(root, file)
                if (pattern is None or pattern in file_name) and numbered_file.match(file):
                    # AmrSentence takes the directory and the file name
                    # separately; passing the joined path as the name would
                    # repeat the directory.
                    # NOTE(review): assumes FileDescription joins path and
                    # name itself - confirm against the Corpus package.
                    self.sentences.append(AmrSentence(root, file))
@@ -0,0 +1,101 @@
1
+ import xml.etree.ElementTree
2
+
3
+ from Corpus.FileDescription import FileDescription
4
+ from Corpus.Sentence import Sentence
5
+
6
+ from AmrConnection import AmrConnection
7
+ from AmrWord import AmrWord
8
+ from Point import Point
9
+
10
+
11
class AmrSentence(Sentence):
    """
    A sentence represented as an AMR graph: the words are the nodes and the
    connections are labelled, directed edges. The graph is read from an XML
    file containing ``<Word>`` and ``<Connection>`` elements.
    """

    __connections: list[AmrConnection]
    __file_description: FileDescription

    def constructor1(self, file_description: FileDescription):
        """
        Loads the sentence from the file identified by a file description.
        """
        self.__file_description = file_description
        self.reload()

    def constructor2(self, path: str, file_name: str):
        """
        Loads the sentence from the file ``file_name`` located in ``path``.
        """
        self.__file_description = FileDescription(path, file_name)
        self.reload()

    def __init__(self, param1: str | FileDescription, param2: str | None = None):
        """
        :param param1: Either a FileDescription, or the folder of the file.
        :param param2: File name; required when ``param1`` is a folder.
        """
        self.words = []
        self.__connections = []
        if param2 is not None and isinstance(param1, str):
            self.constructor2(param1, param2)
        elif isinstance(param1, FileDescription):
            self.constructor1(param1)

    def reload(self):
        """
        Parses the XML file of the sentence and adds its words and connections.
        """
        file_name = self.__file_description.getFileName()
        root = xml.etree.ElementTree.parse(file_name).getroot()
        self.__load_from_xml(root)

    def getRawFileName(self) -> str:
        """
        Returns the raw file name reported by the file description.
        """
        return self.__file_description.getRawFileName()

    def getFileName(self) -> str:
        """
        Returns the file name reported by the file description.
        """
        return self.__file_description.getFileName()

    def getFileDescription(self) -> FileDescription:
        """
        Returns the file description of the sentence.
        """
        return self.__file_description

    def __getWord(self, name: str) -> AmrWord | None:
        """
        Returns the first word with the given name, or None.
        """
        for word in self.words:
            if word.getName() == name:
                return word
        return None

    def getConnection(self, index: int) -> AmrConnection:
        """
        Returns the connection at the given position.
        """
        return self.__connections[index]

    def connection_count(self) -> int:
        """
        Returns the number of connections.
        """
        return len(self.__connections)

    def add_connection(self, _from: AmrWord, _to: AmrWord, _with: str):
        """
        Adds an edge labelled ``_with`` from ``_from`` to ``_to``.
        """
        self.__connections.append(AmrConnection(_from, _to, _with))

    def __load_from_xml(self, root):
        """
        Reads ``<Word>`` and ``<Connection>`` children of the root element.
        Connections referring to unknown words are ignored.

        :raises ValueError: If a word position is not an integer.
        """
        for child_node in root:
            if child_node.tag == "Word":
                # XML attributes are strings; positions are used in integer arithmetic.
                position = Point(int(child_node.attrib["positionX"]), int(child_node.attrib["positionY"]))
                self.addWord(AmrWord(child_node.attrib["name"], position))
            elif child_node.tag == "Connection":
                _from = self.__getWord(child_node.attrib["from"])
                _to = self.__getWord(child_node.attrib["to"])
                # The label is optional; a missing attribute means an unlabelled edge.
                _with = child_node.attrib.get("with", "")
                if _from is not None and _to is not None:
                    self.add_connection(_from, _to, _with)

    def __get_root(self) -> AmrWord | None:
        """
        Returns the first word that no connection points to, or None when
        every word has an incoming connection (or there are no words).
        """
        targets = {connection.getTo().getName() for connection in self.__connections}
        for word in self.words:
            if word.getName() not in targets:
                return word
        return None

    def __word_to_string(self, word: AmrWord | None, tab_count: int) -> str:
        """
        Renders the sub-graph below ``word`` in parenthesised form, one child
        per line, each child indented with ``tab_count + 1`` tabs.
        """
        if word is None:
            return ""
        result = "(" + word.getName()
        for connection in self.__connections:
            if connection.getFrom().getName() == word.getName():
                child = "\n" + "\t" * (tab_count + 1)
                child += ":" + connection.getWith()
                child += " " + self.__word_to_string(connection.getTo(), tab_count + 1)
                result += child
        return result + ")"

    def __repr__(self):
        """
        Returns the file name followed by the graph in parenthesised form.
        """
        return self.getFileName() + "\n" + self.__word_to_string(self.__get_root(), 0)

    def __str__(self):
        return self.__repr__()
@@ -0,0 +1,21 @@
1
+ from __future__ import annotations
2
+ from Dictionary.Word import Word
3
+ from Point import Point
4
+
5
class AmrWord(Word):
    """
    A word of an AMR graph together with its position on the drawing canvas.
    """

    position: Point

    def __init__(self, name: str, position: Point):
        """
        :param name: Text of the word.
        :param position: Position of the word.
        """
        super().__init__(name)
        self.position = position

    def getPosition(self) -> Point:
        """
        Returns the position of the word.
        """
        return self.position

    def move(self, deltaX: int, deltaY: int) -> None:
        """
        Shifts the position of the word in place.

        :param deltaX: Amount added to the x coordinate.
        :param deltaY: Amount added to the y coordinate.
        """
        shifted_x = self.getPosition().getX() + deltaX
        self.position.setX(shifted_x)
        shifted_y = self.getPosition().getY() + deltaY
        self.position.setY(shifted_y)

    def clone(self) -> AmrWord:
        """
        Returns a new word with the same name and a copy of the position.
        """
        here = self.getPosition()
        return AmrWord(self.getName(), Point(here.getX(), here.getY()))
@@ -0,0 +1,20 @@
1
class Point:
    """
    A mutable pair of coordinates.
    """

    __x: int
    __y: int

    def __init__(self, x: int, y: int):
        """
        :param x: Horizontal coordinate.
        :param y: Vertical coordinate.
        """
        self.__x, self.__y = x, y

    def getX(self) -> int:
        """
        Returns the horizontal coordinate.
        """
        return self.__x

    def getY(self) -> int:
        """
        Returns the vertical coordinate.
        """
        return self.__y

    def setX(self, x):
        """
        Replaces the horizontal coordinate.
        """
        self.__x = x

    def setY(self, y):
        """
        Replaces the vertical coordinate.
        """
        self.__y = y
@@ -0,0 +1,127 @@
1
+ Metadata-Version: 2.1
2
+ Name: nlptoolkit_amr
3
+ Version: 1.0.0
4
+ Summary: Simple Amr Processing
5
+ Home-page: https://github.com/StarlangSoftware/Amr-Py
6
+ Author: olcaytaner
7
+ Author-email: olcay.yildiz@ozyegin.edu.tr
8
+ License: UNKNOWN
9
+ Description: For Contributors
10
+ ============
11
+
12
+ ### Setup.py file
13
+ 1. Do not forget to set package list. All subfolders should be added to the package list.
14
+ ```
15
+ packages=['Classification', 'Classification.Model', 'Classification.Model.DecisionTree',
16
+ 'Classification.Model.Ensemble', 'Classification.Model.NeuralNetwork',
17
+ 'Classification.Model.NonParametric', 'Classification.Model.Parametric',
18
+ 'Classification.Filter', 'Classification.DataSet', 'Classification.Instance', 'Classification.Attribute',
19
+ 'Classification.Parameter', 'Classification.Experiment',
20
+ 'Classification.Performance', 'Classification.InstanceList', 'Classification.DistanceMetric',
21
+ 'Classification.StatisticalTest', 'Classification.FeatureSelection'],
22
+ ```
23
+ 2. Package name should be lowercase and may only include the _ character.
24
+ ```
25
+ name='nlptoolkit_math',
26
+ ```
27
+
28
+ ### Python files
29
+ 1. Do not forget to comment each function.
30
+ ```
31
+ def __broadcast_shape(self, shape1: Tuple[int, ...], shape2: Tuple[int, ...]) -> Tuple[int, ...]:
32
+ """
33
+ Determines the broadcasted shape of two tensors.
34
+
35
+ :param shape1: Tuple representing the first tensor shape.
36
+ :param shape2: Tuple representing the second tensor shape.
37
+ :return: Tuple representing the broadcasted shape.
38
+ """
39
+ ```
40
+ 2. Function names should follow camel case.
41
+ ```
42
+ def addItem(self, item: str):
43
+ ```
44
+ 3. Local variables should follow snake case.
45
+ ```
46
+ det = 1.0
47
+ copy_of_matrix = copy.deepcopy(self)
48
+ ```
49
+ 4. Class variables should be declared in each file.
50
+ ```
51
+ class Eigenvector(Vector):
52
+ eigenvalue: float
53
+ ```
54
+ 5. Variable types should be defined for function parameters and class variables.
55
+ ```
56
+ def getIndex(self, item: str) -> int:
57
+ ```
58
+ 6. For abstract methods, use ABC package and declare them with @abstractmethod.
59
+ ```
60
+ @abstractmethod
61
+ def train(self, train_set: list[Tensor]):
62
+ pass
63
+ ```
64
+ 7. For private methods, use __ as prefix in their names.
65
+ ```
66
+ def __infer_shape(self, data: Union[List, List[List], List[List[List]]]) -> Tuple[int, ...]:
67
+ ```
68
+ 8. For private class variables, use __ as prefix in their names.
69
+ ```
70
+ class Matrix(object):
71
+ __row: int
72
+ __col: int
73
+ __values: list[list[float]]
74
+ ```
75
+ 9. Write \_\_repr\_\_ class methods as toString methods
76
+ 10. Write getter and setter class methods.
77
+ ```
78
+ def getOptimizer(self) -> Optimizer:
79
+ return self.optimizer
80
+ def setValue(self, value: Optional[Tensor]) -> None:
81
+ self._value = value
82
+ ```
83
+ 11. If there are multiple constructors for a class, define them as constructor1, constructor2, ..., then from the original constructor call these methods.
84
+ ```
85
+ def constructor1(self):
86
+ self.__values = []
87
+ self.__size = 0
88
+
89
+ def constructor2(self, values: list):
90
+ self.__values = values.copy()
91
+ self.__size = len(values)
92
+
93
+ def __init__(self,
94
+ valuesOrSize=None,
95
+ initial=None):
96
+ if valuesOrSize is None:
97
+ self.constructor1()
98
+ elif isinstance(valuesOrSize, list):
99
+ self.constructor2(valuesOrSize)
100
+ ```
101
+ 12. Extend test classes from unittest and use separate unit test methods.
102
+ ```
103
+ class TensorTest(unittest.TestCase):
104
+
105
+ def test_inferred_shape(self):
106
+ a = Tensor([[1.0, 2.0], [3.0, 4.0]])
107
+ self.assertEqual((2, 2), a.getShape())
108
+
109
+ def test_shape(self):
110
+ a = Tensor([1.0, 2.0, 3.0])
111
+ self.assertEqual((3, ), a.getShape())
112
+ ```
113
+ 13. Enumerated types should be used when necessary as enum classes.
114
+ ```
115
+ class AttributeType(Enum):
116
+ """
117
+ Continuous Attribute
118
+ """
119
+ CONTINUOUS = auto()
120
+ """
121
+ Discrete Attribute
122
+ """
123
+ DISCRETE = auto()
124
+ ```
125
+
126
+ Platform: UNKNOWN
127
+ Description-Content-Type: text/markdown
@@ -0,0 +1,116 @@
1
+ For Contributors
2
+ ============
3
+
4
+ ### Setup.py file
5
+ 1. Do not forget to set package list. All subfolders should be added to the package list.
6
+ ```
7
+ packages=['Classification', 'Classification.Model', 'Classification.Model.DecisionTree',
8
+ 'Classification.Model.Ensemble', 'Classification.Model.NeuralNetwork',
9
+ 'Classification.Model.NonParametric', 'Classification.Model.Parametric',
10
+ 'Classification.Filter', 'Classification.DataSet', 'Classification.Instance', 'Classification.Attribute',
11
+ 'Classification.Parameter', 'Classification.Experiment',
12
+ 'Classification.Performance', 'Classification.InstanceList', 'Classification.DistanceMetric',
13
+ 'Classification.StatisticalTest', 'Classification.FeatureSelection'],
14
+ ```
15
+ 2. Package name should be lowercase and may only include the _ character.
16
+ ```
17
+ name='nlptoolkit_math',
18
+ ```
19
+
20
+ ### Python files
21
+ 1. Do not forget to comment each function.
22
+ ```
23
+ def __broadcast_shape(self, shape1: Tuple[int, ...], shape2: Tuple[int, ...]) -> Tuple[int, ...]:
24
+ """
25
+ Determines the broadcasted shape of two tensors.
26
+
27
+ :param shape1: Tuple representing the first tensor shape.
28
+ :param shape2: Tuple representing the second tensor shape.
29
+ :return: Tuple representing the broadcasted shape.
30
+ """
31
+ ```
32
+ 2. Function names should follow camel case.
33
+ ```
34
+ def addItem(self, item: str):
35
+ ```
36
+ 3. Local variables should follow snake case.
37
+ ```
38
+ det = 1.0
39
+ copy_of_matrix = copy.deepcopy(self)
40
+ ```
41
+ 4. Class variables should be declared in each file.
42
+ ```
43
+ class Eigenvector(Vector):
44
+ eigenvalue: float
45
+ ```
46
+ 5. Variable types should be defined for function parameters and class variables.
47
+ ```
48
+ def getIndex(self, item: str) -> int:
49
+ ```
50
+ 6. For abstract methods, use ABC package and declare them with @abstractmethod.
51
+ ```
52
+ @abstractmethod
53
+ def train(self, train_set: list[Tensor]):
54
+ pass
55
+ ```
56
+ 7. For private methods, use __ as prefix in their names.
57
+ ```
58
+ def __infer_shape(self, data: Union[List, List[List], List[List[List]]]) -> Tuple[int, ...]:
59
+ ```
60
+ 8. For private class variables, use __ as prefix in their names.
61
+ ```
62
+ class Matrix(object):
63
+ __row: int
64
+ __col: int
65
+ __values: list[list[float]]
66
+ ```
67
+ 9. Write \_\_repr\_\_ class methods as toString methods
68
+ 10. Write getter and setter class methods.
69
+ ```
70
+ def getOptimizer(self) -> Optimizer:
71
+ return self.optimizer
72
+ def setValue(self, value: Optional[Tensor]) -> None:
73
+ self._value = value
74
+ ```
75
+ 11. If there are multiple constructors for a class, define them as constructor1, constructor2, ..., then from the original constructor call these methods.
76
+ ```
77
+ def constructor1(self):
78
+ self.__values = []
79
+ self.__size = 0
80
+
81
+ def constructor2(self, values: list):
82
+ self.__values = values.copy()
83
+ self.__size = len(values)
84
+
85
+ def __init__(self,
86
+ valuesOrSize=None,
87
+ initial=None):
88
+ if valuesOrSize is None:
89
+ self.constructor1()
90
+ elif isinstance(valuesOrSize, list):
91
+ self.constructor2(valuesOrSize)
92
+ ```
93
+ 12. Extend test classes from unittest and use separate unit test methods.
94
+ ```
95
+ class TensorTest(unittest.TestCase):
96
+
97
+ def test_inferred_shape(self):
98
+ a = Tensor([[1.0, 2.0], [3.0, 4.0]])
99
+ self.assertEqual((2, 2), a.getShape())
100
+
101
+ def test_shape(self):
102
+ a = Tensor([1.0, 2.0, 3.0])
103
+ self.assertEqual((3, ), a.getShape())
104
+ ```
105
+ 13. Enumerated types should be used when necessary as enum classes.
106
+ ```
107
+ class AttributeType(Enum):
108
+ """
109
+ Continuous Attribute
110
+ """
111
+ CONTINUOUS = auto()
112
+ """
113
+ Discrete Attribute
114
+ """
115
+ DISCRETE = auto()
116
+ ```
@@ -0,0 +1,127 @@
1
+ Metadata-Version: 2.1
2
+ Name: nlptoolkit-amr
3
+ Version: 1.0.0
4
+ Summary: Simple Amr Processing
5
+ Home-page: https://github.com/StarlangSoftware/Amr-Py
6
+ Author: olcaytaner
7
+ Author-email: olcay.yildiz@ozyegin.edu.tr
8
+ License: UNKNOWN
9
+ Description: For Contibutors
10
+ ============
11
+
12
+ ### Setup.py file
13
+ 1. Do not forget to set package list. All subfolders should be added to the package list.
14
+ ```
15
+ packages=['Classification', 'Classification.Model', 'Classification.Model.DecisionTree',
16
+ 'Classification.Model.Ensemble', 'Classification.Model.NeuralNetwork',
17
+ 'Classification.Model.NonParametric', 'Classification.Model.Parametric',
18
+ 'Classification.Filter', 'Classification.DataSet', 'Classification.Instance', 'Classification.Attribute',
19
+ 'Classification.Parameter', 'Classification.Experiment',
20
+ 'Classification.Performance', 'Classification.InstanceList', 'Classification.DistanceMetric',
21
+ 'Classification.StatisticalTest', 'Classification.FeatureSelection'],
22
+ ```
23
+ 2. Package name should be lowercase and may only include the _ character.
24
+ ```
25
+ name='nlptoolkit_math',
26
+ ```
27
+
28
+ ### Python files
29
+ 1. Do not forget to comment each function.
30
+ ```
31
+ def __broadcast_shape(self, shape1: Tuple[int, ...], shape2: Tuple[int, ...]) -> Tuple[int, ...]:
32
+ """
33
+ Determines the broadcasted shape of two tensors.
34
+
35
+ :param shape1: Tuple representing the first tensor shape.
36
+ :param shape2: Tuple representing the second tensor shape.
37
+ :return: Tuple representing the broadcasted shape.
38
+ """
39
+ ```
40
+ 2. Function names should follow camel case.
41
+ ```
42
+ def addItem(self, item: str):
43
+ ```
44
+ 3. Local variables should follow snake case.
45
+ ```
46
+ det = 1.0
47
+ copy_of_matrix = copy.deepcopy(self)
48
+ ```
49
+ 4. Class variables should be declared in each file.
50
+ ```
51
+ class Eigenvector(Vector):
52
+ eigenvalue: float
53
+ ```
54
+ 5. Variable types should be defined for function parameters and class variables.
55
+ ```
56
+ def getIndex(self, item: str) -> int:
57
+ ```
58
+ 6. For abstract methods, use ABC package and declare them with @abstractmethod.
59
+ ```
60
+ @abstractmethod
61
+ def train(self, train_set: list[Tensor]):
62
+ pass
63
+ ```
64
+ 7. For private methods, use __ as prefix in their names.
65
+ ```
66
+ def __infer_shape(self, data: Union[List, List[List], List[List[List]]]) -> Tuple[int, ...]:
67
+ ```
68
+ 8. For private class variables, use __ as prefix in their names.
69
+ ```
70
+ class Matrix(object):
71
+ __row: int
72
+ __col: int
73
+ __values: list[list[float]]
74
+ ```
75
+ 9. Write \_\_repr\_\_ class methods as toString methods
76
+ 10. Write getter and setter class methods.
77
+ ```
78
+ def getOptimizer(self) -> Optimizer:
79
+ return self.optimizer
80
+ def setValue(self, value: Optional[Tensor]) -> None:
81
+ self._value = value
82
+ ```
83
+ 11. If there are multiple constructors for a class, define them as constructor1, constructor2, ..., then from the original constructor call these methods.
84
+ ```
85
+ def constructor1(self):
86
+ self.__values = []
87
+ self.__size = 0
88
+
89
+ def constructor2(self, values: list):
90
+ self.__values = values.copy()
91
+ self.__size = len(values)
92
+
93
+ def __init__(self,
94
+ valuesOrSize=None,
95
+ initial=None):
96
+ if valuesOrSize is None:
97
+ self.constructor1()
98
+ elif isinstance(valuesOrSize, list):
99
+ self.constructor2(valuesOrSize)
100
+ ```
101
+ 12. Extend test classes from unittest and use separate unit test methods.
102
+ ```
103
+ class TensorTest(unittest.TestCase):
104
+
105
+ def test_inferred_shape(self):
106
+ a = Tensor([[1.0, 2.0], [3.0, 4.0]])
107
+ self.assertEqual((2, 2), a.getShape())
108
+
109
+ def test_shape(self):
110
+ a = Tensor([1.0, 2.0, 3.0])
111
+ self.assertEqual((3, ), a.getShape())
112
+ ```
113
+ 13. Enumerated types should be used when necessary as enum classes.
114
+ ```
115
+ class AttributeType(Enum):
116
+ """
117
+ Continuous Attribute
118
+ """
119
+ CONTINUOUS = auto()
120
+ """
121
+ Discrete Attribute
122
+ """
123
+ DISCRETE = auto()
124
+ ```
125
+
126
+ Platform: UNKNOWN
127
+ Description-Content-Type: text/markdown
@@ -0,0 +1,14 @@
1
+ README.md
2
+ setup.py
3
+ Construction/AmrConstructionAlgorithm.py
4
+ Construction/RuleBasedConstructionAlgorithm.py
5
+ Corpus/AmrConnection.py
6
+ Corpus/AmrCorpus.py
7
+ Corpus/AmrSentence.py
8
+ Corpus/AmrWord.py
9
+ Corpus/Point.py
10
+ nlptoolkit_amr.egg-info/PKG-INFO
11
+ nlptoolkit_amr.egg-info/SOURCES.txt
12
+ nlptoolkit_amr.egg-info/dependency_links.txt
13
+ nlptoolkit_amr.egg-info/requires.txt
14
+ nlptoolkit_amr.egg-info/top_level.txt
@@ -0,0 +1 @@
1
+ NlpToolkit-AnnotatedSentence
@@ -0,0 +1,2 @@
1
+ Construction
2
+ Corpus
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,19 @@
1
+ from setuptools import setup
2
+
3
+ from pathlib import Path
4
+ this_directory = Path(__file__).parent
5
+ long_description = (this_directory / "README.md").read_text(encoding="utf-8")
6
+
7
+ setup(
8
+ name='nlptoolkit_amr',
9
+ version='1.0.0',
10
+ packages=['Corpus', 'Construction'],
11
+ url='https://github.com/StarlangSoftware/Amr-Py',
12
+ license='',
13
+ author='olcaytaner',
14
+ author_email='olcay.yildiz@ozyegin.edu.tr',
15
+ description='Simple Amr Processing',
16
+ install_requires=['NlpToolkit-AnnotatedSentence'],
17
+ long_description=long_description,
18
+ long_description_content_type='text/markdown'
19
+ )