nlptoolkit-amr 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nlptoolkit_amr-1.0.0/Construction/AmrConstructionAlgorithm.py +61 -0
- nlptoolkit_amr-1.0.0/Construction/RuleBasedConstructionAlgorithm.py +355 -0
- nlptoolkit_amr-1.0.0/Corpus/AmrConnection.py +21 -0
- nlptoolkit_amr-1.0.0/Corpus/AmrCorpus.py +32 -0
- nlptoolkit_amr-1.0.0/Corpus/AmrSentence.py +101 -0
- nlptoolkit_amr-1.0.0/Corpus/AmrWord.py +21 -0
- nlptoolkit_amr-1.0.0/Corpus/Point.py +20 -0
- nlptoolkit_amr-1.0.0/PKG-INFO +127 -0
- nlptoolkit_amr-1.0.0/README.md +116 -0
- nlptoolkit_amr-1.0.0/nlptoolkit_amr.egg-info/PKG-INFO +127 -0
- nlptoolkit_amr-1.0.0/nlptoolkit_amr.egg-info/SOURCES.txt +14 -0
- nlptoolkit_amr-1.0.0/nlptoolkit_amr.egg-info/dependency_links.txt +1 -0
- nlptoolkit_amr-1.0.0/nlptoolkit_amr.egg-info/requires.txt +1 -0
- nlptoolkit_amr-1.0.0/nlptoolkit_amr.egg-info/top_level.txt +2 -0
- nlptoolkit_amr-1.0.0/setup.cfg +4 -0
- nlptoolkit_amr-1.0.0/setup.py +19 -0
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
from abc import abstractmethod
|
|
3
|
+
from typing import List
|
|
4
|
+
|
|
5
|
+
from AnnotatedSentence.AnnotatedSentence import AnnotatedSentence
|
|
6
|
+
|
|
7
|
+
class AmrConstructionAlgorithm(object):
    """Base class for algorithms that build an AMR out of an annotated sentence.

    Subclasses supply constructExcelAmr; toString and saveAmr operate on the
    list of tab-indented lines it returns.
    """

    # NOTE: the class derives from object, not abc.ABC, so this decorator is
    # documentation only and the base class can still be instantiated.
    @abstractmethod
    def constructExcelAmr(self, sentence: AnnotatedSentence) -> list[str]:
        """Return the AMR of the sentence as a list of text lines."""
        pass

    def toString(self, sentence: AnnotatedSentence):
        """Return the lines of constructExcelAmr, each terminated by a newline."""
        return "".join(item + "\n" for item in self.constructExcelAmr(sentence))

    def saveAmr(self, items: List[str], fileName: str):
        """Write an indented AMR line list to fileName as an <Amr> XML document.

        items[0] is written as the root word. Every following item is a node
        whose depth is its number of leading tab characters; the text after
        the tabs is either "name" or "name:relation" (split at the last colon).
        Each distinct name produces one <Word> element and each item after the
        first produces one <Connection> from the most recent node one level up.

        :param items: Root line followed by tab-indented child lines.
        :param fileName: Path of the XML file to create (UTF-8).
        """
        # Function-scope import keeps this block independent of the file header.
        from xml.sax.saxutils import quoteattr

        start_x = 750
        start_y = 100

        def depth_of(text: str) -> int:
            # Number of leading tabs; safe for empty and all-tab strings.
            return len(text) - len(text.lstrip("\t"))

        def word_tag(name: str, x: int, y: int) -> str:
            # quoteattr escapes &, <, > and quotes so the file stays well formed.
            return ("<Word name=" + quoteattr(name) + " positionX=\"" + str(x)
                    + "\" positionY=\"" + str(y) + "\"/>\n")

        # One slot per depth; at least the historical 50, more when needed.
        slots = max(50, max((depth_of(item) for item in items), default=0) + 1)
        last_parent = [""] * slots
        last_x = [0] * slots
        child_count = [0] * slots
        words = set()
        # The context manager closes the file even when a write fails.
        with open(fileName, "w", encoding="utf8") as out_file:
            out_file.write("<Amr>\n")
            if items:
                root = items[0]
                out_file.write(word_tag(root, start_x, start_y))
                words.add(root)
                last_parent[0] = root
                last_x[0] = start_x
            for raw in items[1:]:
                tab_count = depth_of(raw)
                line = raw[tab_count:]
                if not line:
                    # Blank (or tab-only) lines carry no node.
                    continue
                # Split once, at the last colon, so the name used for the
                # <Word>, for the connection target and for later children's
                # "from" attribute is always the same string.
                name, colon, label = line.rpartition(":")
                if not colon:
                    name = line
                last_parent[tab_count] = name
                child_count[tab_count] = 0
                last_x[tab_count] = last_x[tab_count - 1] + (child_count[tab_count - 1] - 1) * 100
                if name not in words:
                    out_file.write(word_tag(name, last_x[tab_count], start_y + 100 * tab_count))
                    words.add(name)
                connection = ("<Connection from=" + quoteattr(last_parent[tab_count - 1])
                              + " to=" + quoteattr(name))
                if colon:
                    # The relation label is the text after the colon.
                    connection += " with=" + quoteattr(label)
                out_file.write(connection + "/>\n")
                child_count[tab_count - 1] = child_count[tab_count - 1] + 1
            out_file.write("</Amr>\n")
|
|
@@ -0,0 +1,355 @@
|
|
|
1
|
+
from typing import List
|
|
2
|
+
|
|
3
|
+
from AnnotatedSentence.AnnotatedSentence import AnnotatedSentence
|
|
4
|
+
from AnnotatedSentence.AnnotatedWord import AnnotatedWord
|
|
5
|
+
from MorphologicalAnalysis import MorphologicalTag
|
|
6
|
+
from WordNet import SemanticRelation
|
|
7
|
+
from WordNet.SynSet import SynSet
|
|
8
|
+
from WordNet.WordNet import WordNet
|
|
9
|
+
from WordNet.SemanticRelation import SemanticRelation
|
|
10
|
+
|
|
11
|
+
from AmrConstructionAlgorithm import AmrConstructionAlgorithm
|
|
12
|
+
from MorphologicalAnalysis.MorphologicalTag import MorphologicalTag
|
|
13
|
+
|
|
14
|
+
class RuleBasedConstructionAlgorithm(AmrConstructionAlgorithm):
|
|
15
|
+
|
|
16
|
+
__word_net: WordNet
|
|
17
|
+
__sentence: AnnotatedSentence
|
|
18
|
+
|
|
19
|
+
def __init__(self, word_net: WordNet) -> None:
    """Store the WordNet instance later handed to the recursive AMR builder.

    :param word_net: WordNet passed on by constructExcelAmr when it starts
        building from a ROOT word.
    """
    # The sentence attribute is not set here; constructExcelAmr assigns it.
    self.__word_net = word_net
|
|
21
|
+
|
|
22
|
+
def __with_tabs(self, tabCount: int, string: str) -> str:
    """Return string prefixed with tabCount tab characters.

    A zero or negative tabCount adds no prefix.
    """
    return "\t" * tabCount + string
|
|
27
|
+
|
|
28
|
+
def __only_word(self, word: AnnotatedWord, i: int) -> str:
    """Return the node label "<1-based position>/<name of the parsed word>".

    :param word: Word whose parse supplies the name.
    :param i: Zero-based position of the word.
    """
    position = str(i + 1)
    name = word.getParse().getWord().getName()
    return "/".join((position, name))
|
|
30
|
+
|
|
31
|
+
def __contains_arg0(self, semantic: str) -> bool:
    """Tell whether any annotated word of the sentence is ARG0 of semantic.

    :param semantic: Identifier checked through containsArgument("ARG0", ...).
    :return: True as soon as one word's argument list contains that argument.
    """
    sentence = self.__sentence
    for position in range(sentence.wordCount()):
        candidate = sentence.getWord(position)
        if not isinstance(candidate, AnnotatedWord):
            continue
        arguments = candidate.getArgumentList()
        # A missing (falsy) argument list cannot contain the argument.
        if arguments and arguments.containsArgument("ARG0", semantic):
            return True
    return False
|
|
37
|
+
|
|
38
|
+
def __extra_args(self, output: List[str], word: AnnotatedWord, tabCount: int):
    """Append implicit-subject lines ("<pronoun>:ARG0") for a verbal word.

    Nothing is added unless the root part of speech is "VERB". Person
    agreement tags add the pronoun when "<pronoun> " is not found in the
    sentence stems; for first and second person, a NOUN part of speech with
    the matching possessive tag adds it too. Third person additionally
    requires that no word is already ARG0 of this word's semantic and that
    the word is not a noun with a first/second person possessive tag.

    :param output: List receiving the new lines at depth tabCount + 1.
    :param word: Word being inspected.
    :param tabCount: Depth of the word's own line.
    """
    # NOTE(review): the pronoun test is a substring match on toStems(), so
    # "o " also matches any stem ending in "o" - confirm this is intended.
    parse = word.getParse()
    if parse.getRootPos() != "VERB":
        # Every rule below requires a verbal root.
        return

    def add(pronoun: str) -> None:
        output.append(self.__with_tabs(tabCount + 1, pronoun + ":ARG0"))

    first_and_second = (
        ("ben", MorphologicalTag.A1SG, MorphologicalTag.P1SG),
        ("biz", MorphologicalTag.A1PL, MorphologicalTag.P1PL),
        ("sen", MorphologicalTag.A2SG, MorphologicalTag.P2SG),
        ("siz", MorphologicalTag.A2PL, MorphologicalTag.P2PL),
    )
    for pronoun, agreement, possessive in first_and_second:
        if parse.containsTag(agreement) and pronoun + " " not in self.__sentence.toStems():
            add(pronoun)
        if parse.getPos() == "NOUN" and parse.containsTag(possessive):
            add(pronoun)

    third = (("o", MorphologicalTag.A3SG), ("onlar", MorphologicalTag.A3PL))
    for pronoun, agreement in third:
        if not parse.containsTag(agreement):
            continue
        if pronoun + " " in self.__sentence.toStems():
            continue
        if self.__contains_arg0(word.getSemantic()):
            continue
        owned_by_first_or_second = parse.getPos() == "NOUN" and (
            parse.containsTag(MorphologicalTag.P1SG)
            or parse.containsTag(MorphologicalTag.P1PL)
            or parse.containsTag(MorphologicalTag.P2SG)
            or parse.containsTag(MorphologicalTag.P2PL)
        )
        if not owned_by_first_or_second:
            add(pronoun)
|
|
63
|
+
|
|
64
|
+
def __contains_mode(self, index: int) -> bool:
    """Tell whether some word attaches to word index as AMOD or NMOD.

    :param index: Zero-based position of the head word; dependencies store
        the head as index + 1.
    :return: True when a dependent with relation "AMOD" or "NMOD" exists.
    """
    head = index + 1
    for position in range(self.__sentence.wordCount()):
        candidate = self.__sentence.getWord(position)
        if not isinstance(candidate, AnnotatedWord):
            continue
        dependency = candidate.getUniversalDependency()
        if dependency is None or dependency.to() != head:
            continue
        if dependency.__str__() in ("AMOD", "NMOD"):
            return True
    return False
|
|
71
|
+
|
|
72
|
+
def __extra_possessive(self, output: List[str], word: AnnotatedWord, wordIndex: int, tabCount: int):
    """Append possessor lines ("<pronoun>:poss") implied by possessive tags.

    First and second person possessive tags add the pronoun unless the word
    is a verbal noun (root part of speech "VERB", part of speech "NOUN");
    for those words __extra_args already emits the pronoun as ARG0. Third
    person possessive tags add "o"/"onlar" unless the word already has an
    AMOD or NMOD dependent.

    :param output: List receiving the new lines at depth tabCount + 1.
    :param word: Word being inspected.
    :param wordIndex: Zero-based position of the word in the sentence.
    :param tabCount: Depth of the word's own line.
    """
    parse = word.getParse()
    # The previous guard compared getRootPos() with both "VERB" and "NOUN"
    # using "or", which is always true. The second operand is taken to be
    # the part of speech, making this the exact complement of the
    # verbal-noun rule in __extra_args.
    verbal_noun = parse.getRootPos() == "VERB" and parse.getPos() == "NOUN"
    first_and_second = (
        (MorphologicalTag.P1SG, "ben"),
        (MorphologicalTag.P1PL, "biz"),
        (MorphologicalTag.P2SG, "sen"),
        (MorphologicalTag.P2PL, "siz"),
    )
    for tag, pronoun in first_and_second:
        if parse.containsTag(tag) and not verbal_noun:
            output.append(self.__with_tabs(tabCount + 1, pronoun + ":poss"))
    third = ((MorphologicalTag.P3SG, "o"), (MorphologicalTag.P3PL, "onlar"))
    for tag, pronoun in third:
        if parse.containsTag(tag) and not self.__contains_mode(wordIndex):
            output.append(self.__with_tabs(tabCount + 1, pronoun + ":poss"))
|
|
91
|
+
|
|
92
|
+
def __is_month(self, next: str) -> bool:
    """Tell whether next is one of the twelve lowercase Turkish month names."""
    months = (
        "ocak", "şubat", "mart", "nisan", "mayıs", "haziran",
        "temmuz", "ağustos", "eylül", "ekim", "kasım", "aralık",
    )
    return next in months
|
|
95
|
+
|
|
96
|
+
def __is_weekday(self, next: str) -> bool:
    """Tell whether next is one of the seven lowercase Turkish day names."""
    days = (
        "pazartesi", "salı", "çarşamba", "perşembe",
        "cuma", "cumartesi", "pazar",
    )
    return next in days
|
|
98
|
+
|
|
99
|
+
def __is_ordinal(self, next: str) -> int:
    """Map a Turkish ordinal word ("birinci" .. "dokuzuncu") to 1..9.

    :return: The numeric value, or 0 when next is not one of those words.
    """
    ordinals = (
        "birinci", "ikinci", "üçüncü", "dördüncü", "beşinci",
        "altıncı", "yedinci", "sekizinci", "dokuzuncu",
    )
    if next in ordinals:
        # Position in the tuple is the value minus one.
        return ordinals.index(next) + 1
    return 0
|
|
119
|
+
|
|
120
|
+
def __add_argument_list(self, output: List[str], current: AnnotatedWord, semantic: str, currentText: str) -> bool:
    """Append currentText tagged with the first matching role of current.

    The roles ARG0, ARG1 and ARG2 are tried in that order against the
    word's argument list for semantic; the first hit appends
    currentText + ":<role>" to output.

    :return: True when a line was appended, False when the word has no
        argument list or none of the three roles matches.
    """
    argument_list = current.getArgumentList()
    if argument_list is None:
        return False
    for role in ("ARG0", "ARG1", "ARG2"):
        if argument_list.containsArgument(role, semantic):
            output.append(currentText + ":" + role)
            return True
    return False
|
|
133
|
+
|
|
134
|
+
def __add_details(self, tabCount: int, output: List[str], current: AnnotatedWord, wordIndex: int) -> None:
    """Append the child lines implied by the morphology of current.

    Adds, one level below tabCount: a polarity line for a NEGATIVE tag, the
    implicit arguments and possessors found by __extra_args and
    __extra_possessive, and a mode line for an IMPERATIVE tag. Nothing is
    returned.

    :param tabCount: Depth of the line already written for current.
    :param output: List receiving the new lines.
    :param current: Word whose parse is inspected.
    :param wordIndex: Zero-based position of current in the sentence.
    """
    if current.getParse().containsTag(MorphologicalTag.NEGATIVE):
        # NOTE(review): the word "değil" is emitted elsewhere in this class as
        # "-:polarity"; here the text before the colon is empty - confirm.
        output.append(self.__with_tabs(tabCount + 1, ":polarity"))
    self.__extra_args(output, current, tabCount)
    self.__extra_possessive(output, current, wordIndex, tabCount)
    if current.getParse().containsTag(MorphologicalTag.IMPERATIVE):
        # NOTE(review): this line also starts with a colon - confirm the
        # intended "name:relation" split.
        output.append(self.__with_tabs(tabCount + 1, ":imperative:mode"))
|
|
141
|
+
|
|
142
|
+
def __get_preliminary_extra(self, current: AnnotatedWord, index: int, added: List[str]) -> int:
    """Work out the relation suffix implied by current and its dependents.

    An empty string is appended to added, then added[0] is set to ":cond"
    for a CONDITIONAL tag. The first word that depends on position index
    and is one of the listed postpositions overrides added[0] with
    ":extent", ":concession" or ":cause".

    :param current: Word whose parse is checked for the conditional tag.
    :param index: Zero-based position of current; dependents point at
        index + 1.
    :param added: Output list; element 0 receives the suffix.
    :return: Position of the postposition that set the suffix, or -1.
    """
    added.append("")
    if current.getParse().containsTag(MorphologicalTag.CONDITIONAL):
        added[0] = ":cond"
    markers = (
        (("kadar",), ":extent"),
        (("rağmen", "karşın", "karşılık"), ":concession"),
        (("için", "sayesinde", "dolayı"), ":cause"),
    )
    head = index + 1
    for position in range(self.__sentence.wordCount()):
        word = self.__sentence.getWord(position)
        if not isinstance(word, AnnotatedWord):
            continue
        dependency = word.getUniversalDependency()
        if dependency is None or dependency.to() != head:
            continue
        name = word.getParse().getWord().getName()
        for names, suffix in markers:
            if name in names:
                added[0] = suffix
                return position
    return -1
|
|
159
|
+
|
|
160
|
+
# Appends one line to output: defaultString followed by extraAdded when that
# is non-empty, otherwise by added, with the result passed through
# __with_tabs(tabCount, ...). Also marks done[addedIndex] when addedIndex is
# not -1.
# NOTE(review): two call sites in __print_amr_recursively pass a
# defaultString that was already run through __with_tabs, while a third
# passes the bare word; the prefix applied here then doubles the indentation
# for the first two - confirm which form is intended.
def __add_default_case(self, done: List[bool], tabCount: int, output: List[str], defaultString: str, extraAdded: str, added: str, addedIndex: int):
|
|
161
|
+
if extraAdded != "":
|
|
162
|
+
output.append(self.__with_tabs(tabCount, defaultString + extraAdded))
|
|
163
|
+
else:
|
|
164
|
+
output.append(self.__with_tabs(tabCount, defaultString + added))
|
|
165
|
+
if addedIndex != -1:
|
|
166
|
+
done[addedIndex] = True
|
|
167
|
+
|
|
168
|
+
# Emits the AMR line(s) for the word at index unless done[index] is already
# set, then hands every word whose dependency points at index + 1 to
# __meta_verb_tags one level deeper.
# NOTE(review), points to confirm in the body below:
# - In the proper-noun branch the following name parts are written with
#   __only_word(check_word, index + 1) and the suffix ":op1" + str(1 + i);
#   this yields ":op12"/":op13" and the same position for both parts -
#   presumably index + i and ":op" + str(1 + i) were meant.
# - In the trailing while loop, next_word.getUniversalDependency().to() is
#   called without the None check used on every other dependency access.
# - The same loop calls getSemantic() on getWord(i + 1) without the
#   isinstance(..., AnnotatedWord) check used elsewhere.
def __print_amr_recursively(self, done: List[bool], index: int, tabCount: int, output: List[str], relation: str, semantic: str, wordNet: WordNet, extraAdded: str):
|
|
169
|
+
current_word_index = index
|
|
170
|
+
if done[index]:
|
|
171
|
+
return
|
|
172
|
+
done[index] = True
|
|
173
|
+
current = self.__sentence.getWord(index)
|
|
174
|
+
if isinstance(current, AnnotatedWord):
|
|
175
|
+
if relation == "DET" and current.getParse().getWord().getName() == "bir":
|
|
176
|
+
return
|
|
177
|
+
if current.getParse().getWord().getName() in ["ve", "veya", "hem", "ama"]:
|
|
178
|
+
return
|
|
179
|
+
if current.getParse().getWord().getName() == "değil":
|
|
180
|
+
output.append(self.__with_tabs(tabCount, "-:polarity"))
|
|
181
|
+
return
|
|
182
|
+
if current.isPunctuation():
|
|
183
|
+
return
|
|
184
|
+
added = [""]
|
|
185
|
+
added_index = self.__get_preliminary_extra(current, index, added)
|
|
186
|
+
if current.getParse().isCardinal() and index + 1 < self.__sentence.wordCount():
|
|
187
|
+
next_word = self.__sentence.getWord(index + 1)
|
|
188
|
+
if isinstance(next_word, AnnotatedWord):
|
|
189
|
+
next = next_word.getParse().getWord().getName()
|
|
190
|
+
if self.__is_month(next):
|
|
191
|
+
output.append(self.__with_tabs(tabCount, "date-entity:date"))
|
|
192
|
+
output.append(self.__with_tabs(tabCount + 1, self.__only_word(current, index) + ":day"))
|
|
193
|
+
output.append(self.__with_tabs(tabCount + 1, self.__only_word(next_word, index + 1) + ":month"))
|
|
194
|
+
else:
|
|
195
|
+
self.__add_default_case(done, tabCount, output, self.__only_word(current, index), extraAdded, added[0], added_index)
|
|
196
|
+
elif current.getParse().isProperNoun():
|
|
197
|
+
wiki_type = "person"
|
|
198
|
+
synsets = wordNet.getSynSetsWithLiteral(current.getParse().getWord().getName())
|
|
199
|
+
for synset in synsets:
|
|
200
|
+
if isinstance(synset, SynSet) and synset.containsRelation(SemanticRelation("TUR10-0820020", "INSTANCE_HYPERNYM")):
|
|
201
|
+
wiki_type = "city"
|
|
202
|
+
argument_added = self.__add_argument_list(output, current, semantic, self.__with_tabs(tabCount, wiki_type))
|
|
203
|
+
if not argument_added:
|
|
204
|
+
if not current.getParse().getRootPos() == "VERB" and current.getParse().containsTag(MorphologicalTag.INSTRUMENTAL):
|
|
205
|
+
output.append(self.__with_tabs(tabCount, wiki_type) + ":instrument")
|
|
206
|
+
elif not current.getParse().getRootPos() == "VERB" and current.getParse().containsTag(MorphologicalTag.LOCATIVE):
|
|
207
|
+
output.append(self.__with_tabs(tabCount, wiki_type) + ":location")
|
|
208
|
+
else:
|
|
209
|
+
self.__add_default_case(done, tabCount, output, self.__with_tabs(tabCount, wiki_type), extraAdded, added[0], added_index)
|
|
210
|
+
output.append(self.__with_tabs(tabCount + 1, "name:name"))
|
|
211
|
+
output.append(self.__with_tabs(tabCount + 2, self.__only_word(current, index) + ":op1"))
|
|
212
|
+
for i in range(1, 3):
|
|
213
|
+
if index + i < self.__sentence.wordCount():
|
|
214
|
+
check_word = self.__sentence.getWord(index + i)
|
|
215
|
+
if isinstance(check_word, AnnotatedWord):
|
|
216
|
+
if check_word.getSemantic() is not None and check_word.getSemantic() == current.getSemantic():
|
|
217
|
+
output.append(self.__with_tabs(tabCount + 2, self.__only_word(check_word, index + 1) + ":op1" + str(1 + i)))
|
|
218
|
+
done[index + i] = True
|
|
219
|
+
else:
|
|
220
|
+
break
|
|
221
|
+
else:
|
|
222
|
+
break
|
|
223
|
+
if wiki_type == "person":
|
|
224
|
+
output.append(self.__with_tabs(tabCount + 1, "-:wiki"))
|
|
225
|
+
else:
|
|
226
|
+
output.append(self.__with_tabs(tabCount + 1, current.getParse().getWord().getName() + "-:wiki"))
|
|
227
|
+
elif self.__is_month(current.getParse().getWord().getName()):
|
|
228
|
+
output.append(self.__with_tabs(tabCount, "date-entity:date"))
|
|
229
|
+
output.append(self.__with_tabs(tabCount + 1, self.__only_word(current, index) + ":month"))
|
|
230
|
+
elif self.__is_weekday(current.getParse().getWord().getName()):
|
|
231
|
+
output.append(self.__with_tabs(tabCount, "date-entity:date"))
|
|
232
|
+
output.append(self.__with_tabs(tabCount + 1, self.__only_word(current, index) + ":weekday"))
|
|
233
|
+
else:
|
|
234
|
+
current_word = self.__only_word(current, index)
|
|
235
|
+
for i in range(1, 3):
|
|
236
|
+
if index > i - 1 and index - i < self.__sentence.wordCount() and not done[index - i]:
|
|
237
|
+
check_word = self.__sentence.getWord(index - i)
|
|
238
|
+
if isinstance(check_word, AnnotatedWord):
|
|
239
|
+
if check_word.getSemantic() is not None and check_word.getSemantic() == current.getSemantic():
|
|
240
|
+
current_word = self.__only_word(check_word, index - i) + " " + current_word
|
|
241
|
+
done[index - i] = True
|
|
242
|
+
else:
|
|
243
|
+
break
|
|
244
|
+
else:
|
|
245
|
+
break
|
|
246
|
+
for i in range(1, 3):
|
|
247
|
+
if index + i < self.__sentence.wordCount():
|
|
248
|
+
check_word = self.__sentence.getWord(index + i)
|
|
249
|
+
if isinstance(check_word, AnnotatedWord):
|
|
250
|
+
if check_word.getSemantic() is not None and check_word.getSemantic() == current.getSemantic():
|
|
251
|
+
current_word += " " + self.__only_word(check_word, index + i)
|
|
252
|
+
done[index + i] = True
|
|
253
|
+
current = check_word
|
|
254
|
+
current_word_index = index + i
|
|
255
|
+
else:
|
|
256
|
+
break
|
|
257
|
+
else:
|
|
258
|
+
break
|
|
259
|
+
if current.getParse().getWord().getName() in ["çok", "gayet", "tam", "bayağı", "fazla", "hiç"]:
|
|
260
|
+
output.append(self.__with_tabs(tabCount, current_word) + ":degree")
|
|
261
|
+
elif current.getParse().getWord().getName() in ["hep", "sürekli"]:
|
|
262
|
+
output.append(self.__with_tabs(tabCount, current_word) + ":frequency")
|
|
263
|
+
else:
|
|
264
|
+
argument_added = self.__add_argument_list(output, current, semantic, self.__with_tabs(tabCount, current_word))
|
|
265
|
+
if argument_added:
|
|
266
|
+
self.__add_details(tabCount, output, current, current_word_index)
|
|
267
|
+
elif current.getParse().containsTag(MorphologicalTag.ORDINAL) or self.__is_ordinal(current.getParse().getWord().getName()) > 0:
|
|
268
|
+
output.append(self.__with_tabs(tabCount, "ordinal-entity:ord"))
|
|
269
|
+
value = self.__is_ordinal(current.getParse().getWord().getName())
|
|
270
|
+
if value > 0:
|
|
271
|
+
output.append(self.__with_tabs(tabCount + 1, str(value) + ":value"))
|
|
272
|
+
else:
|
|
273
|
+
output.append(self.__with_tabs(tabCount + 1, current.getParse().getWord().getName() + ":value"))
|
|
274
|
+
else:
|
|
275
|
+
if relation == "AMOD" or relation == "NMOD":
|
|
276
|
+
output.append(self.__with_tabs(tabCount, current_word) + ":mod")
|
|
277
|
+
elif relation == "NUMMOD":
|
|
278
|
+
output.append(self.__with_tabs(tabCount, current_word) + ":quant")
|
|
279
|
+
elif relation == "ADVMOD":
|
|
280
|
+
output.append(self.__with_tabs(tabCount, current_word) + ":manner")
|
|
281
|
+
else:
|
|
282
|
+
if current.getParse().getRootPos() != "VERB" and current.getParse().containsTag(
|
|
283
|
+
MorphologicalTag.INSTRUMENTAL):
|
|
284
|
+
output.append(self.__with_tabs(tabCount, current_word) + ":instrument")
|
|
285
|
+
elif current.getParse().getRootPos() != "VERB" and current.getParse().containsTag(
|
|
286
|
+
MorphologicalTag.LOCATIVE):
|
|
287
|
+
output.append(self.__with_tabs(tabCount, current_word) + ":location")
|
|
288
|
+
else:
|
|
289
|
+
self.__add_default_case(done, tabCount, output, self.__with_tabs(tabCount, current_word), extraAdded, added[0], added_index)
|
|
290
|
+
self.__add_details(tabCount, output, current, current_word_index)
|
|
291
|
+
i = 0
|
|
292
|
+
while i < self.__sentence.wordCount():
|
|
293
|
+
word = self.__sentence.getWord(i)
|
|
294
|
+
if isinstance(word, AnnotatedWord):
|
|
295
|
+
if word.getParse().isCardinal() and i + 1 < self.__sentence.wordCount():
|
|
296
|
+
next_word = self.__sentence.getWord(i + 1)
|
|
297
|
+
if isinstance(next_word, AnnotatedWord):
|
|
298
|
+
next = next_word.getParse().getWord().getName()
|
|
299
|
+
if self.__is_month(next):
|
|
300
|
+
if next_word.getUniversalDependency().to() == index + 1:
|
|
301
|
+
self.__meta_verb_tags(word, done, i, tabCount + 1, output, word.getUniversalDependency().__str__(), current.getSemantic(), wordNet)
|
|
302
|
+
i = i + 2
|
|
303
|
+
continue
|
|
304
|
+
j = i
|
|
305
|
+
while i < self.__sentence.wordCount() - 1 and self.__sentence.getWord(i + 1).getSemantic() is not None and self.__sentence.getWord(i + 1).getSemantic() == word.getSemantic():
|
|
306
|
+
i = i + 1
|
|
307
|
+
if word.getUniversalDependency() is not None and word.getUniversalDependency().to() == index + 1:
|
|
308
|
+
self.__meta_verb_tags(word, done, j, tabCount + 1, output, word.getUniversalDependency().__str__(), current.getSemantic(), wordNet)
|
|
309
|
+
elif j != i and self.__sentence.getWord(i).getUniversalDependency() is not None and self.__sentence.getWord(i).getUniversalDependency().to() == index + 1:
|
|
310
|
+
self.__meta_verb_tags(word, done, j, tabCount + 1, output, word.getUniversalDependency().__str__(), current.getSemantic(), wordNet)
|
|
311
|
+
i = i + 1
|
|
312
|
+
|
|
313
|
+
def __meta_verb_tags(self, word: AnnotatedWord, done: List[bool], index: int, tabCount: int, output: List[str], relation: str, semantic: str, wordNet: WordNet):
    """Emit wrapper concepts for the word at index, then the word itself.

    When other words attach to index as PARATAXIS or CONJ, an "and" line is
    written and each of them, followed by the word itself, is printed one
    level deeper with suffixes ":op1", ":op2", ... Otherwise the tags
    NECESSITY, ABLE and CAUSATIVE each add a wrapper line ("öner",
    "mümkün", "yap") and push the word one level deeper.

    :param word: Word at position index.
    :param done: Per-word flags shared with __print_amr_recursively.
    :param index: Zero-based position of word.
    :param tabCount: Depth at which output starts.
    :param output: List receiving the lines.
    :param relation: Dependency relation passed on unchanged.
    :param semantic: Semantic identifier passed on unchanged.
    :param wordNet: WordNet passed on unchanged.
    """
    head = index + 1
    coordinated = []
    for position in range(self.__sentence.wordCount()):
        candidate = self.__sentence.getWord(position)
        if not isinstance(candidate, AnnotatedWord):
            continue
        dependency = candidate.getUniversalDependency()
        if dependency is None or dependency.to() != head:
            continue
        if dependency.__str__() in ["PARATAXIS", "CONJ"]:
            coordinated.append(position)

    if coordinated:
        output.append(self.__with_tabs(tabCount, "and"))
        count = 1
        for position in coordinated:
            self.__print_amr_recursively(done, position, tabCount + 1, output, relation, semantic, wordNet, ":op" + str(count))
            count = count + 1
        # The head itself becomes the last operand.
        self.__print_amr_recursively(done, index, tabCount + 1, output, relation, semantic, wordNet, ":op" + str(count))
        return

    wrappers = (
        (MorphologicalTag.NECESSITY, "öner"),
        (MorphologicalTag.ABLE, "mümkün"),
        (MorphologicalTag.CAUSATIVE, "yap"),
    )
    for tag, concept in wrappers:
        if word.getParse().containsTag(tag):
            output.append(self.__with_tabs(tabCount, concept))
            tabCount = tabCount + 1
    self.__print_amr_recursively(done, index, tabCount, output, relation, semantic, wordNet, "")
|
|
342
|
+
|
|
343
|
+
def constructExcelAmr(self, sentence: AnnotatedSentence) -> list[str]:
    """Build the indented AMR line list of sentence.

    The first line is the sentence's file name, a tab and its text. The
    lines produced by __meta_verb_tags follow for every annotated word whose
    dependency prints as "ROOT".

    :param sentence: Sentence to convert; it is also stored on the instance
        for the helper methods.
    :return: The list of lines.
    """
    self.__sentence = sentence
    done = [False] * self.__sentence.wordCount()
    output = [sentence.getFileName() + "\t" + sentence.toString()]
    for position in range(sentence.wordCount()):
        word = sentence.getWord(position)
        if not isinstance(word, AnnotatedWord):
            continue
        dependency = word.getUniversalDependency()
        if dependency is not None and dependency.__str__() == "ROOT":
            self.__meta_verb_tags(word, done, position, 0, output, "ROOT", word.getSemantic(), self.__word_net)
    return output
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
from AmrWord import AmrWord
|
|
2
|
+
|
|
3
|
+
class AmrConnection:
    """A directed, labeled edge between two AMR words."""

    __from: AmrWord
    __to: AmrWord
    __with: str

    def __init__(self, _from: AmrWord, _to: AmrWord, _with: str):
        """Create an edge from _from to _to carrying the label _with."""
        self.__from, self.__to, self.__with = _from, _to, _with

    def getFrom(self) -> AmrWord:
        """Return the word the edge starts at."""
        return self.__from

    def getTo(self) -> AmrWord:
        """Return the word the edge points to."""
        return self.__to

    def getWith(self) -> str:
        """Return the label of the edge."""
        return self.__with
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import re
|
|
3
|
+
|
|
4
|
+
from Corpus.Corpus import Corpus
|
|
5
|
+
|
|
6
|
+
from AmrSentence import AmrSentence
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class AmrCorpus(Corpus):
    """A corpus whose sentences are AmrSentence objects read from a folder."""

    def __init__(self,
                 folder: str,
                 pattern: str = None):
        """
        Walk folder recursively and load every matching file as an AmrSentence.

        A file is loaded when its name starts with digits followed by a dot
        and, if pattern is given, pattern occurs in the file's joined path.

        PARAMETERS
        ----------
        folder : str
            Folder where all sentences reside.
        pattern : str
            File pattern such as "." ".train" ".test".
        """
        numbered = re.compile("\\d+\\.")
        self.sentences = []
        for root, _dirs, files in os.walk(folder):
            for file in files:
                file_name = os.path.join(root, file)
                if pattern is not None and pattern not in file_name:
                    continue
                if numbered.match(file) is None:
                    continue
                # NOTE(review): the joined path, not the bare file name, is
                # passed as the second argument - confirm FileDescription
                # expects that.
                self.sentences.append(AmrSentence(root, file_name))
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
import xml.etree.ElementTree
|
|
2
|
+
|
|
3
|
+
from Corpus.FileDescription import FileDescription
|
|
4
|
+
from Corpus.Sentence import Sentence
|
|
5
|
+
|
|
6
|
+
from AmrConnection import AmrConnection
|
|
7
|
+
from AmrWord import AmrWord
|
|
8
|
+
from Point import Point
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class AmrSentence(Sentence):
    """A sentence stored as an AMR graph: AmrWord nodes plus AmrConnection edges.

    The graph is read from an XML file whose root has <Word> children
    (attributes name, positionX, positionY) and <Connection> children
    (attributes from, to and optionally with).
    """

    __connections: list[AmrConnection]
    __file_description: FileDescription

    def constructor1(self, file_description: FileDescription):
        """Load the sentence described by file_description."""
        self.__file_description = file_description
        self.reload()

    def constructor2(self, path: str, file_name: str):
        """Load the sentence described by FileDescription(path, file_name)."""
        self.__file_description = FileDescription(path, file_name)
        self.reload()

    def __init__(self, param1: str | FileDescription, param2: str | None = None):
        """Create the sentence from (path, file_name) or from a FileDescription.

        :param param1: Path string, or a FileDescription.
        :param param2: File name; only used when param1 is a string.
        """
        self.words = []
        self.__connections = []
        if param2 is not None and isinstance(param1, str):
            self.constructor2(param1, param2)
        elif isinstance(param1, FileDescription):
            self.constructor1(param1)

    def reload(self):
        """Re-read words and connections from the XML file."""
        file_name = self.__file_description.getFileName()
        root = xml.etree.ElementTree.parse(file_name).getroot()
        # Start from an empty graph so a second reload does not duplicate it.
        self.words = []
        self.__connections = []
        self.__load_from_xml(root)

    def getRawFileName(self) -> str:
        """Return the raw file name of the underlying file description."""
        return self.__file_description.getRawFileName()

    def getFileName(self) -> str:
        """Return the file name of the underlying file description."""
        return self.__file_description.getFileName()

    def getFileDescription(self) -> FileDescription:
        """Return the underlying file description."""
        return self.__file_description

    def __getWord(self, name: str) -> AmrWord | None:
        """Return the first word called name, or None."""
        for word in self.words:
            if word.getName() == name:
                return word
        return None

    def getConnection(self, index: int) -> AmrConnection:
        """Return the connection at position index."""
        return self.__connections[index]

    def connection_count(self) -> int:
        """Return the number of connections."""
        return len(self.__connections)

    def add_connection(self, _from: AmrWord, _to: AmrWord, _with: str):
        """Append a connection from _from to _to labeled _with."""
        self.__connections.append(AmrConnection(_from, _to, _with))

    def __load_from_xml(self, root):
        """Add the <Word> and <Connection> children of root to the sentence.

        Connections whose endpoints are not known words are ignored.
        """

        def coordinate(text):
            # XML attributes are strings; Point and AmrWord.move work with
            # integers. Text that is not an integer is kept unchanged.
            try:
                return int(text)
            except ValueError:
                return text

        for child_node in root:
            if child_node.tag == "Word":
                position = Point(coordinate(child_node.attrib["positionX"]),
                                 coordinate(child_node.attrib["positionY"]))
                self.addWord(AmrWord(child_node.attrib["name"], position))
            elif child_node.tag == "Connection":
                _from = self.__getWord(child_node.attrib["from"])
                _to = self.__getWord(child_node.attrib["to"])
                # "with" is optional; indexing attrib directly raised KeyError
                # for unlabeled connections.
                _with = child_node.attrib.get("with", "")
                if _from is not None and _to is not None:
                    self.add_connection(_from, _to, _with)

    def __get_root(self) -> AmrWord | None:
        """Return the first word no connection points to, or None."""
        targets = {connection.getTo().getName() for connection in self.__connections}
        for word in self.words:
            if word.getName() not in targets:
                return word
        return None

    def __word_to_string(self, word: AmrWord | None, tab_count: int, _path: frozenset = frozenset()) -> str:
        """Render word and everything reachable from it in bracketed form.

        :param word: Node to render; None renders as "()".
        :param tab_count: Depth of word; children are indented one more tab.
        :param _path: Names of the nodes currently being expanded; a node
            that is already on the path is written without its children so
            that cyclic graphs terminate.
        """
        if word is None:
            return "()"
        name = word.getName()
        if name in _path:
            return "(" + name + ")"
        inner_path = _path | {name}
        indent = "\n" + "\t" * (tab_count + 1)
        pieces = ["(" + name]
        for connection in self.__connections:
            if connection.getFrom().getName() == name:
                pieces.append(indent + ":" + connection.getWith() + " "
                              + self.__word_to_string(connection.getTo(), tab_count + 1, inner_path))
        pieces.append(")")
        return "".join(pieces)

    def __repr__(self):
        """Return the file name followed by the bracketed graph."""
        return self.getFileName() + "\n" + self.__word_to_string(self.__get_root(), 0)

    def __str__(self):
        """Return the same text as __repr__."""
        return self.__repr__()
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
from Dictionary.Word import Word
|
|
3
|
+
from Point import Point
|
|
4
|
+
|
|
5
|
+
class AmrWord(Word):
    """
    A Word that additionally carries a Point position.
    """

    position: Point

    def __init__(self, name: str, position: Point):
        """
        Creates a word with the given name placed at the given position.

        :param name: Name handed to the Word constructor.
        :param position: Point stored as the position of the word (kept by reference, not copied).
        """
        super().__init__(name)
        self.position = position

    def getPosition(self) -> Point:
        """
        Accessor for the position.

        :return: The Point stored for this word.
        """
        return self.position

    def move(self, deltaX: int, deltaY: int) -> None:
        """
        Shifts the stored position in place.

        :param deltaX: Amount added to the x coordinate.
        :param deltaY: Amount added to the y coordinate.
        """
        current = self.getPosition()
        shifted_x = current.getX() + deltaX
        self.position.setX(shifted_x)
        shifted_y = current.getY() + deltaY
        self.position.setY(shifted_y)

    def clone(self) -> AmrWord:
        """
        Copies the word together with its position.

        :return: A new AmrWord with the same name and a new Point holding the same coordinates.
        """
        source = self.getPosition()
        copied_position = Point(source.getX(), source.getY())
        return AmrWord(self.getName(), copied_position)
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
class Point:
    """
    Mutable pair of coordinates accessed through getters and setters.
    """

    __x: int
    __y: int

    def __init__(self, x: int, y: int):
        """
        Stores the two coordinates as given.

        :param x: Initial x coordinate.
        :param y: Initial y coordinate.
        """
        self.__x, self.__y = x, y

    def getX(self) -> int:
        """
        Accessor for the x coordinate.

        :return: The stored x coordinate.
        """
        return self.__x

    def getY(self) -> int:
        """
        Accessor for the y coordinate.

        :return: The stored y coordinate.
        """
        return self.__y

    def setX(self, x):
        """
        Mutator for the x coordinate.

        :param x: New x coordinate.
        """
        self.__x = x

    def setY(self, y):
        """
        Mutator for the y coordinate.

        :param y: New y coordinate.
        """
        self.__y = y
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: nlptoolkit_amr
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Simple Amr Processing
|
|
5
|
+
Home-page: https://github.com/StarlangSoftware/Amr-Py
|
|
6
|
+
Author: olcaytaner
|
|
7
|
+
Author-email: olcay.yildiz@ozyegin.edu.tr
|
|
8
|
+
License: UNKNOWN
|
|
9
|
+
Description: For Contributors
|
|
10
|
+
============
|
|
11
|
+
|
|
12
|
+
### Setup.py file
|
|
13
|
+
1. Do not forget to set package list. All subfolders should be added to the package list.
|
|
14
|
+
```
|
|
15
|
+
packages=['Classification', 'Classification.Model', 'Classification.Model.DecisionTree',
|
|
16
|
+
'Classification.Model.Ensemble', 'Classification.Model.NeuralNetwork',
|
|
17
|
+
'Classification.Model.NonParametric', 'Classification.Model.Parametric',
|
|
18
|
+
'Classification.Filter', 'Classification.DataSet', 'Classification.Instance', 'Classification.Attribute',
|
|
19
|
+
'Classification.Parameter', 'Classification.Experiment',
|
|
20
|
+
'Classification.Performance', 'Classification.InstanceList', 'Classification.DistanceMetric',
|
|
21
|
+
'Classification.StatisticalTest', 'Classification.FeatureSelection'],
|
|
22
|
+
```
|
|
23
|
+
2. Package names should be lowercase and may only include the _ character.
|
|
24
|
+
```
|
|
25
|
+
name='nlptoolkit_math',
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
### Python files
|
|
29
|
+
1. Do not forget to comment each function.
|
|
30
|
+
```
|
|
31
|
+
def __broadcast_shape(self, shape1: Tuple[int, ...], shape2: Tuple[int, ...]) -> Tuple[int, ...]:
|
|
32
|
+
"""
|
|
33
|
+
Determines the broadcasted shape of two tensors.
|
|
34
|
+
|
|
35
|
+
:param shape1: Tuple representing the first tensor shape.
|
|
36
|
+
:param shape2: Tuple representing the second tensor shape.
|
|
37
|
+
:return: Tuple representing the broadcasted shape.
|
|
38
|
+
"""
|
|
39
|
+
```
|
|
40
|
+
2. Function names should follow camel case.
|
|
41
|
+
```
|
|
42
|
+
def addItem(self, item: str):
|
|
43
|
+
```
|
|
44
|
+
3. Local variables should follow snake case.
|
|
45
|
+
```
|
|
46
|
+
det = 1.0
|
|
47
|
+
copy_of_matrix = copy.deepcopy(self)
|
|
48
|
+
```
|
|
49
|
+
4. Class variables should be declared in each file.
|
|
50
|
+
```
|
|
51
|
+
class Eigenvector(Vector):
|
|
52
|
+
eigenvalue: float
|
|
53
|
+
```
|
|
54
|
+
5. Variable types should be defined for function parameters and class variables.
|
|
55
|
+
```
|
|
56
|
+
def getIndex(self, item: str) -> int:
|
|
57
|
+
```
|
|
58
|
+
6. For abstract methods, use ABC package and declare them with @abstractmethod.
|
|
59
|
+
```
|
|
60
|
+
@abstractmethod
|
|
61
|
+
def train(self, train_set: list[Tensor]):
|
|
62
|
+
pass
|
|
63
|
+
```
|
|
64
|
+
7. For private methods, use __ as prefix in their names.
|
|
65
|
+
```
|
|
66
|
+
def __infer_shape(self, data: Union[List, List[List], List[List[List]]]) -> Tuple[int, ...]:
|
|
67
|
+
```
|
|
68
|
+
8. For private class variables, use __ as prefix in their names.
|
|
69
|
+
```
|
|
70
|
+
class Matrix(object):
|
|
71
|
+
__row: int
|
|
72
|
+
__col: int
|
|
73
|
+
__values: list[list[float]]
|
|
74
|
+
```
|
|
75
|
+
9. Write \_\_repr\_\_ class methods as toString methods
|
|
76
|
+
10. Write getter and setter class methods.
|
|
77
|
+
```
|
|
78
|
+
def getOptimizer(self) -> Optimizer:
|
|
79
|
+
return self.optimizer
|
|
80
|
+
def setValue(self, value: Optional[Tensor]) -> None:
|
|
81
|
+
self._value = value
|
|
82
|
+
```
|
|
83
|
+
11. If there are multiple constructors for a class, define them as constructor1, constructor2, ..., then from the original constructor call these methods.
|
|
84
|
+
```
|
|
85
|
+
def constructor1(self):
|
|
86
|
+
self.__values = []
|
|
87
|
+
self.__size = 0
|
|
88
|
+
|
|
89
|
+
def constructor2(self, values: list):
|
|
90
|
+
self.__values = values.copy()
|
|
91
|
+
self.__size = len(values)
|
|
92
|
+
|
|
93
|
+
def __init__(self,
|
|
94
|
+
valuesOrSize=None,
|
|
95
|
+
initial=None):
|
|
96
|
+
if valuesOrSize is None:
|
|
97
|
+
self.constructor1()
|
|
98
|
+
elif isinstance(valuesOrSize, list):
|
|
99
|
+
self.constructor2(valuesOrSize)
|
|
100
|
+
```
|
|
101
|
+
12. Extend test classes from unittest and use separate unit test methods.
|
|
102
|
+
```
|
|
103
|
+
class TensorTest(unittest.TestCase):
|
|
104
|
+
|
|
105
|
+
def test_inferred_shape(self):
|
|
106
|
+
a = Tensor([[1.0, 2.0], [3.0, 4.0]])
|
|
107
|
+
self.assertEqual((2, 2), a.getShape())
|
|
108
|
+
|
|
109
|
+
def test_shape(self):
|
|
110
|
+
a = Tensor([1.0, 2.0, 3.0])
|
|
111
|
+
self.assertEqual((3, ), a.getShape())
|
|
112
|
+
```
|
|
113
|
+
13. Enumerated types should be used when necessary as enum classes.
|
|
114
|
+
```
|
|
115
|
+
class AttributeType(Enum):
|
|
116
|
+
"""
|
|
117
|
+
Continuous Attribute
|
|
118
|
+
"""
|
|
119
|
+
CONTINUOUS = auto()
|
|
120
|
+
"""
|
|
121
|
+
Discrete Attribute
|
|
122
|
+
"""
|
|
123
|
+
DISCRETE = auto()
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
Platform: UNKNOWN
|
|
127
|
+
Description-Content-Type: text/markdown
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
For Contributors
|
|
2
|
+
============
|
|
3
|
+
|
|
4
|
+
### Setup.py file
|
|
5
|
+
1. Do not forget to set package list. All subfolders should be added to the package list.
|
|
6
|
+
```
|
|
7
|
+
packages=['Classification', 'Classification.Model', 'Classification.Model.DecisionTree',
|
|
8
|
+
'Classification.Model.Ensemble', 'Classification.Model.NeuralNetwork',
|
|
9
|
+
'Classification.Model.NonParametric', 'Classification.Model.Parametric',
|
|
10
|
+
'Classification.Filter', 'Classification.DataSet', 'Classification.Instance', 'Classification.Attribute',
|
|
11
|
+
'Classification.Parameter', 'Classification.Experiment',
|
|
12
|
+
'Classification.Performance', 'Classification.InstanceList', 'Classification.DistanceMetric',
|
|
13
|
+
'Classification.StatisticalTest', 'Classification.FeatureSelection'],
|
|
14
|
+
```
|
|
15
|
+
2. Package names should be lowercase and may only include the _ character.
|
|
16
|
+
```
|
|
17
|
+
name='nlptoolkit_math',
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
### Python files
|
|
21
|
+
1. Do not forget to comment each function.
|
|
22
|
+
```
|
|
23
|
+
def __broadcast_shape(self, shape1: Tuple[int, ...], shape2: Tuple[int, ...]) -> Tuple[int, ...]:
|
|
24
|
+
"""
|
|
25
|
+
Determines the broadcasted shape of two tensors.
|
|
26
|
+
|
|
27
|
+
:param shape1: Tuple representing the first tensor shape.
|
|
28
|
+
:param shape2: Tuple representing the second tensor shape.
|
|
29
|
+
:return: Tuple representing the broadcasted shape.
|
|
30
|
+
"""
|
|
31
|
+
```
|
|
32
|
+
2. Function names should follow camel case.
|
|
33
|
+
```
|
|
34
|
+
def addItem(self, item: str):
|
|
35
|
+
```
|
|
36
|
+
3. Local variables should follow snake case.
|
|
37
|
+
```
|
|
38
|
+
det = 1.0
|
|
39
|
+
copy_of_matrix = copy.deepcopy(self)
|
|
40
|
+
```
|
|
41
|
+
4. Class variables should be declared in each file.
|
|
42
|
+
```
|
|
43
|
+
class Eigenvector(Vector):
|
|
44
|
+
eigenvalue: float
|
|
45
|
+
```
|
|
46
|
+
5. Variable types should be defined for function parameters and class variables.
|
|
47
|
+
```
|
|
48
|
+
def getIndex(self, item: str) -> int:
|
|
49
|
+
```
|
|
50
|
+
6. For abstract methods, use ABC package and declare them with @abstractmethod.
|
|
51
|
+
```
|
|
52
|
+
@abstractmethod
|
|
53
|
+
def train(self, train_set: list[Tensor]):
|
|
54
|
+
pass
|
|
55
|
+
```
|
|
56
|
+
7. For private methods, use __ as prefix in their names.
|
|
57
|
+
```
|
|
58
|
+
def __infer_shape(self, data: Union[List, List[List], List[List[List]]]) -> Tuple[int, ...]:
|
|
59
|
+
```
|
|
60
|
+
8. For private class variables, use __ as prefix in their names.
|
|
61
|
+
```
|
|
62
|
+
class Matrix(object):
|
|
63
|
+
__row: int
|
|
64
|
+
__col: int
|
|
65
|
+
__values: list[list[float]]
|
|
66
|
+
```
|
|
67
|
+
9. Write \_\_repr\_\_ class methods as toString methods
|
|
68
|
+
10. Write getter and setter class methods.
|
|
69
|
+
```
|
|
70
|
+
def getOptimizer(self) -> Optimizer:
|
|
71
|
+
return self.optimizer
|
|
72
|
+
def setValue(self, value: Optional[Tensor]) -> None:
|
|
73
|
+
self._value = value
|
|
74
|
+
```
|
|
75
|
+
11. If there are multiple constructors for a class, define them as constructor1, constructor2, ..., then from the original constructor call these methods.
|
|
76
|
+
```
|
|
77
|
+
def constructor1(self):
|
|
78
|
+
self.__values = []
|
|
79
|
+
self.__size = 0
|
|
80
|
+
|
|
81
|
+
def constructor2(self, values: list):
|
|
82
|
+
self.__values = values.copy()
|
|
83
|
+
self.__size = len(values)
|
|
84
|
+
|
|
85
|
+
def __init__(self,
|
|
86
|
+
valuesOrSize=None,
|
|
87
|
+
initial=None):
|
|
88
|
+
if valuesOrSize is None:
|
|
89
|
+
self.constructor1()
|
|
90
|
+
elif isinstance(valuesOrSize, list):
|
|
91
|
+
self.constructor2(valuesOrSize)
|
|
92
|
+
```
|
|
93
|
+
12. Extend test classes from unittest and use separate unit test methods.
|
|
94
|
+
```
|
|
95
|
+
class TensorTest(unittest.TestCase):
|
|
96
|
+
|
|
97
|
+
def test_inferred_shape(self):
|
|
98
|
+
a = Tensor([[1.0, 2.0], [3.0, 4.0]])
|
|
99
|
+
self.assertEqual((2, 2), a.getShape())
|
|
100
|
+
|
|
101
|
+
def test_shape(self):
|
|
102
|
+
a = Tensor([1.0, 2.0, 3.0])
|
|
103
|
+
self.assertEqual((3, ), a.getShape())
|
|
104
|
+
```
|
|
105
|
+
13. Enumerated types should be used when necessary as enum classes.
|
|
106
|
+
```
|
|
107
|
+
class AttributeType(Enum):
|
|
108
|
+
"""
|
|
109
|
+
Continuous Attribute
|
|
110
|
+
"""
|
|
111
|
+
CONTINUOUS = auto()
|
|
112
|
+
"""
|
|
113
|
+
Discrete Attribute
|
|
114
|
+
"""
|
|
115
|
+
DISCRETE = auto()
|
|
116
|
+
```
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: nlptoolkit-amr
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Simple Amr Processing
|
|
5
|
+
Home-page: https://github.com/StarlangSoftware/Amr-Py
|
|
6
|
+
Author: olcaytaner
|
|
7
|
+
Author-email: olcay.yildiz@ozyegin.edu.tr
|
|
8
|
+
License: UNKNOWN
|
|
9
|
+
Description: For Contributors
|
|
10
|
+
============
|
|
11
|
+
|
|
12
|
+
### Setup.py file
|
|
13
|
+
1. Do not forget to set package list. All subfolders should be added to the package list.
|
|
14
|
+
```
|
|
15
|
+
packages=['Classification', 'Classification.Model', 'Classification.Model.DecisionTree',
|
|
16
|
+
'Classification.Model.Ensemble', 'Classification.Model.NeuralNetwork',
|
|
17
|
+
'Classification.Model.NonParametric', 'Classification.Model.Parametric',
|
|
18
|
+
'Classification.Filter', 'Classification.DataSet', 'Classification.Instance', 'Classification.Attribute',
|
|
19
|
+
'Classification.Parameter', 'Classification.Experiment',
|
|
20
|
+
'Classification.Performance', 'Classification.InstanceList', 'Classification.DistanceMetric',
|
|
21
|
+
'Classification.StatisticalTest', 'Classification.FeatureSelection'],
|
|
22
|
+
```
|
|
23
|
+
2. Package names should be lowercase and may only include the _ character.
|
|
24
|
+
```
|
|
25
|
+
name='nlptoolkit_math',
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
### Python files
|
|
29
|
+
1. Do not forget to comment each function.
|
|
30
|
+
```
|
|
31
|
+
def __broadcast_shape(self, shape1: Tuple[int, ...], shape2: Tuple[int, ...]) -> Tuple[int, ...]:
|
|
32
|
+
"""
|
|
33
|
+
Determines the broadcasted shape of two tensors.
|
|
34
|
+
|
|
35
|
+
:param shape1: Tuple representing the first tensor shape.
|
|
36
|
+
:param shape2: Tuple representing the second tensor shape.
|
|
37
|
+
:return: Tuple representing the broadcasted shape.
|
|
38
|
+
"""
|
|
39
|
+
```
|
|
40
|
+
2. Function names should follow camel case.
|
|
41
|
+
```
|
|
42
|
+
def addItem(self, item: str):
|
|
43
|
+
```
|
|
44
|
+
3. Local variables should follow snake case.
|
|
45
|
+
```
|
|
46
|
+
det = 1.0
|
|
47
|
+
copy_of_matrix = copy.deepcopy(self)
|
|
48
|
+
```
|
|
49
|
+
4. Class variables should be declared in each file.
|
|
50
|
+
```
|
|
51
|
+
class Eigenvector(Vector):
|
|
52
|
+
eigenvalue: float
|
|
53
|
+
```
|
|
54
|
+
5. Variable types should be defined for function parameters and class variables.
|
|
55
|
+
```
|
|
56
|
+
def getIndex(self, item: str) -> int:
|
|
57
|
+
```
|
|
58
|
+
6. For abstract methods, use ABC package and declare them with @abstractmethod.
|
|
59
|
+
```
|
|
60
|
+
@abstractmethod
|
|
61
|
+
def train(self, train_set: list[Tensor]):
|
|
62
|
+
pass
|
|
63
|
+
```
|
|
64
|
+
7. For private methods, use __ as prefix in their names.
|
|
65
|
+
```
|
|
66
|
+
def __infer_shape(self, data: Union[List, List[List], List[List[List]]]) -> Tuple[int, ...]:
|
|
67
|
+
```
|
|
68
|
+
8. For private class variables, use __ as prefix in their names.
|
|
69
|
+
```
|
|
70
|
+
class Matrix(object):
|
|
71
|
+
__row: int
|
|
72
|
+
__col: int
|
|
73
|
+
__values: list[list[float]]
|
|
74
|
+
```
|
|
75
|
+
9. Write \_\_repr\_\_ class methods as toString methods
|
|
76
|
+
10. Write getter and setter class methods.
|
|
77
|
+
```
|
|
78
|
+
def getOptimizer(self) -> Optimizer:
|
|
79
|
+
return self.optimizer
|
|
80
|
+
def setValue(self, value: Optional[Tensor]) -> None:
|
|
81
|
+
self._value = value
|
|
82
|
+
```
|
|
83
|
+
11. If there are multiple constructors for a class, define them as constructor1, constructor2, ..., then from the original constructor call these methods.
|
|
84
|
+
```
|
|
85
|
+
def constructor1(self):
|
|
86
|
+
self.__values = []
|
|
87
|
+
self.__size = 0
|
|
88
|
+
|
|
89
|
+
def constructor2(self, values: list):
|
|
90
|
+
self.__values = values.copy()
|
|
91
|
+
self.__size = len(values)
|
|
92
|
+
|
|
93
|
+
def __init__(self,
|
|
94
|
+
valuesOrSize=None,
|
|
95
|
+
initial=None):
|
|
96
|
+
if valuesOrSize is None:
|
|
97
|
+
self.constructor1()
|
|
98
|
+
elif isinstance(valuesOrSize, list):
|
|
99
|
+
self.constructor2(valuesOrSize)
|
|
100
|
+
```
|
|
101
|
+
12. Extend test classes from unittest and use separate unit test methods.
|
|
102
|
+
```
|
|
103
|
+
class TensorTest(unittest.TestCase):
|
|
104
|
+
|
|
105
|
+
def test_inferred_shape(self):
|
|
106
|
+
a = Tensor([[1.0, 2.0], [3.0, 4.0]])
|
|
107
|
+
self.assertEqual((2, 2), a.getShape())
|
|
108
|
+
|
|
109
|
+
def test_shape(self):
|
|
110
|
+
a = Tensor([1.0, 2.0, 3.0])
|
|
111
|
+
self.assertEqual((3, ), a.getShape())
|
|
112
|
+
```
|
|
113
|
+
13. Enumerated types should be used when necessary as enum classes.
|
|
114
|
+
```
|
|
115
|
+
class AttributeType(Enum):
|
|
116
|
+
"""
|
|
117
|
+
Continuous Attribute
|
|
118
|
+
"""
|
|
119
|
+
CONTINUOUS = auto()
|
|
120
|
+
"""
|
|
121
|
+
Discrete Attribute
|
|
122
|
+
"""
|
|
123
|
+
DISCRETE = auto()
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
Platform: UNKNOWN
|
|
127
|
+
Description-Content-Type: text/markdown
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
setup.py
|
|
3
|
+
Construction/AmrConstructionAlgorithm.py
|
|
4
|
+
Construction/RuleBasedConstructionAlgorithm.py
|
|
5
|
+
Corpus/AmrConnection.py
|
|
6
|
+
Corpus/AmrCorpus.py
|
|
7
|
+
Corpus/AmrSentence.py
|
|
8
|
+
Corpus/AmrWord.py
|
|
9
|
+
Corpus/Point.py
|
|
10
|
+
nlptoolkit_amr.egg-info/PKG-INFO
|
|
11
|
+
nlptoolkit_amr.egg-info/SOURCES.txt
|
|
12
|
+
nlptoolkit_amr.egg-info/dependency_links.txt
|
|
13
|
+
nlptoolkit_amr.egg-info/requires.txt
|
|
14
|
+
nlptoolkit_amr.egg-info/top_level.txt
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
NlpToolkit-AnnotatedSentence
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
from setuptools import setup
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
# The README next to this script is used as the long description of the package.
this_directory = Path(__file__).parent
readme_path = this_directory / "README.md"
long_description = readme_path.read_text(encoding="utf-8")

# Package metadata handed to setuptools.
setup(
    name="nlptoolkit_amr",
    version="1.0.0",
    description="Simple Amr Processing",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/StarlangSoftware/Amr-Py",
    author="olcaytaner",
    author_email="olcay.yildiz@ozyegin.edu.tr",
    license="",
    packages=["Corpus", "Construction"],
    install_requires=["NlpToolkit-AnnotatedSentence"],
)
|