disvortilo 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- disvortilo/__init__.py +172 -0
- disvortilo/full_words.txt +113 -0
- disvortilo/prefixes.txt +10 -0
- disvortilo/roots.txt +502 -0
- disvortilo/suffixes.txt +45 -0
- disvortilo-0.1.0.dist-info/METADATA +45 -0
- disvortilo-0.1.0.dist-info/RECORD +10 -0
- disvortilo-0.1.0.dist-info/WHEEL +5 -0
- disvortilo-0.1.0.dist-info/licenses/LICENSE +21 -0
- disvortilo-0.1.0.dist-info/top_level.txt +1 -0
disvortilo/__init__.py
ADDED
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
import importlib.resources
|
|
2
|
+
import re
|
|
3
|
+
from collections.abc import Generator
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def load_word_list(resource_name: str) -> set[str]:
    """Read a word list bundled with this package and return it as a set.

    Everything from the first ``#`` on a line is treated as a comment and
    dropped, surrounding whitespace is stripped, and lines that end up empty
    are ignored.

    Args:
        resource_name: File name of the UTF-8 text resource inside the package.

    Returns:
        The distinct entries found in the resource.
    """
    resource = importlib.resources.files(__package__).joinpath(resource_name)
    entries = (
        raw_line.partition("#")[0].strip()
        for raw_line in resource.read_text("utf-8").splitlines()
    )
    # Blank lines and comment-only lines collapse to "" and are filtered here.
    return {entry for entry in entries if entry}
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def growing_string(string: str) -> Generator[str]:
    """Yield every non-empty prefix of *string*, shortest first.

    For ``"abc"`` this produces ``"a"``, ``"ab"`` and ``"abc"``; an empty
    string produces nothing.
    """
    for stop in range(1, len(string) + 1):
        yield string[:stop]
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
# Endings that may close a word once its last root or suffix has been consumed.
WORD_ENDS = {
    "e", "en",
    "a", "an", "ajn", "aj",
    "o", "on", "ojn", "oj",
    "as", "os", "is", "us", "u", "i",
}

# First halves of the correlatives (table words).
CORRELATIVE_WORD_STARTS = {
    "ki", "ti", "i", "ĉi", "neni",
}

# Second halves of the correlatives.  The "a" series takes the plural and
# accusative markers just like the "o" and "u" series do, so forms such as
# "kian", "tiaj" and "kiajn" must be accepted as well.
CORRELATIVE_WORD_ENDS = {
    "o", "on", "oj", "ojn",
    "u", "un", "uj", "ujn",
    "a", "an", "aj", "ajn",
    "e", "en",
    "am", "el", "es", "om", "al",
}
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class Disvortilo:
    """Break Esperanto words into their building blocks.

    The word lists (suffixes, prefixes, roots and full words) are loaded from
    the package's bundled text resources when an instance is created.
    """

    def __init__(self):
        self.suffixes = load_word_list("suffixes.txt")
        self.prefixes = load_word_list("prefixes.txt")
        self.roots = load_word_list("roots.txt")
        self.full_words = load_word_list("full_words.txt")

    def _is_in(self, word: str, _suffix, _prefix, _root, _full_word):
        """Classify *word* against the word lists that are switched on.

        The lists are consulted in the fixed order root, suffix, prefix,
        full word; the label of the first enabled list containing *word* is
        returned, or ``""`` when none matches.
        """
        lookups = (
            (_root, self.roots, "root"),
            (_suffix, self.suffixes, "suffix"),
            (_prefix, self.prefixes, "prefix"),
            (_full_word, self.full_words, "full_words"),
        )
        for enabled, known_words, label in lookups:
            if enabled and word in known_words:
                return label

        return ""

    def _parse_correlative(self, word: str) -> list[tuple[str, ...]]:
        """Split *word* into a correlative start and end, if it is one.

        Returns a single-element list with the ``(start, end)`` pair, or an
        empty list when *word* is not built from a known start and end.
        """
        # Only the shortest matching start is considered.
        start = next(
            (head for head in growing_string(word) if head in CORRELATIVE_WORD_STARTS),
            None,
        )
        if start is None:
            # word didn't match the word starts
            return []

        ending = word[len(start):]
        return [(start, ending)] if ending in CORRELATIVE_WORD_ENDS else []

    def _parse_number(self, word: str) -> list[tuple[str, ...]]:
        """Recognise a run of digits, optionally followed by ``a`` or ``an``.

        Returns the matching splits (``(digits,)`` or ``(digits, ending)``),
        or an empty list when *word* has no such form.
        """
        matches = []
        for digits in growing_string(word):
            if not part_is_digits(digits):
                continue

            tail = word[len(digits):]
            if not tail:
                matches.append((digits,))
            elif tail in ("a", "an"):
                matches.append((digits, tail))

        return matches

    def parse(
        self,
        word: str,

        # Controls the valid next part
        _suffix: bool = False,
        _prefix: bool = True,
        _root: bool = True,
        _full_word_integrated: bool = True,
        _correlative: bool = True,
        _full_word_standalone: bool = True,
        _number: bool = True
    ) -> list[tuple[str, ...]]:
        """Return every decomposition of *word* this analyser can find.

        The checks run in order: a standalone full word, a correlative, a
        number, and finally a recursive split into known parts followed by a
        word ending.  The underscore-prefixed flags select which kinds of
        part are acceptable at the current position; the recursive calls set
        them, and callers normally leave them at their defaults.

        Returns:
            A list of tuples, one per decomposition, each holding the parts
            in order.  The list is empty when nothing matches.
        """
        if _full_word_standalone and word in self.full_words:
            return [(word,)]

        if _correlative and (as_correlative := self._parse_correlative(word)):
            return as_correlative

        if _number and (as_number := self._parse_number(word)):
            return as_number

        analyses = []
        for head in growing_string(word):
            kind = self._is_in(head, _suffix, _prefix, _root, _full_word_integrated)
            if not kind:
                continue

            tail = word[len(head):]

            # A linking "o" between two parts: skip it and continue with what
            # follows, where neither a suffix nor a prefix is accepted next.
            if len(tail) > 1 and tail.startswith("o"):
                after_link = self.parse(
                    tail[1:],
                    _correlative=False,
                    _full_word_standalone=False,
                    _suffix=False,
                    _prefix=False,
                    _number=False
                )
                analyses.extend((head, "o") + rest for rest in after_link)

            if kind != "prefix" and tail in WORD_ENDS:
                # Allow if the prefix can be used as a root too. Disallow an end after a prefix
                analyses.append((head, tail))
                continue

            # try recursion
            deeper = self.parse(
                tail,
                _correlative=False,
                _full_word_standalone=False,
                _suffix=True,
                _number=False
            )
            analyses.extend((head,) + rest for rest in deeper)

        return analyses


def part_is_digits(part: str) -> bool:
    """Tell whether *part* consists of digits only (``str.isdigit``)."""
    return part.isdigit()
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
# One token: a run of ASCII letters, Esperanto accented letters or digits.
_ESPERANTO_SPLIT_WORDS = r"[A-Za-zĉĝĥĵŝŭĈĜĤĴŜŬ0-9]+"


def _split_sentence(sentence: str):
    """Return the tokens of *sentence* in order of appearance.

    Any character outside the token pattern acts as a separator and is
    dropped from the result.
    """
    return [found.group(0) for found in re.finditer(_ESPERANTO_SPLIT_WORDS, sentence)]
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def _parse_sentence(sentence: str):
    """Print the decomposition of every word in *sentence*, one per line.

    A word with at least one decomposition is printed with its parts joined
    by ``·`` and alternative decompositions separated by a space.  A word
    that cannot be decomposed is printed unchanged between ``~`` marks.  An
    empty line is printed after the last word.
    """
    tokens = _split_sentence(sentence)
    analyser = Disvortilo()

    for token in tokens:
        options = analyser.parse(token)
        if options:
            line = " ".join("·".join(pieces) for pieces in options)
        else:
            line = f"~{token}~"
        print(line, end="\n")

    print()
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
min
|
|
2
|
+
vin
|
|
3
|
+
ŝin
|
|
4
|
+
lin
|
|
5
|
+
ĝin
|
|
6
|
+
nin
|
|
7
|
+
ilin
|
|
8
|
+
|
|
9
|
+
morgaŭ
|
|
10
|
+
sen
|
|
11
|
+
|
|
12
|
+
# Taken from eoparser/full_words.txt:
|
|
13
|
+
|
|
14
|
+
adiaŭ
|
|
15
|
+
al
|
|
16
|
+
ankaŭ
|
|
17
|
+
ankoraŭ
|
|
18
|
+
anstataŭ
|
|
19
|
+
apenaŭ
|
|
20
|
+
antaŭ
|
|
21
|
+
apud
|
|
22
|
+
aŭ
|
|
23
|
+
baldaŭ
|
|
24
|
+
cent
|
|
25
|
+
ci
|
|
26
|
+
ĉi
|
|
27
|
+
ĉar
|
|
28
|
+
ĉe
|
|
29
|
+
ĉirkaŭ
|
|
30
|
+
ĉu
|
|
31
|
+
da
|
|
32
|
+
de
|
|
33
|
+
do
|
|
34
|
+
dum
|
|
35
|
+
eĉ
|
|
36
|
+
el
|
|
37
|
+
en
|
|
38
|
+
for
|
|
39
|
+
ĝis
|
|
40
|
+
ĝi
|
|
41
|
+
hieraŭ
|
|
42
|
+
hodiaŭ
|
|
43
|
+
ili
|
|
44
|
+
inter
|
|
45
|
+
jam
|
|
46
|
+
ja
|
|
47
|
+
je
|
|
48
|
+
jen
|
|
49
|
+
jes
|
|
50
|
+
ĵus
|
|
51
|
+
kaj
|
|
52
|
+
kontraŭ
|
|
53
|
+
ke
|
|
54
|
+
kun
|
|
55
|
+
kvankam
|
|
56
|
+
kvazaŭ
|
|
57
|
+
la
|
|
58
|
+
laŭ
|
|
59
|
+
li
|
|
60
|
+
mi
|
|
61
|
+
mem
|
|
62
|
+
ne
|
|
63
|
+
nek
|
|
64
|
+
ni
|
|
65
|
+
nur
|
|
66
|
+
nun
|
|
67
|
+
nu
|
|
68
|
+
oni
|
|
69
|
+
ol
|
|
70
|
+
plu
|
|
71
|
+
por
|
|
72
|
+
per
|
|
73
|
+
post
|
|
74
|
+
preskaŭ
|
|
75
|
+
preter
|
|
76
|
+
pro
|
|
77
|
+
plej
|
|
78
|
+
pli
|
|
79
|
+
si
|
|
80
|
+
se
|
|
81
|
+
sed
|
|
82
|
+
sur
|
|
83
|
+
tra
|
|
84
|
+
tamen
|
|
85
|
+
trans
|
|
86
|
+
tre
|
|
87
|
+
tro
|
|
88
|
+
tuj
|
|
89
|
+
vi
|
|
90
|
+
l'
|
|
91
|
+
sub
|
|
92
|
+
ho
|
|
93
|
+
aux
|
|
94
|
+
ekde
|
|
95
|
+
pri
|
|
96
|
+
ili
|
|
97
|
+
ajn
|
|
98
|
+
ŝi
|
|
99
|
+
almenaŭ
|
|
100
|
+
ĉirkaŭ
|
|
101
|
+
ktp
|
|
102
|
+
ia
|
|
103
|
+
super
|
|
104
|
+
nord
|
|
105
|
+
krom
|
|
106
|
+
ambaŭ
|
|
107
|
+
malpli
|
|
108
|
+
escepte
|
|
109
|
+
#okcident
|
|
110
|
+
#nord
|
|
111
|
+
#sud
|
|
112
|
+
#orient
|
|
113
|
+
#centr
|
disvortilo/prefixes.txt
ADDED
disvortilo/roots.txt
ADDED
|
@@ -0,0 +1,502 @@
|
|
|
1
|
+
esperant
|
|
2
|
+
fingr
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
absolut
|
|
6
|
+
adres
|
|
7
|
+
afer
|
|
8
|
+
afrik
|
|
9
|
+
ag
|
|
10
|
+
akcept
|
|
11
|
+
akv
|
|
12
|
+
ali
|
|
13
|
+
alt
|
|
14
|
+
am
|
|
15
|
+
amas
|
|
16
|
+
amerik
|
|
17
|
+
amik
|
|
18
|
+
ampleks
|
|
19
|
+
amuz
|
|
20
|
+
angl
|
|
21
|
+
anonc
|
|
22
|
+
aparat
|
|
23
|
+
apart
|
|
24
|
+
aper
|
|
25
|
+
aranĝ
|
|
26
|
+
asoci
|
|
27
|
+
aspekt
|
|
28
|
+
atend
|
|
29
|
+
atent
|
|
30
|
+
ating
|
|
31
|
+
av
|
|
32
|
+
aĉet
|
|
33
|
+
aĝ
|
|
34
|
+
aŭd
|
|
35
|
+
aŭskult
|
|
36
|
+
aŭtobus
|
|
37
|
+
banan
|
|
38
|
+
batal
|
|
39
|
+
baz
|
|
40
|
+
bedaŭr
|
|
41
|
+
bel
|
|
42
|
+
bend
|
|
43
|
+
bezon
|
|
44
|
+
bibliotek
|
|
45
|
+
bild
|
|
46
|
+
bilet
|
|
47
|
+
bird
|
|
48
|
+
bon
|
|
49
|
+
botel
|
|
50
|
+
bulgar
|
|
51
|
+
cel
|
|
52
|
+
centr
|
|
53
|
+
cert
|
|
54
|
+
ceter
|
|
55
|
+
cigared
|
|
56
|
+
dan
|
|
57
|
+
dank
|
|
58
|
+
daŭr
|
|
59
|
+
debat
|
|
60
|
+
decid
|
|
61
|
+
dek
|
|
62
|
+
dekstr
|
|
63
|
+
delegaci
|
|
64
|
+
demand
|
|
65
|
+
dev
|
|
66
|
+
dezir
|
|
67
|
+
deĵor
|
|
68
|
+
diabl
|
|
69
|
+
difin
|
|
70
|
+
dir
|
|
71
|
+
diskriminaci
|
|
72
|
+
diskut
|
|
73
|
+
divers
|
|
74
|
+
divid
|
|
75
|
+
dom
|
|
76
|
+
don
|
|
77
|
+
donac
|
|
78
|
+
dorm
|
|
79
|
+
du
|
|
80
|
+
edz
|
|
81
|
+
ekskurs
|
|
82
|
+
ekster
|
|
83
|
+
ekzempl
|
|
84
|
+
ekzempler
|
|
85
|
+
ekzist
|
|
86
|
+
elekt
|
|
87
|
+
entrepren
|
|
88
|
+
erinac
|
|
89
|
+
esenc
|
|
90
|
+
esper
|
|
91
|
+
est
|
|
92
|
+
eventual
|
|
93
|
+
evolu
|
|
94
|
+
eŭrop
|
|
95
|
+
facil
|
|
96
|
+
fak
|
|
97
|
+
fakt
|
|
98
|
+
fal
|
|
99
|
+
famili
|
|
100
|
+
far
|
|
101
|
+
fart
|
|
102
|
+
feliĉ
|
|
103
|
+
ferm
|
|
104
|
+
festival
|
|
105
|
+
film
|
|
106
|
+
fin
|
|
107
|
+
finn
|
|
108
|
+
firm
|
|
109
|
+
fiŝ
|
|
110
|
+
flank
|
|
111
|
+
flav
|
|
112
|
+
flor
|
|
113
|
+
flug
|
|
114
|
+
foj
|
|
115
|
+
forges
|
|
116
|
+
form
|
|
117
|
+
fort
|
|
118
|
+
fot
|
|
119
|
+
franc
|
|
120
|
+
frank
|
|
121
|
+
frat
|
|
122
|
+
fraz
|
|
123
|
+
fraŭl
|
|
124
|
+
frenez
|
|
125
|
+
fru
|
|
126
|
+
funkci
|
|
127
|
+
fuŝ
|
|
128
|
+
german
|
|
129
|
+
giĉet
|
|
130
|
+
grad
|
|
131
|
+
grand
|
|
132
|
+
gratul
|
|
133
|
+
grav
|
|
134
|
+
grup
|
|
135
|
+
gvid
|
|
136
|
+
ha
|
|
137
|
+
hav
|
|
138
|
+
hebre
|
|
139
|
+
hejm
|
|
140
|
+
help
|
|
141
|
+
hom
|
|
142
|
+
hor
|
|
143
|
+
hotel
|
|
144
|
+
ide
|
|
145
|
+
imag
|
|
146
|
+
infan
|
|
147
|
+
inform
|
|
148
|
+
instru
|
|
149
|
+
inteligent
|
|
150
|
+
interes
|
|
151
|
+
interpret
|
|
152
|
+
invit
|
|
153
|
+
ir
|
|
154
|
+
iran
|
|
155
|
+
ital
|
|
156
|
+
japan
|
|
157
|
+
jar
|
|
158
|
+
jun
|
|
159
|
+
kamp
|
|
160
|
+
kant
|
|
161
|
+
kapabl
|
|
162
|
+
kapt
|
|
163
|
+
kar
|
|
164
|
+
karot
|
|
165
|
+
kart
|
|
166
|
+
kaz
|
|
167
|
+
kaŝ
|
|
168
|
+
kelk
|
|
169
|
+
kilo
|
|
170
|
+
klar
|
|
171
|
+
klopod
|
|
172
|
+
knab
|
|
173
|
+
kolor
|
|
174
|
+
komenc
|
|
175
|
+
komision
|
|
176
|
+
komitat
|
|
177
|
+
kompetent
|
|
178
|
+
komplet
|
|
179
|
+
komplik
|
|
180
|
+
kompren
|
|
181
|
+
kon
|
|
182
|
+
koncept
|
|
183
|
+
koncern
|
|
184
|
+
kongres
|
|
185
|
+
konkret
|
|
186
|
+
konkurs
|
|
187
|
+
konsci
|
|
188
|
+
konsent
|
|
189
|
+
konserv
|
|
190
|
+
konsil
|
|
191
|
+
konsili
|
|
192
|
+
konsist
|
|
193
|
+
konstant
|
|
194
|
+
kontakt
|
|
195
|
+
kontrol
|
|
196
|
+
korb
|
|
197
|
+
korespond
|
|
198
|
+
kost
|
|
199
|
+
kovr
|
|
200
|
+
kred
|
|
201
|
+
kresk
|
|
202
|
+
kri
|
|
203
|
+
kruel
|
|
204
|
+
kuir
|
|
205
|
+
kuler
|
|
206
|
+
kultur
|
|
207
|
+
kunikl
|
|
208
|
+
kur
|
|
209
|
+
kutim
|
|
210
|
+
kvar
|
|
211
|
+
kvin
|
|
212
|
+
labor
|
|
213
|
+
lag
|
|
214
|
+
land
|
|
215
|
+
lanĉ
|
|
216
|
+
las
|
|
217
|
+
last
|
|
218
|
+
lav
|
|
219
|
+
leg
|
|
220
|
+
lern
|
|
221
|
+
lert
|
|
222
|
+
leter
|
|
223
|
+
lev
|
|
224
|
+
liber
|
|
225
|
+
libr
|
|
226
|
+
lig
|
|
227
|
+
lingv
|
|
228
|
+
lit
|
|
229
|
+
liter
|
|
230
|
+
literatur
|
|
231
|
+
lok
|
|
232
|
+
long
|
|
233
|
+
loĝ
|
|
234
|
+
lu
|
|
235
|
+
lud
|
|
236
|
+
man
|
|
237
|
+
manier
|
|
238
|
+
mank
|
|
239
|
+
manĝ
|
|
240
|
+
map
|
|
241
|
+
mar
|
|
242
|
+
marŝ
|
|
243
|
+
maten
|
|
244
|
+
material
|
|
245
|
+
maŝin
|
|
246
|
+
membr
|
|
247
|
+
memor
|
|
248
|
+
met
|
|
249
|
+
mez
|
|
250
|
+
miks
|
|
251
|
+
mil
|
|
252
|
+
minimum
|
|
253
|
+
minut
|
|
254
|
+
mir
|
|
255
|
+
moment
|
|
256
|
+
mon
|
|
257
|
+
monat
|
|
258
|
+
mond
|
|
259
|
+
mont
|
|
260
|
+
montr
|
|
261
|
+
mov
|
|
262
|
+
mult
|
|
263
|
+
naci
|
|
264
|
+
nask
|
|
265
|
+
naĝ
|
|
266
|
+
naŭ
|
|
267
|
+
neces
|
|
268
|
+
nederland
|
|
269
|
+
nepr
|
|
270
|
+
neŭtral
|
|
271
|
+
nivel
|
|
272
|
+
nokt
|
|
273
|
+
nom
|
|
274
|
+
normal
|
|
275
|
+
nov
|
|
276
|
+
nud
|
|
277
|
+
numer
|
|
278
|
+
ofert
|
|
279
|
+
oficial
|
|
280
|
+
oft
|
|
281
|
+
ok
|
|
282
|
+
okaz
|
|
283
|
+
okcident
|
|
284
|
+
okup
|
|
285
|
+
opini
|
|
286
|
+
ord
|
|
287
|
+
ordinar
|
|
288
|
+
organiz
|
|
289
|
+
orient
|
|
290
|
+
ov
|
|
291
|
+
pag
|
|
292
|
+
paper
|
|
293
|
+
pardon
|
|
294
|
+
parol
|
|
295
|
+
pas
|
|
296
|
+
patr
|
|
297
|
+
paĝ
|
|
298
|
+
paŝ
|
|
299
|
+
pend
|
|
300
|
+
pens
|
|
301
|
+
perd
|
|
302
|
+
perfekt
|
|
303
|
+
period
|
|
304
|
+
persik
|
|
305
|
+
person
|
|
306
|
+
pet
|
|
307
|
+
pied
|
|
308
|
+
plan
|
|
309
|
+
plank
|
|
310
|
+
plaĉ
|
|
311
|
+
plen
|
|
312
|
+
plor
|
|
313
|
+
plur
|
|
314
|
+
pokal
|
|
315
|
+
pom
|
|
316
|
+
pont
|
|
317
|
+
popular
|
|
318
|
+
port
|
|
319
|
+
postul
|
|
320
|
+
pov
|
|
321
|
+
prav
|
|
322
|
+
precip
|
|
323
|
+
preciz
|
|
324
|
+
prefer
|
|
325
|
+
preleg
|
|
326
|
+
premi
|
|
327
|
+
pren
|
|
328
|
+
prepar
|
|
329
|
+
pret
|
|
330
|
+
prez
|
|
331
|
+
prezent
|
|
332
|
+
prezid
|
|
333
|
+
princip
|
|
334
|
+
problem
|
|
335
|
+
produkt
|
|
336
|
+
profesi
|
|
337
|
+
profesor
|
|
338
|
+
program
|
|
339
|
+
proksim
|
|
340
|
+
propon
|
|
341
|
+
protest
|
|
342
|
+
protokol
|
|
343
|
+
prov
|
|
344
|
+
publik
|
|
345
|
+
punkt
|
|
346
|
+
pup
|
|
347
|
+
pur
|
|
348
|
+
rajt
|
|
349
|
+
rakont
|
|
350
|
+
rapid
|
|
351
|
+
raport
|
|
352
|
+
region
|
|
353
|
+
regul
|
|
354
|
+
reklam
|
|
355
|
+
rekomend
|
|
356
|
+
rekt
|
|
357
|
+
relativ
|
|
358
|
+
renkont
|
|
359
|
+
respond
|
|
360
|
+
rest
|
|
361
|
+
ricev
|
|
362
|
+
rid
|
|
363
|
+
rigard
|
|
364
|
+
rilat
|
|
365
|
+
rimark
|
|
366
|
+
river
|
|
367
|
+
riĉ
|
|
368
|
+
romp
|
|
369
|
+
rus
|
|
370
|
+
sal
|
|
371
|
+
salon
|
|
372
|
+
salt
|
|
373
|
+
salut
|
|
374
|
+
sam
|
|
375
|
+
sat
|
|
376
|
+
saĝ
|
|
377
|
+
saŭn
|
|
378
|
+
sci
|
|
379
|
+
scienc
|
|
380
|
+
seg
|
|
381
|
+
seks
|
|
382
|
+
sekv
|
|
383
|
+
semajn
|
|
384
|
+
senc
|
|
385
|
+
send
|
|
386
|
+
sent
|
|
387
|
+
sep
|
|
388
|
+
seri
|
|
389
|
+
serĉ
|
|
390
|
+
ses
|
|
391
|
+
sid
|
|
392
|
+
signif
|
|
393
|
+
simil
|
|
394
|
+
simpl
|
|
395
|
+
simul
|
|
396
|
+
sinjor
|
|
397
|
+
siren
|
|
398
|
+
sistem
|
|
399
|
+
situaci
|
|
400
|
+
skandinavi
|
|
401
|
+
skatol
|
|
402
|
+
ski
|
|
403
|
+
skrib
|
|
404
|
+
soci
|
|
405
|
+
sol
|
|
406
|
+
solv
|
|
407
|
+
sorĉ
|
|
408
|
+
spec
|
|
409
|
+
special
|
|
410
|
+
specif
|
|
411
|
+
spert
|
|
412
|
+
spinac
|
|
413
|
+
star
|
|
414
|
+
stat
|
|
415
|
+
statut
|
|
416
|
+
strat
|
|
417
|
+
struktur
|
|
418
|
+
stult
|
|
419
|
+
sud
|
|
420
|
+
sufiĉ
|
|
421
|
+
suk
|
|
422
|
+
sukces
|
|
423
|
+
supoz
|
|
424
|
+
supr
|
|
425
|
+
sved
|
|
426
|
+
svis
|
|
427
|
+
tabl
|
|
428
|
+
tabul
|
|
429
|
+
tag
|
|
430
|
+
tajp
|
|
431
|
+
task
|
|
432
|
+
te
|
|
433
|
+
teatr
|
|
434
|
+
tekst
|
|
435
|
+
teler
|
|
436
|
+
tem
|
|
437
|
+
temp
|
|
438
|
+
temperatur
|
|
439
|
+
ten
|
|
440
|
+
teren
|
|
441
|
+
terur
|
|
442
|
+
tim
|
|
443
|
+
traduk
|
|
444
|
+
trajn
|
|
445
|
+
trakt
|
|
446
|
+
tranĉ
|
|
447
|
+
tri
|
|
448
|
+
trink
|
|
449
|
+
trov
|
|
450
|
+
tuk
|
|
451
|
+
tut
|
|
452
|
+
tuŝ
|
|
453
|
+
universal
|
|
454
|
+
universitat
|
|
455
|
+
unu
|
|
456
|
+
urb
|
|
457
|
+
uson
|
|
458
|
+
util
|
|
459
|
+
uz
|
|
460
|
+
valor
|
|
461
|
+
varm
|
|
462
|
+
vast
|
|
463
|
+
ven
|
|
464
|
+
vend
|
|
465
|
+
venk
|
|
466
|
+
ver
|
|
467
|
+
verk
|
|
468
|
+
vesper
|
|
469
|
+
vest
|
|
470
|
+
veter
|
|
471
|
+
vetur
|
|
472
|
+
vid
|
|
473
|
+
vir
|
|
474
|
+
viv
|
|
475
|
+
vizit
|
|
476
|
+
voj
|
|
477
|
+
vojaĝ
|
|
478
|
+
vol
|
|
479
|
+
volv
|
|
480
|
+
vort
|
|
481
|
+
voĉ
|
|
482
|
+
vulp
|
|
483
|
+
zorg
|
|
484
|
+
ĉambr
|
|
485
|
+
ĉarm
|
|
486
|
+
ĉef
|
|
487
|
+
ĉeval
|
|
488
|
+
ĉin
|
|
489
|
+
ĝen
|
|
490
|
+
ĝeneral
|
|
491
|
+
ĝoj
|
|
492
|
+
ĝust
|
|
493
|
+
ĵet
|
|
494
|
+
ŝaf
|
|
495
|
+
ŝajn
|
|
496
|
+
ŝanc
|
|
497
|
+
ŝanĝ
|
|
498
|
+
ŝat
|
|
499
|
+
ŝip
|
|
500
|
+
ŝir
|
|
501
|
+
ŝlos
|
|
502
|
+
ŝtel
|
disvortilo/suffixes.txt
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
aĉ # contemptible
|
|
2
|
+
ad # frequent or continuous action (gerund)
|
|
3
|
+
aĵ # substance
|
|
4
|
+
an # member
|
|
5
|
+
ar # collection of
|
|
6
|
+
ĉj # male affectionate suffix
|
|
7
|
+
ebl # possibility
|
|
8
|
+
ec # abstract quality; -ness
|
|
9
|
+
eg # of great size or degree
|
|
10
|
+
ej # place
|
|
11
|
+
em # tending to
|
|
12
|
+
end # something that must be done
|
|
13
|
+
er # single, individual, unit
|
|
14
|
+
estr # leader
|
|
15
|
+
et # of small size or degree
|
|
16
|
+
id # offspring
|
|
17
|
+
ig # causing something to be
|
|
18
|
+
iĝ # becoming something
|
|
19
|
+
il # instrument; tool
|
|
20
|
+
ind # worthy of
|
|
21
|
+
ing # holder for an object
|
|
22
|
+
in # feminine suffix
|
|
23
|
+
ism # -ism
|
|
24
|
+
ist # -ist
|
|
25
|
+
nj # female affectionate suffix
|
|
26
|
+
obl # times (with numbers)
|
|
27
|
+
on # fraction (with numbers)
|
|
28
|
+
op # together (with numbers)
|
|
29
|
+
uj # container
|
|
30
|
+
ul # person possessing a certain quality
|
|
31
|
+
um # indefinite suffix
|
|
32
|
+
|
|
33
|
+
# passive forms
|
|
34
|
+
at
|
|
35
|
+
it
|
|
36
|
+
ot
|
|
37
|
+
|
|
38
|
+
# active forms
|
|
39
|
+
ant
|
|
40
|
+
int
|
|
41
|
+
ont
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
# Unofficial
|
|
45
|
+
iĉ # masculine form
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: disvortilo
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Disvortilo is a simple tool that breaks Esperanto words into roots and affixes.
|
|
5
|
+
Author-email: Franz Weingartz <scaui0@gmx.net>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/LerniloEO/disvortilo
|
|
8
|
+
Project-URL: Repository, https://github.com/LerniloEO/disvortilo
|
|
9
|
+
Project-URL: Issues, https://github.com/LerniloEO/disvortilo/issues
|
|
10
|
+
Keywords: Esperanto,morphology,linguistics,NLP
|
|
11
|
+
Classifier: Topic :: Text Processing :: Linguistic
|
|
12
|
+
Classifier: Operating System :: OS Independent
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Requires-Python: >=3.9
|
|
15
|
+
Description-Content-Type: text/markdown
|
|
16
|
+
License-File: LICENSE
|
|
17
|
+
Dynamic: license-file
|
|
18
|
+
|
|
19
|
+
# Disvortilo
|
|
20
|
+
|
|
21
|
+
Disvortilo is a simple tool that breaks Esperanto words into roots and affixes.
|
|
22
|
+
|
|
23
|
+
## Getting Started
|
|
24
|
+
|
|
25
|
+
You can install Disvortilo from PyPI using pip:
|
|
26
|
+
|
|
27
|
+
```shell
|
|
28
|
+
pip install disvortilo
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
## Examples
|
|
32
|
+
|
|
33
|
+
```python
|
|
34
|
+
from disvortilo import Disvortilo
|
|
35
|
+
|
|
36
|
+
disvortilo = Disvortilo()
|
|
37
|
+
|
|
38
|
+
print(disvortilo.parse("malliberejo"))
|
|
39
|
+
# > [('mal', 'liber', 'ej', 'o')]
|
|
40
|
+
|
|
41
|
+
# Some words have more than one possible decomposition,
|
|
42
|
+
# like "Esperanto", which literally means "one who hopes"
|
|
43
|
+
print(disvortilo.parse("esperantistino"))
|
|
44
|
+
# > [('esper', 'ant', 'ist', 'in', 'o'), ('esperant', 'ist', 'in', 'o')]
|
|
45
|
+
```
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
disvortilo/__init__.py,sha256=kk2bn3ukv7QPcOHoQ9N9vTT3X8FqRhzF4R_Z-Dtb73A,5158
|
|
2
|
+
disvortilo/full_words.txt,sha256=gqlLPTDPLnylGOWSQvEsxb_lOUUs1HtODUrMDbbSL94,578
|
|
3
|
+
disvortilo/prefixes.txt,sha256=jiVFM0haIRxwfL9wUrGqvcDI67E4lhgxxV9YrwLOe-w,199
|
|
4
|
+
disvortilo/roots.txt,sha256=07oJw43UbSGuVvpQu4jfkKj60zOyIhQOeaGcWuPziEg,2909
|
|
5
|
+
disvortilo/suffixes.txt,sha256=lwjCpl_4GN0G295P3W1o0abtUgU34o7sTpHyOpN8HTo,845
|
|
6
|
+
disvortilo-0.1.0.dist-info/licenses/LICENSE,sha256=3J4UnGzcrGIf2Lc5evXPrVJokpAYNGuvHYAlfzsjOmU,1072
|
|
7
|
+
disvortilo-0.1.0.dist-info/METADATA,sha256=8SY3i6t5WxkD2fsW9S26oxDyf7IQOM7d1NOdWuC1ezU,1281
|
|
8
|
+
disvortilo-0.1.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
9
|
+
disvortilo-0.1.0.dist-info/top_level.txt,sha256=K05C9mVwZZRGYnjvcJwsU8cg6L3fjrwE73ydJUCu5AA,11
|
|
10
|
+
disvortilo-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Franz Weingartz
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
disvortilo
|