semantic-compressor 2.1__py3-none-any.whl → 2.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- compressor/resources/nltk_data/tokenizers/punkt_tab/README +98 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/czech/abbrev_types.txt +118 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/czech/collocations.tab +96 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/czech/ortho_context.tab +52789 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/czech/sent_starters.txt +54 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/danish/abbrev_types.txt +211 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/danish/collocations.tab +101 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/danish/ortho_context.tab +53913 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/danish/sent_starters.txt +64 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/dutch/abbrev_types.txt +99 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/dutch/collocations.tab +37 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/dutch/ortho_context.tab +32208 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/dutch/sent_starters.txt +54 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/english/abbrev_types.txt +156 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/english/collocations.tab +37 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/english/ortho_context.tab +20366 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/english/sent_starters.txt +39 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/estonian/abbrev_types.txt +48 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/estonian/collocations.tab +100 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/estonian/ortho_context.tab +68544 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/estonian/sent_starters.txt +89 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/finnish/abbrev_types.txt +81 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/finnish/collocations.tab +167 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/finnish/ortho_context.tab +79765 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/finnish/sent_starters.txt +86 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/french/abbrev_types.txt +61 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/french/collocations.tab +18 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/french/ortho_context.tab +26726 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/french/sent_starters.txt +48 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/german/abbrev_types.txt +71 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/german/collocations.tab +28 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/german/ortho_context.tab +60260 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/german/sent_starters.txt +107 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/greek/abbrev_types.txt +100 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/greek/collocations.tab +7 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/greek/ortho_context.tab +29624 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/greek/sent_starters.txt +54 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/italian/abbrev_types.txt +125 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/italian/collocations.tab +6 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/italian/ortho_context.tab +29929 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/italian/sent_starters.txt +40 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/malayalam/abbrev_types.txt +285 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/malayalam/collocations.tab +153 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/malayalam/ortho_context.tab +10520 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/malayalam/sent_starters.txt +14 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/norwegian/abbrev_types.txt +106 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/norwegian/collocations.tab +54 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/norwegian/ortho_context.tab +54125 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/norwegian/sent_starters.txt +63 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/polish/abbrev_types.txt +225 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/polish/collocations.tab +57 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/polish/ortho_context.tab +81425 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/polish/sent_starters.txt +71 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/portuguese/abbrev_types.txt +72 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/portuguese/collocations.tab +5 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/portuguese/ortho_context.tab +30167 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/portuguese/sent_starters.txt +40 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/russian/abbrev_types.txt +1989 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/russian/collocations.tab +0 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/russian/ortho_context.tab +1 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/russian/sent_starters.txt +0 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/slovene/abbrev_types.txt +73 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/slovene/collocations.tab +74 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/slovene/ortho_context.tab +35434 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/slovene/sent_starters.txt +58 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/spanish/abbrev_types.txt +66 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/spanish/collocations.tab +7 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/spanish/ortho_context.tab +27443 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/spanish/sent_starters.txt +46 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/swedish/abbrev_types.txt +39 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/swedish/collocations.tab +8 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/swedish/ortho_context.tab +44485 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/swedish/sent_starters.txt +49 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/turkish/abbrev_types.txt +67 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/turkish/collocations.tab +14 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/turkish/ortho_context.tab +45926 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/turkish/sent_starters.txt +87 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab.zip +0 -0
- compressor/semantic.py +37 -3
- {semantic_compressor-2.1.dist-info → semantic_compressor-2.3.dist-info}/METADATA +1 -1
- {semantic_compressor-2.1.dist-info → semantic_compressor-2.3.dist-info}/RECORD +84 -6
- {semantic_compressor-2.1.dist-info → semantic_compressor-2.3.dist-info}/LICENSE +0 -0
- {semantic_compressor-2.1.dist-info → semantic_compressor-2.3.dist-info}/WHEEL +0 -0
- {semantic_compressor-2.1.dist-info → semantic_compressor-2.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,54 @@
|
|
1
|
+
já
|
2
|
+
milena
|
3
|
+
tomáš
|
4
|
+
oznámila
|
5
|
+
podle
|
6
|
+
my
|
7
|
+
vyplývá
|
8
|
+
hlavní
|
9
|
+
jelikož
|
10
|
+
musíme
|
11
|
+
kdyby
|
12
|
+
foto
|
13
|
+
rozptylové
|
14
|
+
snad
|
15
|
+
zároveň
|
16
|
+
jaroslav
|
17
|
+
po
|
18
|
+
v
|
19
|
+
kromě
|
20
|
+
pokud
|
21
|
+
toto
|
22
|
+
jenže
|
23
|
+
oba
|
24
|
+
jak
|
25
|
+
zatímco
|
26
|
+
ten
|
27
|
+
myslím
|
28
|
+
navíc
|
29
|
+
dušan
|
30
|
+
zdá
|
31
|
+
dnes
|
32
|
+
přesto
|
33
|
+
tato
|
34
|
+
ti
|
35
|
+
bratislava
|
36
|
+
ale
|
37
|
+
když
|
38
|
+
nicméně
|
39
|
+
tento
|
40
|
+
mirka
|
41
|
+
přitom
|
42
|
+
dokud
|
43
|
+
jan
|
44
|
+
bohužel
|
45
|
+
ta
|
46
|
+
díky
|
47
|
+
prohlásil
|
48
|
+
praha
|
49
|
+
jestliže
|
50
|
+
jde
|
51
|
+
vždyť
|
52
|
+
moskva
|
53
|
+
proto
|
54
|
+
to
|
@@ -0,0 +1,211 @@
|
|
1
|
+
t
|
2
|
+
tlf
|
3
|
+
b.p
|
4
|
+
evt
|
5
|
+
j.h
|
6
|
+
lenz
|
7
|
+
mht
|
8
|
+
gl
|
9
|
+
bl
|
10
|
+
stud.polit
|
11
|
+
e.j
|
12
|
+
st
|
13
|
+
o
|
14
|
+
dec
|
15
|
+
mag
|
16
|
+
h.b
|
17
|
+
p
|
18
|
+
adm
|
19
|
+
el.lign
|
20
|
+
e.s
|
21
|
+
saalba
|
22
|
+
styrt
|
23
|
+
nr
|
24
|
+
m.a.s.h
|
25
|
+
etc
|
26
|
+
pharm
|
27
|
+
hg
|
28
|
+
j.j
|
29
|
+
dj
|
30
|
+
mountainb
|
31
|
+
f.kr
|
32
|
+
h.r
|
33
|
+
cand.jur
|
34
|
+
sp
|
35
|
+
osv
|
36
|
+
s.g
|
37
|
+
ndr
|
38
|
+
inc
|
39
|
+
b.i.g
|
40
|
+
dk-sver
|
41
|
+
sl
|
42
|
+
v.s.o.d
|
43
|
+
cand.mag
|
44
|
+
d.v.s
|
45
|
+
v.i
|
46
|
+
bøddel
|
47
|
+
fr
|
48
|
+
ø«
|
49
|
+
dr.phil
|
50
|
+
chr
|
51
|
+
p.d
|
52
|
+
bj
|
53
|
+
fhv
|
54
|
+
tilskudsforhold
|
55
|
+
m.a
|
56
|
+
sek
|
57
|
+
p.g.a
|
58
|
+
int
|
59
|
+
pokalf
|
60
|
+
ik
|
61
|
+
dir
|
62
|
+
em-lodtrækn
|
63
|
+
a.h
|
64
|
+
o.lign
|
65
|
+
p.t
|
66
|
+
m.v
|
67
|
+
n.j
|
68
|
+
m.h.t
|
69
|
+
m.m
|
70
|
+
a.p
|
71
|
+
pers
|
72
|
+
4-bakketurn
|
73
|
+
dr.med
|
74
|
+
w.ø
|
75
|
+
polit
|
76
|
+
fremsættes
|
77
|
+
techn
|
78
|
+
tidl
|
79
|
+
o.g
|
80
|
+
i.c.i
|
81
|
+
mill
|
82
|
+
skt
|
83
|
+
m.fl
|
84
|
+
cand.merc
|
85
|
+
kbh
|
86
|
+
indiv
|
87
|
+
stk
|
88
|
+
dk-maked
|
89
|
+
memorandum
|
90
|
+
mestersk
|
91
|
+
mag.art
|
92
|
+
kitzb
|
93
|
+
h
|
94
|
+
lic
|
95
|
+
fig
|
96
|
+
dressurst
|
97
|
+
sportsg
|
98
|
+
r.e.m
|
99
|
+
d.u.m
|
100
|
+
sct
|
101
|
+
kld
|
102
|
+
bl.a
|
103
|
+
hf
|
104
|
+
g.a
|
105
|
+
corp
|
106
|
+
w
|
107
|
+
konk
|
108
|
+
zoeterm
|
109
|
+
b.t
|
110
|
+
a.d
|
111
|
+
l.b
|
112
|
+
jf
|
113
|
+
s.b
|
114
|
+
kgl
|
115
|
+
ill
|
116
|
+
beck
|
117
|
+
tosset
|
118
|
+
afd
|
119
|
+
johs
|
120
|
+
pct
|
121
|
+
k.b
|
122
|
+
sv
|
123
|
+
verbalt
|
124
|
+
kgs
|
125
|
+
l.m.k
|
126
|
+
j.l
|
127
|
+
aus
|
128
|
+
superl
|
129
|
+
t.v
|
130
|
+
mia
|
131
|
+
kr
|
132
|
+
pr
|
133
|
+
præmien
|
134
|
+
j.b.s
|
135
|
+
j.o
|
136
|
+
o.s.v
|
137
|
+
edb-oplysninger
|
138
|
+
o.m.a
|
139
|
+
ca
|
140
|
+
1b
|
141
|
+
f.eks
|
142
|
+
rens
|
143
|
+
ch
|
144
|
+
mr
|
145
|
+
schw
|
146
|
+
d.c
|
147
|
+
utraditionelt
|
148
|
+
idrætsgym
|
149
|
+
hhv
|
150
|
+
e.l
|
151
|
+
s.s
|
152
|
+
eks
|
153
|
+
f.o.m
|
154
|
+
dk-storbrit
|
155
|
+
dk-jugo
|
156
|
+
n.z
|
157
|
+
derivater
|
158
|
+
c
|
159
|
+
pt
|
160
|
+
vm-kval
|
161
|
+
kl
|
162
|
+
hr
|
163
|
+
cand
|
164
|
+
jur
|
165
|
+
sav
|
166
|
+
h.c
|
167
|
+
arab.-danm
|
168
|
+
d.a.d
|
169
|
+
fl
|
170
|
+
o.a
|
171
|
+
a.s
|
172
|
+
cand.polit
|
173
|
+
grundejerform
|
174
|
+
j
|
175
|
+
faglærte
|
176
|
+
cr
|
177
|
+
a.a
|
178
|
+
mou
|
179
|
+
f.r.i
|
180
|
+
årh
|
181
|
+
o.m.m
|
182
|
+
sve
|
183
|
+
c.a
|
184
|
+
engl
|
185
|
+
sikkerhedssystemerne
|
186
|
+
m.f
|
187
|
+
j.k
|
188
|
+
phil
|
189
|
+
f
|
190
|
+
vet
|
191
|
+
mio
|
192
|
+
k.e
|
193
|
+
m.k
|
194
|
+
atla
|
195
|
+
idrætsg
|
196
|
+
n.n
|
197
|
+
4-bakketur
|
198
|
+
dvs
|
199
|
+
sdr
|
200
|
+
s.j
|
201
|
+
hol
|
202
|
+
s.h
|
203
|
+
pei
|
204
|
+
kbhvn
|
205
|
+
aa
|
206
|
+
m.g.i
|
207
|
+
fvt
|
208
|
+
i«
|
209
|
+
b.c
|
210
|
+
th
|
211
|
+
lrs
|
@@ -0,0 +1,101 @@
|
|
1
|
+
##number## skak
|
2
|
+
##number## speedway
|
3
|
+
##number## rally
|
4
|
+
##number## april
|
5
|
+
##number## dm-fin
|
6
|
+
##number## viceformand
|
7
|
+
m jensen
|
8
|
+
##number## kano/kajak
|
9
|
+
##number## bowling
|
10
|
+
##number## dm-finale
|
11
|
+
##number## årh.
|
12
|
+
##number## januar
|
13
|
+
##number## august
|
14
|
+
##number## marathon
|
15
|
+
##number## kamp
|
16
|
+
##number## skihop
|
17
|
+
##number## etage
|
18
|
+
##number## tennis
|
19
|
+
##number## cykling
|
20
|
+
e andersen
|
21
|
+
##number## december
|
22
|
+
g h.
|
23
|
+
##number## neb
|
24
|
+
##number## sektion
|
25
|
+
##number## afd.
|
26
|
+
##number## klasse
|
27
|
+
##number## trampolin
|
28
|
+
##number## bordtennis
|
29
|
+
##number## formel
|
30
|
+
##number## århundredes
|
31
|
+
##number## dm-semifin
|
32
|
+
##number## heks
|
33
|
+
##number## taekwondo
|
34
|
+
##number## galop
|
35
|
+
##number## basketball
|
36
|
+
##number## dm
|
37
|
+
m skræl
|
38
|
+
##number## trav
|
39
|
+
##number## provins
|
40
|
+
##number## triathlon
|
41
|
+
k axel
|
42
|
+
##number## rugby
|
43
|
+
s h.
|
44
|
+
##number## klaverkoncert
|
45
|
+
a p.
|
46
|
+
e løgstrup
|
47
|
+
k telefax
|
48
|
+
##number## gyldendal
|
49
|
+
##number## fodbold
|
50
|
+
e rosenfeldt
|
51
|
+
##number## oktober
|
52
|
+
k o.
|
53
|
+
##number## september
|
54
|
+
##number## dec.
|
55
|
+
##number## juledag
|
56
|
+
##number## badminton
|
57
|
+
##number## sejlsport
|
58
|
+
##number## håndbold
|
59
|
+
r førsund
|
60
|
+
e jørgensen
|
61
|
+
d ##number##
|
62
|
+
k e
|
63
|
+
##number## alp.ski
|
64
|
+
##number## judo
|
65
|
+
##number## roning
|
66
|
+
##number## november
|
67
|
+
##number## atletik
|
68
|
+
##number## århundrede
|
69
|
+
##number## ridning
|
70
|
+
##number## marts
|
71
|
+
m andersen
|
72
|
+
d roosevelt
|
73
|
+
##number## brydning
|
74
|
+
s kr.
|
75
|
+
##number## runde
|
76
|
+
##number## division
|
77
|
+
##number## sal
|
78
|
+
##number## boksning
|
79
|
+
##number## minut
|
80
|
+
##number## golf
|
81
|
+
##number## juni
|
82
|
+
##number## symfoni
|
83
|
+
##number## hurtigløb
|
84
|
+
k jørgensen
|
85
|
+
##number## jörgen
|
86
|
+
##number## klasses
|
87
|
+
e jacobsen
|
88
|
+
k jensen
|
89
|
+
##number## februar
|
90
|
+
k nielsen
|
91
|
+
##number## volleyball
|
92
|
+
##number## maj
|
93
|
+
##number## verdenskrig
|
94
|
+
##number## juli
|
95
|
+
##number## ishockey
|
96
|
+
##number## kunstskøjteløb
|
97
|
+
b jørgensen
|
98
|
+
##number## gymnastik
|
99
|
+
##number## svømning
|
100
|
+
##number## tw
|
101
|
+
i pedersens
|