semantic-compressor 2.1__py3-none-any.whl → 2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- compressor/resources/nltk_data/tokenizers/punkt_tab/README +98 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/czech/abbrev_types.txt +118 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/czech/collocations.tab +96 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/czech/ortho_context.tab +52789 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/czech/sent_starters.txt +54 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/danish/abbrev_types.txt +211 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/danish/collocations.tab +101 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/danish/ortho_context.tab +53913 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/danish/sent_starters.txt +64 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/dutch/abbrev_types.txt +99 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/dutch/collocations.tab +37 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/dutch/ortho_context.tab +32208 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/dutch/sent_starters.txt +54 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/english/abbrev_types.txt +156 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/english/collocations.tab +37 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/english/ortho_context.tab +20366 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/english/sent_starters.txt +39 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/estonian/abbrev_types.txt +48 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/estonian/collocations.tab +100 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/estonian/ortho_context.tab +68544 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/estonian/sent_starters.txt +89 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/finnish/abbrev_types.txt +81 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/finnish/collocations.tab +167 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/finnish/ortho_context.tab +79765 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/finnish/sent_starters.txt +86 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/french/abbrev_types.txt +61 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/french/collocations.tab +18 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/french/ortho_context.tab +26726 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/french/sent_starters.txt +48 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/german/abbrev_types.txt +71 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/german/collocations.tab +28 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/german/ortho_context.tab +60260 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/german/sent_starters.txt +107 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/greek/abbrev_types.txt +100 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/greek/collocations.tab +7 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/greek/ortho_context.tab +29624 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/greek/sent_starters.txt +54 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/italian/abbrev_types.txt +125 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/italian/collocations.tab +6 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/italian/ortho_context.tab +29929 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/italian/sent_starters.txt +40 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/malayalam/abbrev_types.txt +285 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/malayalam/collocations.tab +153 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/malayalam/ortho_context.tab +10520 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/malayalam/sent_starters.txt +14 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/norwegian/abbrev_types.txt +106 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/norwegian/collocations.tab +54 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/norwegian/ortho_context.tab +54125 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/norwegian/sent_starters.txt +63 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/polish/abbrev_types.txt +225 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/polish/collocations.tab +57 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/polish/ortho_context.tab +81425 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/polish/sent_starters.txt +71 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/portuguese/abbrev_types.txt +72 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/portuguese/collocations.tab +5 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/portuguese/ortho_context.tab +30167 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/portuguese/sent_starters.txt +40 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/russian/abbrev_types.txt +1989 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/russian/collocations.tab +0 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/russian/ortho_context.tab +1 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/russian/sent_starters.txt +0 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/slovene/abbrev_types.txt +73 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/slovene/collocations.tab +74 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/slovene/ortho_context.tab +35434 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/slovene/sent_starters.txt +58 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/spanish/abbrev_types.txt +66 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/spanish/collocations.tab +7 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/spanish/ortho_context.tab +27443 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/spanish/sent_starters.txt +46 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/swedish/abbrev_types.txt +39 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/swedish/collocations.tab +8 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/swedish/ortho_context.tab +44485 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/swedish/sent_starters.txt +49 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/turkish/abbrev_types.txt +67 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/turkish/collocations.tab +14 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/turkish/ortho_context.tab +45926 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/turkish/sent_starters.txt +87 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab.zip +0 -0
- {semantic_compressor-2.1.dist-info → semantic_compressor-2.2.dist-info}/METADATA +1 -1
- {semantic_compressor-2.1.dist-info → semantic_compressor-2.2.dist-info}/RECORD +83 -5
- {semantic_compressor-2.1.dist-info → semantic_compressor-2.2.dist-info}/LICENSE +0 -0
- {semantic_compressor-2.1.dist-info → semantic_compressor-2.2.dist-info}/WHEEL +0 -0
- {semantic_compressor-2.1.dist-info → semantic_compressor-2.2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,63 @@
|
|
1
|
+
de
|
2
|
+
først
|
3
|
+
selv
|
4
|
+
vi
|
5
|
+
derfor
|
6
|
+
dessutan
|
7
|
+
njvi
|
8
|
+
dette
|
9
|
+
politiet
|
10
|
+
nji
|
11
|
+
njde
|
12
|
+
hensikten
|
13
|
+
totalt
|
14
|
+
njnår
|
15
|
+
ingen
|
16
|
+
mannen
|
17
|
+
då
|
18
|
+
anmeldt
|
19
|
+
foto
|
20
|
+
begge
|
21
|
+
difor
|
22
|
+
njdet
|
23
|
+
ifølge
|
24
|
+
nå
|
25
|
+
arkiv
|
26
|
+
når
|
27
|
+
det
|
28
|
+
så
|
29
|
+
no
|
30
|
+
deretter
|
31
|
+
samtidig
|
32
|
+
side
|
33
|
+
men
|
34
|
+
både
|
35
|
+
njden
|
36
|
+
dersom
|
37
|
+
dessverre
|
38
|
+
årsaken
|
39
|
+
da
|
40
|
+
njmen
|
41
|
+
dessuten
|
42
|
+
denne
|
43
|
+
her
|
44
|
+
ho
|
45
|
+
hvis
|
46
|
+
tekst
|
47
|
+
problemet
|
48
|
+
dermed
|
49
|
+
etter
|
50
|
+
jeg
|
51
|
+
den
|
52
|
+
kanskje
|
53
|
+
ørdag
|
54
|
+
tanken
|
55
|
+
foruten
|
56
|
+
resultatet
|
57
|
+
njjeg
|
58
|
+
dei
|
59
|
+
ntb
|
60
|
+
nkjeg
|
61
|
+
hun
|
62
|
+
eg
|
63
|
+
han
|
@@ -0,0 +1,225 @@
|
|
1
|
+
t
|
2
|
+
w.m
|
3
|
+
o.o
|
4
|
+
n.p.m
|
5
|
+
podst
|
6
|
+
a.c
|
7
|
+
paszportów
|
8
|
+
sn
|
9
|
+
ms
|
10
|
+
j.u
|
11
|
+
st
|
12
|
+
szydercy-realisty
|
13
|
+
k.p
|
14
|
+
lit
|
15
|
+
h.b
|
16
|
+
ad.4
|
17
|
+
św
|
18
|
+
dz
|
19
|
+
pd
|
20
|
+
rys
|
21
|
+
e.s
|
22
|
+
2e
|
23
|
+
h.s
|
24
|
+
a.w
|
25
|
+
etc
|
26
|
+
p.s
|
27
|
+
hab
|
28
|
+
rz
|
29
|
+
dn
|
30
|
+
wikipedii
|
31
|
+
itp
|
32
|
+
l.w
|
33
|
+
dyr
|
34
|
+
hrubieszowie
|
35
|
+
8.i
|
36
|
+
k.m.b
|
37
|
+
m.st
|
38
|
+
kaszb
|
39
|
+
b.b
|
40
|
+
m.o
|
41
|
+
inc
|
42
|
+
n
|
43
|
+
szt
|
44
|
+
7.d
|
45
|
+
przejaśnieniami
|
46
|
+
godz
|
47
|
+
j.ch
|
48
|
+
n.e
|
49
|
+
głęb
|
50
|
+
r.c
|
51
|
+
inż
|
52
|
+
o.p
|
53
|
+
z.d
|
54
|
+
episkopat
|
55
|
+
c.o
|
56
|
+
tzw
|
57
|
+
ew
|
58
|
+
c.s
|
59
|
+
habeo
|
60
|
+
ł.g
|
61
|
+
e.cz
|
62
|
+
ub
|
63
|
+
a.k.m
|
64
|
+
m.a
|
65
|
+
wf
|
66
|
+
9.i
|
67
|
+
kpt
|
68
|
+
g3
|
69
|
+
szer
|
70
|
+
d.k.p.c
|
71
|
+
sz
|
72
|
+
2d
|
73
|
+
9.b
|
74
|
+
pocz
|
75
|
+
zw
|
76
|
+
b
|
77
|
+
adminem
|
78
|
+
k
|
79
|
+
łac
|
80
|
+
ryc
|
81
|
+
tad
|
82
|
+
tab
|
83
|
+
gen
|
84
|
+
p.g
|
85
|
+
r.m
|
86
|
+
j.w
|
87
|
+
d.o.c
|
88
|
+
a.l
|
89
|
+
1971r
|
90
|
+
s.a
|
91
|
+
xiw
|
92
|
+
art
|
93
|
+
k.p.c
|
94
|
+
s
|
95
|
+
ds
|
96
|
+
21.1.—18.2
|
97
|
+
j.c
|
98
|
+
w.w
|
99
|
+
h
|
100
|
+
af
|
101
|
+
płn
|
102
|
+
lic
|
103
|
+
s.k
|
104
|
+
tzn
|
105
|
+
j.a
|
106
|
+
s.o.s
|
107
|
+
z.z
|
108
|
+
ad.5
|
109
|
+
pn
|
110
|
+
cyt
|
111
|
+
kw
|
112
|
+
woj
|
113
|
+
red
|
114
|
+
ang
|
115
|
+
2b
|
116
|
+
ok
|
117
|
+
sześc
|
118
|
+
ul
|
119
|
+
pow
|
120
|
+
str
|
121
|
+
a.ch
|
122
|
+
strawy
|
123
|
+
w.c
|
124
|
+
śr
|
125
|
+
bie
|
126
|
+
zm
|
127
|
+
nt
|
128
|
+
wł
|
129
|
+
br
|
130
|
+
tow
|
131
|
+
u.s
|
132
|
+
prof
|
133
|
+
zach
|
134
|
+
ś.p
|
135
|
+
por
|
136
|
+
op
|
137
|
+
ł.s
|
138
|
+
agd
|
139
|
+
g
|
140
|
+
os
|
141
|
+
proc
|
142
|
+
reż
|
143
|
+
n.t
|
144
|
+
wsch
|
145
|
+
bm
|
146
|
+
dol
|
147
|
+
s3
|
148
|
+
j.s
|
149
|
+
p.n.e
|
150
|
+
b.r
|
151
|
+
błędem
|
152
|
+
1b
|
153
|
+
l
|
154
|
+
m.t
|
155
|
+
h.p
|
156
|
+
mr
|
157
|
+
zn
|
158
|
+
1d
|
159
|
+
ur
|
160
|
+
g.w
|
161
|
+
9x
|
162
|
+
doc
|
163
|
+
7.i
|
164
|
+
p.j
|
165
|
+
tys
|
166
|
+
offenbacha
|
167
|
+
k.i
|
168
|
+
mkw
|
169
|
+
pt
|
170
|
+
gq
|
171
|
+
s.t
|
172
|
+
kl
|
173
|
+
fot
|
174
|
+
k.k
|
175
|
+
a.k
|
176
|
+
w.h
|
177
|
+
dł
|
178
|
+
r
|
179
|
+
mec
|
180
|
+
poł
|
181
|
+
m.in
|
182
|
+
śp
|
183
|
+
o.n
|
184
|
+
a.s
|
185
|
+
maks
|
186
|
+
mk
|
187
|
+
j
|
188
|
+
p.e.n
|
189
|
+
ub.r
|
190
|
+
cz
|
191
|
+
pw
|
192
|
+
13.i
|
193
|
+
ks
|
194
|
+
np
|
195
|
+
pl
|
196
|
+
m.d
|
197
|
+
l.r
|
198
|
+
v.f
|
199
|
+
k.i.g
|
200
|
+
odl
|
201
|
+
poz
|
202
|
+
p.o
|
203
|
+
tj
|
204
|
+
j.k
|
205
|
+
f2
|
206
|
+
obw
|
207
|
+
f
|
208
|
+
cenowo
|
209
|
+
2c
|
210
|
+
małg
|
211
|
+
m.k
|
212
|
+
jez
|
213
|
+
7.b
|
214
|
+
s.c
|
215
|
+
ż
|
216
|
+
nm
|
217
|
+
luf
|
218
|
+
3d
|
219
|
+
hm
|
220
|
+
m.b
|
221
|
+
4a
|
222
|
+
inf
|
223
|
+
dh
|
224
|
+
mł
|
225
|
+
th
|
@@ -0,0 +1,57 @@
|
|
1
|
+
##number## jedność
|
2
|
+
##number## strefę
|
3
|
+
##number## przywrócenie
|
4
|
+
##number## przyjmowane
|
5
|
+
##number## minucie
|
6
|
+
##number## funkcjonariuszowi
|
7
|
+
##number## podaj
|
8
|
+
##number## organ
|
9
|
+
##number## przedsiębiorca
|
10
|
+
##number## schematy
|
11
|
+
##number## konferencja
|
12
|
+
##number## likwidatorzy
|
13
|
+
m in
|
14
|
+
##number## wojewódzka
|
15
|
+
##number## organizację
|
16
|
+
##number## ilekroć
|
17
|
+
##number## członek
|
18
|
+
##number## przełożeni
|
19
|
+
##number## wydanie
|
20
|
+
##number## zaliczka
|
21
|
+
c wiatr
|
22
|
+
##number## kapitał
|
23
|
+
##number## instruktor
|
24
|
+
##number## ubezpieczenie
|
25
|
+
##number## rejestr
|
26
|
+
##number## ilu
|
27
|
+
m położ
|
28
|
+
##number## państwowa
|
29
|
+
##number## przyjrzyj
|
30
|
+
##number## ujawnienie
|
31
|
+
##number## rozpoznawanie
|
32
|
+
##number## wdrażanie
|
33
|
+
##number## min
|
34
|
+
##number## rocznicy
|
35
|
+
##number## osoba
|
36
|
+
##number## kociarz
|
37
|
+
##number## przewody
|
38
|
+
##number## przepis
|
39
|
+
##number## kompetencje
|
40
|
+
##number## zamawiający
|
41
|
+
a leppera
|
42
|
+
##number## edycji
|
43
|
+
##number## upoważnia
|
44
|
+
##number## naczelna
|
45
|
+
##number## patrz
|
46
|
+
##number## przepisu
|
47
|
+
u nr
|
48
|
+
##number## pułku
|
49
|
+
##number## ugodę
|
50
|
+
##number## minister
|
51
|
+
##number## poł.
|
52
|
+
##number## medytacja
|
53
|
+
##number## fundusz
|
54
|
+
##number## 30godz
|
55
|
+
w n.e.
|
56
|
+
##number## producentem
|
57
|
+
##number## wysokość
|