semantic-compressor 2.1__py3-none-any.whl → 2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- compressor/resources/nltk_data/tokenizers/punkt_tab/README +98 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/czech/abbrev_types.txt +118 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/czech/collocations.tab +96 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/czech/ortho_context.tab +52789 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/czech/sent_starters.txt +54 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/danish/abbrev_types.txt +211 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/danish/collocations.tab +101 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/danish/ortho_context.tab +53913 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/danish/sent_starters.txt +64 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/dutch/abbrev_types.txt +99 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/dutch/collocations.tab +37 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/dutch/ortho_context.tab +32208 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/dutch/sent_starters.txt +54 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/english/abbrev_types.txt +156 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/english/collocations.tab +37 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/english/ortho_context.tab +20366 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/english/sent_starters.txt +39 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/estonian/abbrev_types.txt +48 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/estonian/collocations.tab +100 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/estonian/ortho_context.tab +68544 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/estonian/sent_starters.txt +89 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/finnish/abbrev_types.txt +81 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/finnish/collocations.tab +167 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/finnish/ortho_context.tab +79765 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/finnish/sent_starters.txt +86 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/french/abbrev_types.txt +61 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/french/collocations.tab +18 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/french/ortho_context.tab +26726 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/french/sent_starters.txt +48 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/german/abbrev_types.txt +71 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/german/collocations.tab +28 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/german/ortho_context.tab +60260 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/german/sent_starters.txt +107 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/greek/abbrev_types.txt +100 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/greek/collocations.tab +7 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/greek/ortho_context.tab +29624 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/greek/sent_starters.txt +54 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/italian/abbrev_types.txt +125 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/italian/collocations.tab +6 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/italian/ortho_context.tab +29929 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/italian/sent_starters.txt +40 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/malayalam/abbrev_types.txt +285 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/malayalam/collocations.tab +153 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/malayalam/ortho_context.tab +10520 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/malayalam/sent_starters.txt +14 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/norwegian/abbrev_types.txt +106 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/norwegian/collocations.tab +54 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/norwegian/ortho_context.tab +54125 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/norwegian/sent_starters.txt +63 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/polish/abbrev_types.txt +225 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/polish/collocations.tab +57 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/polish/ortho_context.tab +81425 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/polish/sent_starters.txt +71 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/portuguese/abbrev_types.txt +72 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/portuguese/collocations.tab +5 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/portuguese/ortho_context.tab +30167 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/portuguese/sent_starters.txt +40 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/russian/abbrev_types.txt +1989 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/russian/collocations.tab +0 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/russian/ortho_context.tab +1 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/russian/sent_starters.txt +0 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/slovene/abbrev_types.txt +73 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/slovene/collocations.tab +74 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/slovene/ortho_context.tab +35434 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/slovene/sent_starters.txt +58 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/spanish/abbrev_types.txt +66 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/spanish/collocations.tab +7 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/spanish/ortho_context.tab +27443 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/spanish/sent_starters.txt +46 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/swedish/abbrev_types.txt +39 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/swedish/collocations.tab +8 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/swedish/ortho_context.tab +44485 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/swedish/sent_starters.txt +49 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/turkish/abbrev_types.txt +67 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/turkish/collocations.tab +14 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/turkish/ortho_context.tab +45926 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/turkish/sent_starters.txt +87 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab.zip +0 -0
- {semantic_compressor-2.1.dist-info → semantic_compressor-2.2.dist-info}/METADATA +1 -1
- {semantic_compressor-2.1.dist-info → semantic_compressor-2.2.dist-info}/RECORD +83 -5
- {semantic_compressor-2.1.dist-info → semantic_compressor-2.2.dist-info}/LICENSE +0 -0
- {semantic_compressor-2.1.dist-info → semantic_compressor-2.2.dist-info}/WHEEL +0 -0
- {semantic_compressor-2.1.dist-info → semantic_compressor-2.2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,39 @@
|
|
1
|
+
most
|
2
|
+
he
|
3
|
+
since
|
4
|
+
so
|
5
|
+
both
|
6
|
+
these
|
7
|
+
it
|
8
|
+
nevertheless
|
9
|
+
this
|
10
|
+
indeed
|
11
|
+
however
|
12
|
+
instead
|
13
|
+
under
|
14
|
+
similarly
|
15
|
+
some
|
16
|
+
though
|
17
|
+
while
|
18
|
+
when
|
19
|
+
in
|
20
|
+
despite
|
21
|
+
although
|
22
|
+
nonetheless
|
23
|
+
thus
|
24
|
+
there
|
25
|
+
if
|
26
|
+
the
|
27
|
+
nor
|
28
|
+
separately
|
29
|
+
moreover
|
30
|
+
but
|
31
|
+
they
|
32
|
+
yet
|
33
|
+
many
|
34
|
+
according
|
35
|
+
sales
|
36
|
+
among
|
37
|
+
meanwhile
|
38
|
+
even
|
39
|
+
i
|
@@ -0,0 +1,48 @@
|
|
1
|
+
eos
|
2
|
+
c
|
3
|
+
a.d
|
4
|
+
t.a.s.s
|
5
|
+
e.t
|
6
|
+
päevapiltnikud
|
7
|
+
c.h
|
8
|
+
b.p
|
9
|
+
amm
|
10
|
+
ameerika-mees
|
11
|
+
n.-ö
|
12
|
+
cm
|
13
|
+
b
|
14
|
+
mhm
|
15
|
+
a.s
|
16
|
+
m.e
|
17
|
+
j.l
|
18
|
+
j
|
19
|
+
u.t
|
20
|
+
vm
|
21
|
+
g.u.n
|
22
|
+
hajutada
|
23
|
+
p.s
|
24
|
+
a.b
|
25
|
+
c.h.-r
|
26
|
+
i.q
|
27
|
+
gr
|
28
|
+
fido
|
29
|
+
pankurit
|
30
|
+
s.v
|
31
|
+
l.l
|
32
|
+
c.-h
|
33
|
+
m.h
|
34
|
+
h.l
|
35
|
+
m.k
|
36
|
+
j.r
|
37
|
+
t.k
|
38
|
+
k.h
|
39
|
+
89/90
|
40
|
+
h
|
41
|
+
a
|
42
|
+
dost
|
43
|
+
v.k
|
44
|
+
e.q
|
45
|
+
t.j
|
46
|
+
m.b
|
47
|
+
d
|
48
|
+
p.k
|
@@ -0,0 +1,100 @@
|
|
1
|
+
##number## juuni
|
2
|
+
##number## novembril
|
3
|
+
##number## juulilt
|
4
|
+
r järve-vomm
|
5
|
+
##number## mida
|
6
|
+
n liidu
|
7
|
+
##number## milliseid
|
8
|
+
##number## oktoobri
|
9
|
+
##number## iidol
|
10
|
+
m e
|
11
|
+
##number## klassist
|
12
|
+
##number## millest
|
13
|
+
##number## august
|
14
|
+
##number## pariis
|
15
|
+
##number## septembrist
|
16
|
+
##number## oktoober
|
17
|
+
##number## märtsini
|
18
|
+
##number## kust
|
19
|
+
k mägi
|
20
|
+
##number## detsembrist
|
21
|
+
##number## jaanuari
|
22
|
+
##number## epee
|
23
|
+
##number## nimetage
|
24
|
+
##number## novembrini
|
25
|
+
##number## eluaasta
|
26
|
+
s mill
|
27
|
+
##number## helsingi
|
28
|
+
##number## jaanuarini
|
29
|
+
##number## aastail
|
30
|
+
##number## augustil
|
31
|
+
##number## millise
|
32
|
+
##number## juulist
|
33
|
+
##number## mai
|
34
|
+
##number## novembri
|
35
|
+
##number## oktoobrist
|
36
|
+
##number## juunini
|
37
|
+
##number## septembriks
|
38
|
+
##number## detsembril
|
39
|
+
p s
|
40
|
+
##number## jaanuar
|
41
|
+
##number## aastate
|
42
|
+
##number## milline
|
43
|
+
##number## kelle
|
44
|
+
##number## jaanuaril
|
45
|
+
s stadnikov
|
46
|
+
##number## aastaks
|
47
|
+
##number## stockholm
|
48
|
+
##number## suurim
|
49
|
+
##number## aasta
|
50
|
+
##number## sajandi
|
51
|
+
##number## millega
|
52
|
+
##number## aastast
|
53
|
+
##number## aastal
|
54
|
+
##number## kumb
|
55
|
+
##number## septembril
|
56
|
+
##number## korruselt
|
57
|
+
##number## septembri
|
58
|
+
##number## veebruarini
|
59
|
+
##number## london
|
60
|
+
##number## aastatel
|
61
|
+
##number## september
|
62
|
+
##number## veebruari
|
63
|
+
##number## oktoobrini
|
64
|
+
##number## mail
|
65
|
+
m kassovitz
|
66
|
+
##number## action-film
|
67
|
+
##number## mis
|
68
|
+
k herkül
|
69
|
+
n n
|
70
|
+
##number## detsembrini
|
71
|
+
##number## imre
|
72
|
+
t jõgeda
|
73
|
+
##number## casino
|
74
|
+
##number## septembrit
|
75
|
+
##number## augustini
|
76
|
+
##number## juulil
|
77
|
+
##number## november
|
78
|
+
##number## kuupäeval
|
79
|
+
##number## taevas
|
80
|
+
##number## septembrini
|
81
|
+
##number## detsember
|
82
|
+
##number## detsembri
|
83
|
+
##number## juunil
|
84
|
+
##number## augustist
|
85
|
+
n jurist
|
86
|
+
##number## missugust
|
87
|
+
##number## aastatesse
|
88
|
+
##number## aprillil
|
89
|
+
##number## augusti
|
90
|
+
##number## oktoobril
|
91
|
+
##number## märtsil
|
92
|
+
##number## a
|
93
|
+
##number## the
|
94
|
+
##number## sajandil
|
95
|
+
##number## aastani
|
96
|
+
##number## juuli
|
97
|
+
##number## septembrile
|
98
|
+
##number## millist
|
99
|
+
##number## millised
|
100
|
+
##number## veebruaril
|