semantic-compressor 2.1__py3-none-any.whl → 2.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- compressor/resources/nltk_data/tokenizers/punkt_tab/README +98 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/czech/abbrev_types.txt +118 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/czech/collocations.tab +96 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/czech/ortho_context.tab +52789 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/czech/sent_starters.txt +54 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/danish/abbrev_types.txt +211 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/danish/collocations.tab +101 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/danish/ortho_context.tab +53913 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/danish/sent_starters.txt +64 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/dutch/abbrev_types.txt +99 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/dutch/collocations.tab +37 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/dutch/ortho_context.tab +32208 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/dutch/sent_starters.txt +54 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/english/abbrev_types.txt +156 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/english/collocations.tab +37 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/english/ortho_context.tab +20366 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/english/sent_starters.txt +39 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/estonian/abbrev_types.txt +48 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/estonian/collocations.tab +100 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/estonian/ortho_context.tab +68544 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/estonian/sent_starters.txt +89 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/finnish/abbrev_types.txt +81 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/finnish/collocations.tab +167 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/finnish/ortho_context.tab +79765 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/finnish/sent_starters.txt +86 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/french/abbrev_types.txt +61 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/french/collocations.tab +18 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/french/ortho_context.tab +26726 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/french/sent_starters.txt +48 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/german/abbrev_types.txt +71 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/german/collocations.tab +28 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/german/ortho_context.tab +60260 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/german/sent_starters.txt +107 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/greek/abbrev_types.txt +100 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/greek/collocations.tab +7 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/greek/ortho_context.tab +29624 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/greek/sent_starters.txt +54 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/italian/abbrev_types.txt +125 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/italian/collocations.tab +6 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/italian/ortho_context.tab +29929 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/italian/sent_starters.txt +40 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/malayalam/abbrev_types.txt +285 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/malayalam/collocations.tab +153 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/malayalam/ortho_context.tab +10520 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/malayalam/sent_starters.txt +14 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/norwegian/abbrev_types.txt +106 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/norwegian/collocations.tab +54 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/norwegian/ortho_context.tab +54125 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/norwegian/sent_starters.txt +63 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/polish/abbrev_types.txt +225 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/polish/collocations.tab +57 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/polish/ortho_context.tab +81425 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/polish/sent_starters.txt +71 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/portuguese/abbrev_types.txt +72 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/portuguese/collocations.tab +5 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/portuguese/ortho_context.tab +30167 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/portuguese/sent_starters.txt +40 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/russian/abbrev_types.txt +1989 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/russian/collocations.tab +0 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/russian/ortho_context.tab +1 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/russian/sent_starters.txt +0 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/slovene/abbrev_types.txt +73 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/slovene/collocations.tab +74 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/slovene/ortho_context.tab +35434 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/slovene/sent_starters.txt +58 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/spanish/abbrev_types.txt +66 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/spanish/collocations.tab +7 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/spanish/ortho_context.tab +27443 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/spanish/sent_starters.txt +46 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/swedish/abbrev_types.txt +39 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/swedish/collocations.tab +8 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/swedish/ortho_context.tab +44485 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/swedish/sent_starters.txt +49 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/turkish/abbrev_types.txt +67 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/turkish/collocations.tab +14 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/turkish/ortho_context.tab +45926 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/turkish/sent_starters.txt +87 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab.zip +0 -0
- compressor/semantic.py +37 -3
- {semantic_compressor-2.1.dist-info → semantic_compressor-2.3.dist-info}/METADATA +1 -1
- {semantic_compressor-2.1.dist-info → semantic_compressor-2.3.dist-info}/RECORD +84 -6
- {semantic_compressor-2.1.dist-info → semantic_compressor-2.3.dist-info}/LICENSE +0 -0
- {semantic_compressor-2.1.dist-info → semantic_compressor-2.3.dist-info}/WHEEL +0 -0
- {semantic_compressor-2.1.dist-info → semantic_compressor-2.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,46 @@
|
|
1
|
+
francisco
|
2
|
+
la
|
3
|
+
el
|
4
|
+
mayoral
|
5
|
+
tras
|
6
|
+
durante
|
7
|
+
yo
|
8
|
+
ben
|
9
|
+
aunque
|
10
|
+
foto
|
11
|
+
esto
|
12
|
+
arbitro
|
13
|
+
hoy
|
14
|
+
los
|
15
|
+
posteriormente
|
16
|
+
así
|
17
|
+
josé
|
18
|
+
por
|
19
|
+
no
|
20
|
+
_pero
|
21
|
+
pero
|
22
|
+
arbitros
|
23
|
+
estos
|
24
|
+
además
|
25
|
+
este
|
26
|
+
también
|
27
|
+
estas
|
28
|
+
pese
|
29
|
+
si
|
30
|
+
ahora
|
31
|
+
fuentes
|
32
|
+
en
|
33
|
+
esta
|
34
|
+
recibimos
|
35
|
+
igualmente
|
36
|
+
incidencias
|
37
|
+
asimismo
|
38
|
+
sin
|
39
|
+
las
|
40
|
+
silvema
|
41
|
+
juan
|
42
|
+
_toros
|
43
|
+
según
|
44
|
+
pepe
|
45
|
+
_novillos
|
46
|
+
fax
|
@@ -0,0 +1,39 @@
|
|
1
|
+
f.m
|
2
|
+
p.g.a
|
3
|
+
t.ex
|
4
|
+
ppm
|
5
|
+
e.m
|
6
|
+
t.o.m
|
7
|
+
dir
|
8
|
+
m.m
|
9
|
+
o.d
|
10
|
+
ordf
|
11
|
+
p
|
12
|
+
prop
|
13
|
+
ex
|
14
|
+
postst
|
15
|
+
jaha
|
16
|
+
kap
|
17
|
+
m.fl
|
18
|
+
landtm.-förb
|
19
|
+
f.n
|
20
|
+
o.s.v
|
21
|
+
osv
|
22
|
+
z.b
|
23
|
+
mom
|
24
|
+
ital
|
25
|
+
rskr
|
26
|
+
resp
|
27
|
+
rif
|
28
|
+
f.d
|
29
|
+
fig
|
30
|
+
s.k
|
31
|
+
hrm
|
32
|
+
hushålln.-sällsk
|
33
|
+
bl.a
|
34
|
+
föreläsn.-fören
|
35
|
+
dna
|
36
|
+
åk
|
37
|
+
mm
|
38
|
+
bf
|
39
|
+
aig
|