semantic-compressor 2.1__py3-none-any.whl → 2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- compressor/resources/nltk_data/tokenizers/punkt_tab/README +98 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/czech/abbrev_types.txt +118 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/czech/collocations.tab +96 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/czech/ortho_context.tab +52789 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/czech/sent_starters.txt +54 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/danish/abbrev_types.txt +211 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/danish/collocations.tab +101 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/danish/ortho_context.tab +53913 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/danish/sent_starters.txt +64 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/dutch/abbrev_types.txt +99 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/dutch/collocations.tab +37 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/dutch/ortho_context.tab +32208 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/dutch/sent_starters.txt +54 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/english/abbrev_types.txt +156 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/english/collocations.tab +37 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/english/ortho_context.tab +20366 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/english/sent_starters.txt +39 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/estonian/abbrev_types.txt +48 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/estonian/collocations.tab +100 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/estonian/ortho_context.tab +68544 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/estonian/sent_starters.txt +89 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/finnish/abbrev_types.txt +81 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/finnish/collocations.tab +167 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/finnish/ortho_context.tab +79765 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/finnish/sent_starters.txt +86 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/french/abbrev_types.txt +61 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/french/collocations.tab +18 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/french/ortho_context.tab +26726 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/french/sent_starters.txt +48 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/german/abbrev_types.txt +71 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/german/collocations.tab +28 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/german/ortho_context.tab +60260 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/german/sent_starters.txt +107 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/greek/abbrev_types.txt +100 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/greek/collocations.tab +7 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/greek/ortho_context.tab +29624 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/greek/sent_starters.txt +54 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/italian/abbrev_types.txt +125 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/italian/collocations.tab +6 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/italian/ortho_context.tab +29929 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/italian/sent_starters.txt +40 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/malayalam/abbrev_types.txt +285 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/malayalam/collocations.tab +153 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/malayalam/ortho_context.tab +10520 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/malayalam/sent_starters.txt +14 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/norwegian/abbrev_types.txt +106 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/norwegian/collocations.tab +54 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/norwegian/ortho_context.tab +54125 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/norwegian/sent_starters.txt +63 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/polish/abbrev_types.txt +225 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/polish/collocations.tab +57 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/polish/ortho_context.tab +81425 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/polish/sent_starters.txt +71 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/portuguese/abbrev_types.txt +72 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/portuguese/collocations.tab +5 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/portuguese/ortho_context.tab +30167 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/portuguese/sent_starters.txt +40 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/russian/abbrev_types.txt +1989 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/russian/collocations.tab +0 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/russian/ortho_context.tab +1 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/russian/sent_starters.txt +0 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/slovene/abbrev_types.txt +73 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/slovene/collocations.tab +74 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/slovene/ortho_context.tab +35434 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/slovene/sent_starters.txt +58 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/spanish/abbrev_types.txt +66 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/spanish/collocations.tab +7 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/spanish/ortho_context.tab +27443 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/spanish/sent_starters.txt +46 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/swedish/abbrev_types.txt +39 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/swedish/collocations.tab +8 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/swedish/ortho_context.tab +44485 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/swedish/sent_starters.txt +49 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/turkish/abbrev_types.txt +67 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/turkish/collocations.tab +14 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/turkish/ortho_context.tab +45926 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/turkish/sent_starters.txt +87 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab.zip +0 -0
- {semantic_compressor-2.1.dist-info → semantic_compressor-2.2.dist-info}/METADATA +1 -1
- {semantic_compressor-2.1.dist-info → semantic_compressor-2.2.dist-info}/RECORD +83 -5
- {semantic_compressor-2.1.dist-info → semantic_compressor-2.2.dist-info}/LICENSE +0 -0
- {semantic_compressor-2.1.dist-info → semantic_compressor-2.2.dist-info}/WHEEL +0 -0
- {semantic_compressor-2.1.dist-info → semantic_compressor-2.2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,86 @@
|
|
1
|
+
siinä
|
2
|
+
lämpötila
|
3
|
+
viiden
|
4
|
+
he
|
5
|
+
vapaa
|
6
|
+
viime
|
7
|
+
useimmat
|
8
|
+
kansallisooppera
|
9
|
+
rooleissa
|
10
|
+
näin
|
11
|
+
odotettavissa
|
12
|
+
tiedustelut
|
13
|
+
kansallisteatterin
|
14
|
+
sen
|
15
|
+
musiikki
|
16
|
+
monet
|
17
|
+
uusi
|
18
|
+
avoinna
|
19
|
+
pakkasta
|
20
|
+
freeze
|
21
|
+
tämä
|
22
|
+
lämpö
|
23
|
+
lautakunta
|
24
|
+
vastaväittäjänä
|
25
|
+
päivällä
|
26
|
+
tällä
|
27
|
+
esimerkiksi
|
28
|
+
varoituksia
|
29
|
+
merenkurkku
|
30
|
+
meriennuste
|
31
|
+
näyttelyssä
|
32
|
+
kun
|
33
|
+
pilvistä
|
34
|
+
silloin
|
35
|
+
selkämeren
|
36
|
+
suurin
|
37
|
+
se
|
38
|
+
jos
|
39
|
+
vaihtelevaa
|
40
|
+
vastaväittäjinä
|
41
|
+
sivu
|
42
|
+
kaupunginteatterin
|
43
|
+
pilvisyys
|
44
|
+
siellä
|
45
|
+
siksi
|
46
|
+
kurssimaksu
|
47
|
+
tämän
|
48
|
+
kotimaa
|
49
|
+
näiden
|
50
|
+
teatteri
|
51
|
+
kaikki
|
52
|
+
puolipilvistä
|
53
|
+
niiden
|
54
|
+
maksimilämpötila
|
55
|
+
lisäksi
|
56
|
+
kaupunginhallitus
|
57
|
+
helsingin
|
58
|
+
nyt
|
59
|
+
samalla
|
60
|
+
hänen
|
61
|
+
olen
|
62
|
+
kaupunkikierros
|
63
|
+
vastaväittäjä
|
64
|
+
ne
|
65
|
+
tästä
|
66
|
+
enimmäkseen
|
67
|
+
poika
|
68
|
+
niinpä
|
69
|
+
viirus
|
70
|
+
me
|
71
|
+
poliisi
|
72
|
+
liput
|
73
|
+
ilmoittautuminen
|
74
|
+
tarjoa
|
75
|
+
hän
|
76
|
+
molemmat
|
77
|
+
ulkomaat
|
78
|
+
rock
|
79
|
+
lääketieteen
|
80
|
+
tanssi
|
81
|
+
sainks
|
82
|
+
näyttely
|
83
|
+
lisätietoja
|
84
|
+
ulkomaiden
|
85
|
+
näyttelyn
|
86
|
+
palo
|
@@ -0,0 +1,61 @@
|
|
1
|
+
p.o.l
|
2
|
+
pds
|
3
|
+
3o
|
4
|
+
inscr
|
5
|
+
suè
|
6
|
+
z
|
7
|
+
abst
|
8
|
+
g.-b
|
9
|
+
tél
|
10
|
+
r
|
11
|
+
ed
|
12
|
+
o
|
13
|
+
b
|
14
|
+
esp
|
15
|
+
j.l
|
16
|
+
v
|
17
|
+
k
|
18
|
+
e.p
|
19
|
+
aus
|
20
|
+
jap
|
21
|
+
r.e
|
22
|
+
gb-bel
|
23
|
+
p
|
24
|
+
aut
|
25
|
+
usx
|
26
|
+
arg
|
27
|
+
g
|
28
|
+
e
|
29
|
+
etc
|
30
|
+
fra
|
31
|
+
p.s
|
32
|
+
j.-l
|
33
|
+
blu
|
34
|
+
e.-u
|
35
|
+
f.b
|
36
|
+
msf
|
37
|
+
e.d
|
38
|
+
shi
|
39
|
+
can
|
40
|
+
j.b
|
41
|
+
s.a
|
42
|
+
f.o
|
43
|
+
you
|
44
|
+
mir
|
45
|
+
inc
|
46
|
+
ital
|
47
|
+
expr
|
48
|
+
tch
|
49
|
+
g-b-bel
|
50
|
+
cid
|
51
|
+
c.u
|
52
|
+
ctk
|
53
|
+
j.-m.g
|
54
|
+
bta
|
55
|
+
p.-b
|
56
|
+
cie
|
57
|
+
ita
|
58
|
+
equ
|
59
|
+
corp
|
60
|
+
vot
|
61
|
+
w
|
@@ -0,0 +1,18 @@
|
|
1
|
+
##number## shinozuka-magne
|
2
|
+
##number## ambrosino-baumgartner
|
3
|
+
c tanvier
|
4
|
+
f b.
|
5
|
+
##number## waldegaard-fenouil
|
6
|
+
##number## fermé
|
7
|
+
a dechaume
|
8
|
+
i demongeot
|
9
|
+
s motos
|
10
|
+
##number## rahier
|
11
|
+
##number## magnaldi
|
12
|
+
##number## orioli
|
13
|
+
f tél.
|
14
|
+
##number## cowan-delferrier
|
15
|
+
##number## vatanen-berglund
|
16
|
+
##number## picco
|
17
|
+
##number## masuoka-oligo
|
18
|
+
##number## medardo
|