semantic-compressor 2.0__py3-none-any.whl → 2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- compressor/resources/nltk_data/tokenizers/punkt_tab/README +98 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/czech/abbrev_types.txt +118 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/czech/collocations.tab +96 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/czech/ortho_context.tab +52789 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/czech/sent_starters.txt +54 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/danish/abbrev_types.txt +211 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/danish/collocations.tab +101 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/danish/ortho_context.tab +53913 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/danish/sent_starters.txt +64 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/dutch/abbrev_types.txt +99 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/dutch/collocations.tab +37 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/dutch/ortho_context.tab +32208 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/dutch/sent_starters.txt +54 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/english/abbrev_types.txt +156 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/english/collocations.tab +37 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/english/ortho_context.tab +20366 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/english/sent_starters.txt +39 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/estonian/abbrev_types.txt +48 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/estonian/collocations.tab +100 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/estonian/ortho_context.tab +68544 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/estonian/sent_starters.txt +89 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/finnish/abbrev_types.txt +81 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/finnish/collocations.tab +167 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/finnish/ortho_context.tab +79765 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/finnish/sent_starters.txt +86 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/french/abbrev_types.txt +61 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/french/collocations.tab +18 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/french/ortho_context.tab +26726 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/french/sent_starters.txt +48 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/german/abbrev_types.txt +71 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/german/collocations.tab +28 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/german/ortho_context.tab +60260 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/german/sent_starters.txt +107 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/greek/abbrev_types.txt +100 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/greek/collocations.tab +7 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/greek/ortho_context.tab +29624 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/greek/sent_starters.txt +54 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/italian/abbrev_types.txt +125 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/italian/collocations.tab +6 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/italian/ortho_context.tab +29929 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/italian/sent_starters.txt +40 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/malayalam/abbrev_types.txt +285 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/malayalam/collocations.tab +153 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/malayalam/ortho_context.tab +10520 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/malayalam/sent_starters.txt +14 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/norwegian/abbrev_types.txt +106 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/norwegian/collocations.tab +54 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/norwegian/ortho_context.tab +54125 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/norwegian/sent_starters.txt +63 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/polish/abbrev_types.txt +225 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/polish/collocations.tab +57 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/polish/ortho_context.tab +81425 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/polish/sent_starters.txt +71 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/portuguese/abbrev_types.txt +72 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/portuguese/collocations.tab +5 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/portuguese/ortho_context.tab +30167 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/portuguese/sent_starters.txt +40 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/russian/abbrev_types.txt +1989 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/russian/collocations.tab +0 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/russian/ortho_context.tab +1 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/russian/sent_starters.txt +0 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/slovene/abbrev_types.txt +73 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/slovene/collocations.tab +74 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/slovene/ortho_context.tab +35434 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/slovene/sent_starters.txt +58 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/spanish/abbrev_types.txt +66 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/spanish/collocations.tab +7 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/spanish/ortho_context.tab +27443 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/spanish/sent_starters.txt +46 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/swedish/abbrev_types.txt +39 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/swedish/collocations.tab +8 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/swedish/ortho_context.tab +44485 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/swedish/sent_starters.txt +49 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/turkish/abbrev_types.txt +67 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/turkish/collocations.tab +14 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/turkish/ortho_context.tab +45926 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/turkish/sent_starters.txt +87 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab.zip +0 -0
- compressor/semantic.py +1 -1
- {semantic_compressor-2.0.dist-info → semantic_compressor-2.2.dist-info}/METADATA +1 -1
- {semantic_compressor-2.0.dist-info → semantic_compressor-2.2.dist-info}/RECORD +84 -6
- {semantic_compressor-2.0.dist-info → semantic_compressor-2.2.dist-info}/LICENSE +0 -0
- {semantic_compressor-2.0.dist-info → semantic_compressor-2.2.dist-info}/WHEEL +0 -0
- {semantic_compressor-2.0.dist-info → semantic_compressor-2.2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,64 @@
|
|
1
|
+
kronik
|
2
|
+
alligevel
|
3
|
+
de
|
4
|
+
først
|
5
|
+
derfor
|
6
|
+
vi
|
7
|
+
selv
|
8
|
+
hertil
|
9
|
+
sådan
|
10
|
+
dette
|
11
|
+
sport
|
12
|
+
man
|
13
|
+
foto
|
14
|
+
begge
|
15
|
+
tag
|
16
|
+
dertil
|
17
|
+
reuter
|
18
|
+
efter
|
19
|
+
endelig
|
20
|
+
ifølge
|
21
|
+
lad
|
22
|
+
når
|
23
|
+
det
|
24
|
+
desuden
|
25
|
+
nu
|
26
|
+
reuters
|
27
|
+
årsagen
|
28
|
+
tænk
|
29
|
+
samtidig
|
30
|
+
udover
|
31
|
+
men
|
32
|
+
endvidere
|
33
|
+
rør
|
34
|
+
rb
|
35
|
+
udstillingen
|
36
|
+
faktabox
|
37
|
+
reception
|
38
|
+
blandt
|
39
|
+
hvad
|
40
|
+
skær
|
41
|
+
lilot
|
42
|
+
derudover
|
43
|
+
da
|
44
|
+
tilsæt
|
45
|
+
denne
|
46
|
+
afp
|
47
|
+
her
|
48
|
+
hvis
|
49
|
+
hæld
|
50
|
+
problemet
|
51
|
+
dermed
|
52
|
+
jeg
|
53
|
+
grafik
|
54
|
+
anmeldelse
|
55
|
+
den
|
56
|
+
ebbe
|
57
|
+
resultatet
|
58
|
+
tværtimod
|
59
|
+
hans
|
60
|
+
måske
|
61
|
+
feature
|
62
|
+
tillæg
|
63
|
+
hun
|
64
|
+
han
|
@@ -0,0 +1,99 @@
|
|
1
|
+
m.j
|
2
|
+
t
|
3
|
+
ph
|
4
|
+
j.h
|
5
|
+
p.a.m
|
6
|
+
j.m
|
7
|
+
dr
|
8
|
+
st
|
9
|
+
j.b.m
|
10
|
+
p
|
11
|
+
nr
|
12
|
+
h.s
|
13
|
+
e.d
|
14
|
+
t.e
|
15
|
+
a.v
|
16
|
+
esb
|
17
|
+
s.z
|
18
|
+
drs
|
19
|
+
b.b
|
20
|
+
m.o
|
21
|
+
inc
|
22
|
+
n
|
23
|
+
pensioenfonds
|
24
|
+
s.v.p
|
25
|
+
bod
|
26
|
+
fr
|
27
|
+
pk
|
28
|
+
r.p
|
29
|
+
c.p.j
|
30
|
+
v.l.n.r
|
31
|
+
chr
|
32
|
+
m.v.d
|
33
|
+
int
|
34
|
+
o.m
|
35
|
+
j.v.d
|
36
|
+
u.o.m
|
37
|
+
f.c
|
38
|
+
k
|
39
|
+
bijgebracht
|
40
|
+
ontwaakte
|
41
|
+
m
|
42
|
+
j.w
|
43
|
+
a.l
|
44
|
+
a.v.d
|
45
|
+
s.v
|
46
|
+
s
|
47
|
+
j.d
|
48
|
+
binnengekomen
|
49
|
+
ds
|
50
|
+
schouwburg
|
51
|
+
b.v
|
52
|
+
h
|
53
|
+
a
|
54
|
+
j.a
|
55
|
+
aanvielen
|
56
|
+
h.g
|
57
|
+
p.f
|
58
|
+
j.l
|
59
|
+
mgr
|
60
|
+
c.j
|
61
|
+
blz
|
62
|
+
l.e.h
|
63
|
+
w.k
|
64
|
+
g
|
65
|
+
m.g
|
66
|
+
r.v.d
|
67
|
+
ing
|
68
|
+
v.d
|
69
|
+
c.q
|
70
|
+
l
|
71
|
+
h.p
|
72
|
+
mr
|
73
|
+
gesch
|
74
|
+
e.l
|
75
|
+
p.j
|
76
|
+
mm
|
77
|
+
j.g
|
78
|
+
j.f
|
79
|
+
c
|
80
|
+
f.m
|
81
|
+
jl
|
82
|
+
r
|
83
|
+
o.a
|
84
|
+
a.s
|
85
|
+
ir
|
86
|
+
v
|
87
|
+
j
|
88
|
+
jr
|
89
|
+
e
|
90
|
+
m.i.v
|
91
|
+
l.a
|
92
|
+
f.v.d
|
93
|
+
aansluit
|
94
|
+
c.c
|
95
|
+
a.m
|
96
|
+
f.o.j
|
97
|
+
m.b
|
98
|
+
y
|
99
|
+
th
|
@@ -0,0 +1,37 @@
|
|
1
|
+
##number## sotelo
|
2
|
+
##number## clas
|
3
|
+
##number## buckler
|
4
|
+
##number## carrera
|
5
|
+
##number## rmo
|
6
|
+
##number## orioli
|
7
|
+
w baron
|
8
|
+
##number## morales
|
9
|
+
##number## snotselelaank
|
10
|
+
##number## arcarons
|
11
|
+
##number## cavandoli
|
12
|
+
##number## pdm
|
13
|
+
##number## helvetia
|
14
|
+
##number## panasonic
|
15
|
+
##number## motorola
|
16
|
+
w bruinsma
|
17
|
+
##number## heer
|
18
|
+
##number## lotus
|
19
|
+
##number## banesto
|
20
|
+
##number## magnaldi
|
21
|
+
w jense
|
22
|
+
w heuvelmans
|
23
|
+
w spatje
|
24
|
+
##number## telekom
|
25
|
+
f kennedy
|
26
|
+
##number## gatorade
|
27
|
+
##number## mg-gb
|
28
|
+
##number## once
|
29
|
+
##number## peterhansel
|
30
|
+
##number## ariostea
|
31
|
+
##number## tvm
|
32
|
+
##number## höl
|
33
|
+
##number## castorama
|
34
|
+
##number## tulip
|
35
|
+
b situatie
|
36
|
+
##number## mas
|
37
|
+
##number## lotto
|