semantic-compressor 2.1__py3-none-any.whl → 2.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- compressor/resources/nltk_data/tokenizers/punkt_tab/README +98 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/czech/abbrev_types.txt +118 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/czech/collocations.tab +96 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/czech/ortho_context.tab +52789 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/czech/sent_starters.txt +54 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/danish/abbrev_types.txt +211 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/danish/collocations.tab +101 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/danish/ortho_context.tab +53913 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/danish/sent_starters.txt +64 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/dutch/abbrev_types.txt +99 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/dutch/collocations.tab +37 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/dutch/ortho_context.tab +32208 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/dutch/sent_starters.txt +54 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/english/abbrev_types.txt +156 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/english/collocations.tab +37 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/english/ortho_context.tab +20366 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/english/sent_starters.txt +39 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/estonian/abbrev_types.txt +48 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/estonian/collocations.tab +100 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/estonian/ortho_context.tab +68544 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/estonian/sent_starters.txt +89 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/finnish/abbrev_types.txt +81 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/finnish/collocations.tab +167 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/finnish/ortho_context.tab +79765 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/finnish/sent_starters.txt +86 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/french/abbrev_types.txt +61 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/french/collocations.tab +18 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/french/ortho_context.tab +26726 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/french/sent_starters.txt +48 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/german/abbrev_types.txt +71 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/german/collocations.tab +28 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/german/ortho_context.tab +60260 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/german/sent_starters.txt +107 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/greek/abbrev_types.txt +100 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/greek/collocations.tab +7 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/greek/ortho_context.tab +29624 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/greek/sent_starters.txt +54 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/italian/abbrev_types.txt +125 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/italian/collocations.tab +6 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/italian/ortho_context.tab +29929 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/italian/sent_starters.txt +40 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/malayalam/abbrev_types.txt +285 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/malayalam/collocations.tab +153 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/malayalam/ortho_context.tab +10520 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/malayalam/sent_starters.txt +14 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/norwegian/abbrev_types.txt +106 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/norwegian/collocations.tab +54 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/norwegian/ortho_context.tab +54125 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/norwegian/sent_starters.txt +63 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/polish/abbrev_types.txt +225 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/polish/collocations.tab +57 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/polish/ortho_context.tab +81425 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/polish/sent_starters.txt +71 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/portuguese/abbrev_types.txt +72 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/portuguese/collocations.tab +5 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/portuguese/ortho_context.tab +30167 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/portuguese/sent_starters.txt +40 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/russian/abbrev_types.txt +1989 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/russian/collocations.tab +0 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/russian/ortho_context.tab +1 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/russian/sent_starters.txt +0 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/slovene/abbrev_types.txt +73 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/slovene/collocations.tab +74 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/slovene/ortho_context.tab +35434 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/slovene/sent_starters.txt +58 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/spanish/abbrev_types.txt +66 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/spanish/collocations.tab +7 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/spanish/ortho_context.tab +27443 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/spanish/sent_starters.txt +46 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/swedish/abbrev_types.txt +39 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/swedish/collocations.tab +8 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/swedish/ortho_context.tab +44485 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/swedish/sent_starters.txt +49 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/turkish/abbrev_types.txt +67 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/turkish/collocations.tab +14 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/turkish/ortho_context.tab +45926 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab/turkish/sent_starters.txt +87 -0
- compressor/resources/nltk_data/tokenizers/punkt_tab.zip +0 -0
- compressor/semantic.py +37 -3
- {semantic_compressor-2.1.dist-info → semantic_compressor-2.3.dist-info}/METADATA +1 -1
- {semantic_compressor-2.1.dist-info → semantic_compressor-2.3.dist-info}/RECORD +84 -6
- {semantic_compressor-2.1.dist-info → semantic_compressor-2.3.dist-info}/LICENSE +0 -0
- {semantic_compressor-2.1.dist-info → semantic_compressor-2.3.dist-info}/WHEEL +0 -0
- {semantic_compressor-2.1.dist-info → semantic_compressor-2.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,107 @@
|
|
1
|
+
das
|
2
|
+
man
|
3
|
+
es
|
4
|
+
wir
|
5
|
+
dabei
|
6
|
+
ferner
|
7
|
+
ähnliches
|
8
|
+
während
|
9
|
+
entscheidend
|
10
|
+
ausserdem
|
11
|
+
ein
|
12
|
+
in
|
13
|
+
der
|
14
|
+
daraus
|
15
|
+
obschon
|
16
|
+
beide
|
17
|
+
hier
|
18
|
+
all
|
19
|
+
neben
|
20
|
+
solche
|
21
|
+
hingegen
|
22
|
+
selbstverständlich
|
23
|
+
daneben
|
24
|
+
hinzu
|
25
|
+
vielmehr
|
26
|
+
sie
|
27
|
+
natürlich
|
28
|
+
obwohl
|
29
|
+
nun
|
30
|
+
doch
|
31
|
+
ob
|
32
|
+
abgesehen
|
33
|
+
überdies
|
34
|
+
im
|
35
|
+
zweitens
|
36
|
+
darin
|
37
|
+
erstens
|
38
|
+
dieses
|
39
|
+
nach
|
40
|
+
wer
|
41
|
+
da
|
42
|
+
interessant
|
43
|
+
seit
|
44
|
+
zudem
|
45
|
+
darüber
|
46
|
+
umgekehrt
|
47
|
+
ähnlich
|
48
|
+
aber
|
49
|
+
was
|
50
|
+
nachdem
|
51
|
+
insbesondere
|
52
|
+
statt
|
53
|
+
angesichts
|
54
|
+
gefragt
|
55
|
+
gleiches
|
56
|
+
solange
|
57
|
+
wenn
|
58
|
+
dies
|
59
|
+
dass
|
60
|
+
wie
|
61
|
+
damit
|
62
|
+
allerdings
|
63
|
+
denn
|
64
|
+
letztere
|
65
|
+
eine
|
66
|
+
selbst
|
67
|
+
gleichzeitig
|
68
|
+
wo
|
69
|
+
weder
|
70
|
+
gerade
|
71
|
+
unter
|
72
|
+
problematischer
|
73
|
+
wieso
|
74
|
+
dennoch
|
75
|
+
bei
|
76
|
+
deshalb
|
77
|
+
davon
|
78
|
+
andernfalls
|
79
|
+
er
|
80
|
+
die
|
81
|
+
anders
|
82
|
+
auch
|
83
|
+
ebenso
|
84
|
+
so
|
85
|
+
inzwischen
|
86
|
+
sonst
|
87
|
+
immerhin
|
88
|
+
entsprechend
|
89
|
+
danach
|
90
|
+
am
|
91
|
+
trotz
|
92
|
+
trotzdem
|
93
|
+
worum
|
94
|
+
damals
|
95
|
+
dafür
|
96
|
+
schliesslich
|
97
|
+
gemäss
|
98
|
+
demgegenüber
|
99
|
+
warum
|
100
|
+
letzteres
|
101
|
+
mit
|
102
|
+
dazu
|
103
|
+
anderseits
|
104
|
+
ganz
|
105
|
+
zwar
|
106
|
+
dieser
|
107
|
+
diese
|
@@ -0,0 +1,100 @@
|
|
1
|
+
κλ
|
2
|
+
δημ
|
3
|
+
χλμ
|
4
|
+
σ.τ.ε
|
5
|
+
ό.π
|
6
|
+
δρχ
|
7
|
+
κων
|
8
|
+
χρ
|
9
|
+
π.α
|
10
|
+
ριχ
|
11
|
+
π.χρ
|
12
|
+
υγ
|
13
|
+
tel
|
14
|
+
ζ
|
15
|
+
ο.π
|
16
|
+
βασ
|
17
|
+
γλ
|
18
|
+
n.c
|
19
|
+
d.j
|
20
|
+
σωκ
|
21
|
+
π
|
22
|
+
ιω
|
23
|
+
αχ
|
24
|
+
βα
|
25
|
+
γερ
|
26
|
+
εκδ
|
27
|
+
κλπ
|
28
|
+
φ
|
29
|
+
ελ
|
30
|
+
οσ
|
31
|
+
α
|
32
|
+
σελ
|
33
|
+
ευ
|
34
|
+
ε.έ
|
35
|
+
ρ
|
36
|
+
ε.τ.α
|
37
|
+
λ
|
38
|
+
εβ
|
39
|
+
θρ
|
40
|
+
ν
|
41
|
+
βλ
|
42
|
+
ηλ
|
43
|
+
γ
|
44
|
+
αρ
|
45
|
+
π.χ
|
46
|
+
ε.μ
|
47
|
+
κ.μ
|
48
|
+
α.ε
|
49
|
+
μιχ
|
50
|
+
δισ
|
51
|
+
ολ
|
52
|
+
μ
|
53
|
+
κ.ά
|
54
|
+
κ
|
55
|
+
δηλ
|
56
|
+
ε.α.χ
|
57
|
+
πρ
|
58
|
+
αγ
|
59
|
+
μac
|
60
|
+
κ.ο.κ
|
61
|
+
λ.χ
|
62
|
+
θ
|
63
|
+
αδσ
|
64
|
+
εκατ
|
65
|
+
δρη
|
66
|
+
εμμ
|
67
|
+
δ
|
68
|
+
δεκ
|
69
|
+
σ.σ
|
70
|
+
55ο
|
71
|
+
κκ
|
72
|
+
αδ
|
73
|
+
τ.μ
|
74
|
+
ε.ε
|
75
|
+
μ.χ
|
76
|
+
ν.μ
|
77
|
+
κτλ
|
78
|
+
δολ
|
79
|
+
κ.ά.π
|
80
|
+
αγγ
|
81
|
+
μ.κ
|
82
|
+
δ.σ
|
83
|
+
μπ
|
84
|
+
έκδ
|
85
|
+
ι
|
86
|
+
v
|
87
|
+
χαρ
|
88
|
+
γρ
|
89
|
+
μ.μ.ε
|
90
|
+
σχ
|
91
|
+
λεκ
|
92
|
+
σπ
|
93
|
+
πλι
|
94
|
+
αθ
|
95
|
+
χ
|
96
|
+
τζ
|
97
|
+
τρισ
|
98
|
+
στ
|
99
|
+
ευθ
|
100
|
+
μ.μ
|