semantic-compressor 2.1__py3-none-any.whl → 2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83) hide show
  1. compressor/resources/nltk_data/tokenizers/punkt_tab/README +98 -0
  2. compressor/resources/nltk_data/tokenizers/punkt_tab/czech/abbrev_types.txt +118 -0
  3. compressor/resources/nltk_data/tokenizers/punkt_tab/czech/collocations.tab +96 -0
  4. compressor/resources/nltk_data/tokenizers/punkt_tab/czech/ortho_context.tab +52789 -0
  5. compressor/resources/nltk_data/tokenizers/punkt_tab/czech/sent_starters.txt +54 -0
  6. compressor/resources/nltk_data/tokenizers/punkt_tab/danish/abbrev_types.txt +211 -0
  7. compressor/resources/nltk_data/tokenizers/punkt_tab/danish/collocations.tab +101 -0
  8. compressor/resources/nltk_data/tokenizers/punkt_tab/danish/ortho_context.tab +53913 -0
  9. compressor/resources/nltk_data/tokenizers/punkt_tab/danish/sent_starters.txt +64 -0
  10. compressor/resources/nltk_data/tokenizers/punkt_tab/dutch/abbrev_types.txt +99 -0
  11. compressor/resources/nltk_data/tokenizers/punkt_tab/dutch/collocations.tab +37 -0
  12. compressor/resources/nltk_data/tokenizers/punkt_tab/dutch/ortho_context.tab +32208 -0
  13. compressor/resources/nltk_data/tokenizers/punkt_tab/dutch/sent_starters.txt +54 -0
  14. compressor/resources/nltk_data/tokenizers/punkt_tab/english/abbrev_types.txt +156 -0
  15. compressor/resources/nltk_data/tokenizers/punkt_tab/english/collocations.tab +37 -0
  16. compressor/resources/nltk_data/tokenizers/punkt_tab/english/ortho_context.tab +20366 -0
  17. compressor/resources/nltk_data/tokenizers/punkt_tab/english/sent_starters.txt +39 -0
  18. compressor/resources/nltk_data/tokenizers/punkt_tab/estonian/abbrev_types.txt +48 -0
  19. compressor/resources/nltk_data/tokenizers/punkt_tab/estonian/collocations.tab +100 -0
  20. compressor/resources/nltk_data/tokenizers/punkt_tab/estonian/ortho_context.tab +68544 -0
  21. compressor/resources/nltk_data/tokenizers/punkt_tab/estonian/sent_starters.txt +89 -0
  22. compressor/resources/nltk_data/tokenizers/punkt_tab/finnish/abbrev_types.txt +81 -0
  23. compressor/resources/nltk_data/tokenizers/punkt_tab/finnish/collocations.tab +167 -0
  24. compressor/resources/nltk_data/tokenizers/punkt_tab/finnish/ortho_context.tab +79765 -0
  25. compressor/resources/nltk_data/tokenizers/punkt_tab/finnish/sent_starters.txt +86 -0
  26. compressor/resources/nltk_data/tokenizers/punkt_tab/french/abbrev_types.txt +61 -0
  27. compressor/resources/nltk_data/tokenizers/punkt_tab/french/collocations.tab +18 -0
  28. compressor/resources/nltk_data/tokenizers/punkt_tab/french/ortho_context.tab +26726 -0
  29. compressor/resources/nltk_data/tokenizers/punkt_tab/french/sent_starters.txt +48 -0
  30. compressor/resources/nltk_data/tokenizers/punkt_tab/german/abbrev_types.txt +71 -0
  31. compressor/resources/nltk_data/tokenizers/punkt_tab/german/collocations.tab +28 -0
  32. compressor/resources/nltk_data/tokenizers/punkt_tab/german/ortho_context.tab +60260 -0
  33. compressor/resources/nltk_data/tokenizers/punkt_tab/german/sent_starters.txt +107 -0
  34. compressor/resources/nltk_data/tokenizers/punkt_tab/greek/abbrev_types.txt +100 -0
  35. compressor/resources/nltk_data/tokenizers/punkt_tab/greek/collocations.tab +7 -0
  36. compressor/resources/nltk_data/tokenizers/punkt_tab/greek/ortho_context.tab +29624 -0
  37. compressor/resources/nltk_data/tokenizers/punkt_tab/greek/sent_starters.txt +54 -0
  38. compressor/resources/nltk_data/tokenizers/punkt_tab/italian/abbrev_types.txt +125 -0
  39. compressor/resources/nltk_data/tokenizers/punkt_tab/italian/collocations.tab +6 -0
  40. compressor/resources/nltk_data/tokenizers/punkt_tab/italian/ortho_context.tab +29929 -0
  41. compressor/resources/nltk_data/tokenizers/punkt_tab/italian/sent_starters.txt +40 -0
  42. compressor/resources/nltk_data/tokenizers/punkt_tab/malayalam/abbrev_types.txt +285 -0
  43. compressor/resources/nltk_data/tokenizers/punkt_tab/malayalam/collocations.tab +153 -0
  44. compressor/resources/nltk_data/tokenizers/punkt_tab/malayalam/ortho_context.tab +10520 -0
  45. compressor/resources/nltk_data/tokenizers/punkt_tab/malayalam/sent_starters.txt +14 -0
  46. compressor/resources/nltk_data/tokenizers/punkt_tab/norwegian/abbrev_types.txt +106 -0
  47. compressor/resources/nltk_data/tokenizers/punkt_tab/norwegian/collocations.tab +54 -0
  48. compressor/resources/nltk_data/tokenizers/punkt_tab/norwegian/ortho_context.tab +54125 -0
  49. compressor/resources/nltk_data/tokenizers/punkt_tab/norwegian/sent_starters.txt +63 -0
  50. compressor/resources/nltk_data/tokenizers/punkt_tab/polish/abbrev_types.txt +225 -0
  51. compressor/resources/nltk_data/tokenizers/punkt_tab/polish/collocations.tab +57 -0
  52. compressor/resources/nltk_data/tokenizers/punkt_tab/polish/ortho_context.tab +81425 -0
  53. compressor/resources/nltk_data/tokenizers/punkt_tab/polish/sent_starters.txt +71 -0
  54. compressor/resources/nltk_data/tokenizers/punkt_tab/portuguese/abbrev_types.txt +72 -0
  55. compressor/resources/nltk_data/tokenizers/punkt_tab/portuguese/collocations.tab +5 -0
  56. compressor/resources/nltk_data/tokenizers/punkt_tab/portuguese/ortho_context.tab +30167 -0
  57. compressor/resources/nltk_data/tokenizers/punkt_tab/portuguese/sent_starters.txt +40 -0
  58. compressor/resources/nltk_data/tokenizers/punkt_tab/russian/abbrev_types.txt +1989 -0
  59. compressor/resources/nltk_data/tokenizers/punkt_tab/russian/collocations.tab +0 -0
  60. compressor/resources/nltk_data/tokenizers/punkt_tab/russian/ortho_context.tab +1 -0
  61. compressor/resources/nltk_data/tokenizers/punkt_tab/russian/sent_starters.txt +0 -0
  62. compressor/resources/nltk_data/tokenizers/punkt_tab/slovene/abbrev_types.txt +73 -0
  63. compressor/resources/nltk_data/tokenizers/punkt_tab/slovene/collocations.tab +74 -0
  64. compressor/resources/nltk_data/tokenizers/punkt_tab/slovene/ortho_context.tab +35434 -0
  65. compressor/resources/nltk_data/tokenizers/punkt_tab/slovene/sent_starters.txt +58 -0
  66. compressor/resources/nltk_data/tokenizers/punkt_tab/spanish/abbrev_types.txt +66 -0
  67. compressor/resources/nltk_data/tokenizers/punkt_tab/spanish/collocations.tab +7 -0
  68. compressor/resources/nltk_data/tokenizers/punkt_tab/spanish/ortho_context.tab +27443 -0
  69. compressor/resources/nltk_data/tokenizers/punkt_tab/spanish/sent_starters.txt +46 -0
  70. compressor/resources/nltk_data/tokenizers/punkt_tab/swedish/abbrev_types.txt +39 -0
  71. compressor/resources/nltk_data/tokenizers/punkt_tab/swedish/collocations.tab +8 -0
  72. compressor/resources/nltk_data/tokenizers/punkt_tab/swedish/ortho_context.tab +44485 -0
  73. compressor/resources/nltk_data/tokenizers/punkt_tab/swedish/sent_starters.txt +49 -0
  74. compressor/resources/nltk_data/tokenizers/punkt_tab/turkish/abbrev_types.txt +67 -0
  75. compressor/resources/nltk_data/tokenizers/punkt_tab/turkish/collocations.tab +14 -0
  76. compressor/resources/nltk_data/tokenizers/punkt_tab/turkish/ortho_context.tab +45926 -0
  77. compressor/resources/nltk_data/tokenizers/punkt_tab/turkish/sent_starters.txt +87 -0
  78. compressor/resources/nltk_data/tokenizers/punkt_tab.zip +0 -0
  79. {semantic_compressor-2.1.dist-info → semantic_compressor-2.2.dist-info}/METADATA +1 -1
  80. {semantic_compressor-2.1.dist-info → semantic_compressor-2.2.dist-info}/RECORD +83 -5
  81. {semantic_compressor-2.1.dist-info → semantic_compressor-2.2.dist-info}/LICENSE +0 -0
  82. {semantic_compressor-2.1.dist-info → semantic_compressor-2.2.dist-info}/WHEEL +0 -0
  83. {semantic_compressor-2.1.dist-info → semantic_compressor-2.2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,54 @@
1
+
2
+ milena
3
+ tomáš
4
+ oznámila
5
+ podle
6
+ my
7
+ vyplývá
8
+ hlavní
9
+ jelikož
10
+ musíme
11
+ kdyby
12
+ foto
13
+ rozptylové
14
+ snad
15
+ zároveň
16
+ jaroslav
17
+ po
18
+ v
19
+ kromě
20
+ pokud
21
+ toto
22
+ jenže
23
+ oba
24
+ jak
25
+ zatímco
26
+ ten
27
+ myslím
28
+ navíc
29
+ dušan
30
+ zdá
31
+ dnes
32
+ přesto
33
+ tato
34
+ ti
35
+ bratislava
36
+ ale
37
+ když
38
+ nicméně
39
+ tento
40
+ mirka
41
+ přitom
42
+ dokud
43
+ jan
44
+ bohužel
45
+ ta
46
+ díky
47
+ prohlásil
48
+ praha
49
+ jestliže
50
+ jde
51
+ vždyť
52
+ moskva
53
+ proto
54
+ to
@@ -0,0 +1,211 @@
1
+ t
2
+ tlf
3
+ b.p
4
+ evt
5
+ j.h
6
+ lenz
7
+ mht
8
+ gl
9
+ bl
10
+ stud.polit
11
+ e.j
12
+ st
13
+ o
14
+ dec
15
+ mag
16
+ h.b
17
+ p
18
+ adm
19
+ el.lign
20
+ e.s
21
+ saalba
22
+ styrt
23
+ nr
24
+ m.a.s.h
25
+ etc
26
+ pharm
27
+ hg
28
+ j.j
29
+ dj
30
+ mountainb
31
+ f.kr
32
+ h.r
33
+ cand.jur
34
+ sp
35
+ osv
36
+ s.g
37
+ ndr
38
+ inc
39
+ b.i.g
40
+ dk-sver
41
+ sl
42
+ v.s.o.d
43
+ cand.mag
44
+ d.v.s
45
+ v.i
46
+ bøddel
47
+ fr
48
+ ø«
49
+ dr.phil
50
+ chr
51
+ p.d
52
+ bj
53
+ fhv
54
+ tilskudsforhold
55
+ m.a
56
+ sek
57
+ p.g.a
58
+ int
59
+ pokalf
60
+ ik
61
+ dir
62
+ em-lodtrækn
63
+ a.h
64
+ o.lign
65
+ p.t
66
+ m.v
67
+ n.j
68
+ m.h.t
69
+ m.m
70
+ a.p
71
+ pers
72
+ 4-bakketurn
73
+ dr.med
74
+ w.ø
75
+ polit
76
+ fremsættes
77
+ techn
78
+ tidl
79
+ o.g
80
+ i.c.i
81
+ mill
82
+ skt
83
+ m.fl
84
+ cand.merc
85
+ kbh
86
+ indiv
87
+ stk
88
+ dk-maked
89
+ memorandum
90
+ mestersk
91
+ mag.art
92
+ kitzb
93
+ h
94
+ lic
95
+ fig
96
+ dressurst
97
+ sportsg
98
+ r.e.m
99
+ d.u.m
100
+ sct
101
+ kld
102
+ bl.a
103
+ hf
104
+ g.a
105
+ corp
106
+ w
107
+ konk
108
+ zoeterm
109
+ b.t
110
+ a.d
111
+ l.b
112
+ jf
113
+ s.b
114
+ kgl
115
+ ill
116
+ beck
117
+ tosset
118
+ afd
119
+ johs
120
+ pct
121
+ k.b
122
+ sv
123
+ verbalt
124
+ kgs
125
+ l.m.k
126
+ j.l
127
+ aus
128
+ superl
129
+ t.v
130
+ mia
131
+ kr
132
+ pr
133
+ præmien
134
+ j.b.s
135
+ j.o
136
+ o.s.v
137
+ edb-oplysninger
138
+ o.m.a
139
+ ca
140
+ 1b
141
+ f.eks
142
+ rens
143
+ ch
144
+ mr
145
+ schw
146
+ d.c
147
+ utraditionelt
148
+ idrætsgym
149
+ hhv
150
+ e.l
151
+ s.s
152
+ eks
153
+ f.o.m
154
+ dk-storbrit
155
+ dk-jugo
156
+ n.z
157
+ derivater
158
+ c
159
+ pt
160
+ vm-kval
161
+ kl
162
+ hr
163
+ cand
164
+ jur
165
+ sav
166
+ h.c
167
+ arab.-danm
168
+ d.a.d
169
+ fl
170
+ o.a
171
+ a.s
172
+ cand.polit
173
+ grundejerform
174
+ j
175
+ faglærte
176
+ cr
177
+ a.a
178
+ mou
179
+ f.r.i
180
+ årh
181
+ o.m.m
182
+ sve
183
+ c.a
184
+ engl
185
+ sikkerhedssystemerne
186
+ m.f
187
+ j.k
188
+ phil
189
+ f
190
+ vet
191
+ mio
192
+ k.e
193
+ m.k
194
+ atla
195
+ idrætsg
196
+ n.n
197
+ 4-bakketur
198
+ dvs
199
+ sdr
200
+ s.j
201
+ hol
202
+ s.h
203
+ pei
204
+ kbhvn
205
+ aa
206
+ m.g.i
207
+ fvt
208
+
209
+ b.c
210
+ th
211
+ lrs
@@ -0,0 +1,101 @@
1
+ ##number## skak
2
+ ##number## speedway
3
+ ##number## rally
4
+ ##number## april
5
+ ##number## dm-fin
6
+ ##number## viceformand
7
+ m jensen
8
+ ##number## kano/kajak
9
+ ##number## bowling
10
+ ##number## dm-finale
11
+ ##number## årh.
12
+ ##number## januar
13
+ ##number## august
14
+ ##number## marathon
15
+ ##number## kamp
16
+ ##number## skihop
17
+ ##number## etage
18
+ ##number## tennis
19
+ ##number## cykling
20
+ e andersen
21
+ ##number## december
22
+ g h.
23
+ ##number## neb
24
+ ##number## sektion
25
+ ##number## afd.
26
+ ##number## klasse
27
+ ##number## trampolin
28
+ ##number## bordtennis
29
+ ##number## formel
30
+ ##number## århundredes
31
+ ##number## dm-semifin
32
+ ##number## heks
33
+ ##number## taekwondo
34
+ ##number## galop
35
+ ##number## basketball
36
+ ##number## dm
37
+ m skræl
38
+ ##number## trav
39
+ ##number## provins
40
+ ##number## triathlon
41
+ k axel
42
+ ##number## rugby
43
+ s h.
44
+ ##number## klaverkoncert
45
+ a p.
46
+ e løgstrup
47
+ k telefax
48
+ ##number## gyldendal
49
+ ##number## fodbold
50
+ e rosenfeldt
51
+ ##number## oktober
52
+ k o.
53
+ ##number## september
54
+ ##number## dec.
55
+ ##number## juledag
56
+ ##number## badminton
57
+ ##number## sejlsport
58
+ ##number## håndbold
59
+ r førsund
60
+ e jørgensen
61
+ d ##number##
62
+ k e
63
+ ##number## alp.ski
64
+ ##number## judo
65
+ ##number## roning
66
+ ##number## november
67
+ ##number## atletik
68
+ ##number## århundrede
69
+ ##number## ridning
70
+ ##number## marts
71
+ m andersen
72
+ d roosevelt
73
+ ##number## brydning
74
+ s kr.
75
+ ##number## runde
76
+ ##number## division
77
+ ##number## sal
78
+ ##number## boksning
79
+ ##number## minut
80
+ ##number## golf
81
+ ##number## juni
82
+ ##number## symfoni
83
+ ##number## hurtigløb
84
+ k jørgensen
85
+ ##number## jörgen
86
+ ##number## klasses
87
+ e jacobsen
88
+ k jensen
89
+ ##number## februar
90
+ k nielsen
91
+ ##number## volleyball
92
+ ##number## maj
93
+ ##number## verdenskrig
94
+ ##number## juli
95
+ ##number## ishockey
96
+ ##number## kunstskøjteløb
97
+ b jørgensen
98
+ ##number## gymnastik
99
+ ##number## svømning
100
+ ##number## tw
101
+ i pedersens