semantic-compressor 2.1__py3-none-any.whl → 2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84) hide show
  1. compressor/resources/nltk_data/tokenizers/punkt_tab/README +98 -0
  2. compressor/resources/nltk_data/tokenizers/punkt_tab/czech/abbrev_types.txt +118 -0
  3. compressor/resources/nltk_data/tokenizers/punkt_tab/czech/collocations.tab +96 -0
  4. compressor/resources/nltk_data/tokenizers/punkt_tab/czech/ortho_context.tab +52789 -0
  5. compressor/resources/nltk_data/tokenizers/punkt_tab/czech/sent_starters.txt +54 -0
  6. compressor/resources/nltk_data/tokenizers/punkt_tab/danish/abbrev_types.txt +211 -0
  7. compressor/resources/nltk_data/tokenizers/punkt_tab/danish/collocations.tab +101 -0
  8. compressor/resources/nltk_data/tokenizers/punkt_tab/danish/ortho_context.tab +53913 -0
  9. compressor/resources/nltk_data/tokenizers/punkt_tab/danish/sent_starters.txt +64 -0
  10. compressor/resources/nltk_data/tokenizers/punkt_tab/dutch/abbrev_types.txt +99 -0
  11. compressor/resources/nltk_data/tokenizers/punkt_tab/dutch/collocations.tab +37 -0
  12. compressor/resources/nltk_data/tokenizers/punkt_tab/dutch/ortho_context.tab +32208 -0
  13. compressor/resources/nltk_data/tokenizers/punkt_tab/dutch/sent_starters.txt +54 -0
  14. compressor/resources/nltk_data/tokenizers/punkt_tab/english/abbrev_types.txt +156 -0
  15. compressor/resources/nltk_data/tokenizers/punkt_tab/english/collocations.tab +37 -0
  16. compressor/resources/nltk_data/tokenizers/punkt_tab/english/ortho_context.tab +20366 -0
  17. compressor/resources/nltk_data/tokenizers/punkt_tab/english/sent_starters.txt +39 -0
  18. compressor/resources/nltk_data/tokenizers/punkt_tab/estonian/abbrev_types.txt +48 -0
  19. compressor/resources/nltk_data/tokenizers/punkt_tab/estonian/collocations.tab +100 -0
  20. compressor/resources/nltk_data/tokenizers/punkt_tab/estonian/ortho_context.tab +68544 -0
  21. compressor/resources/nltk_data/tokenizers/punkt_tab/estonian/sent_starters.txt +89 -0
  22. compressor/resources/nltk_data/tokenizers/punkt_tab/finnish/abbrev_types.txt +81 -0
  23. compressor/resources/nltk_data/tokenizers/punkt_tab/finnish/collocations.tab +167 -0
  24. compressor/resources/nltk_data/tokenizers/punkt_tab/finnish/ortho_context.tab +79765 -0
  25. compressor/resources/nltk_data/tokenizers/punkt_tab/finnish/sent_starters.txt +86 -0
  26. compressor/resources/nltk_data/tokenizers/punkt_tab/french/abbrev_types.txt +61 -0
  27. compressor/resources/nltk_data/tokenizers/punkt_tab/french/collocations.tab +18 -0
  28. compressor/resources/nltk_data/tokenizers/punkt_tab/french/ortho_context.tab +26726 -0
  29. compressor/resources/nltk_data/tokenizers/punkt_tab/french/sent_starters.txt +48 -0
  30. compressor/resources/nltk_data/tokenizers/punkt_tab/german/abbrev_types.txt +71 -0
  31. compressor/resources/nltk_data/tokenizers/punkt_tab/german/collocations.tab +28 -0
  32. compressor/resources/nltk_data/tokenizers/punkt_tab/german/ortho_context.tab +60260 -0
  33. compressor/resources/nltk_data/tokenizers/punkt_tab/german/sent_starters.txt +107 -0
  34. compressor/resources/nltk_data/tokenizers/punkt_tab/greek/abbrev_types.txt +100 -0
  35. compressor/resources/nltk_data/tokenizers/punkt_tab/greek/collocations.tab +7 -0
  36. compressor/resources/nltk_data/tokenizers/punkt_tab/greek/ortho_context.tab +29624 -0
  37. compressor/resources/nltk_data/tokenizers/punkt_tab/greek/sent_starters.txt +54 -0
  38. compressor/resources/nltk_data/tokenizers/punkt_tab/italian/abbrev_types.txt +125 -0
  39. compressor/resources/nltk_data/tokenizers/punkt_tab/italian/collocations.tab +6 -0
  40. compressor/resources/nltk_data/tokenizers/punkt_tab/italian/ortho_context.tab +29929 -0
  41. compressor/resources/nltk_data/tokenizers/punkt_tab/italian/sent_starters.txt +40 -0
  42. compressor/resources/nltk_data/tokenizers/punkt_tab/malayalam/abbrev_types.txt +285 -0
  43. compressor/resources/nltk_data/tokenizers/punkt_tab/malayalam/collocations.tab +153 -0
  44. compressor/resources/nltk_data/tokenizers/punkt_tab/malayalam/ortho_context.tab +10520 -0
  45. compressor/resources/nltk_data/tokenizers/punkt_tab/malayalam/sent_starters.txt +14 -0
  46. compressor/resources/nltk_data/tokenizers/punkt_tab/norwegian/abbrev_types.txt +106 -0
  47. compressor/resources/nltk_data/tokenizers/punkt_tab/norwegian/collocations.tab +54 -0
  48. compressor/resources/nltk_data/tokenizers/punkt_tab/norwegian/ortho_context.tab +54125 -0
  49. compressor/resources/nltk_data/tokenizers/punkt_tab/norwegian/sent_starters.txt +63 -0
  50. compressor/resources/nltk_data/tokenizers/punkt_tab/polish/abbrev_types.txt +225 -0
  51. compressor/resources/nltk_data/tokenizers/punkt_tab/polish/collocations.tab +57 -0
  52. compressor/resources/nltk_data/tokenizers/punkt_tab/polish/ortho_context.tab +81425 -0
  53. compressor/resources/nltk_data/tokenizers/punkt_tab/polish/sent_starters.txt +71 -0
  54. compressor/resources/nltk_data/tokenizers/punkt_tab/portuguese/abbrev_types.txt +72 -0
  55. compressor/resources/nltk_data/tokenizers/punkt_tab/portuguese/collocations.tab +5 -0
  56. compressor/resources/nltk_data/tokenizers/punkt_tab/portuguese/ortho_context.tab +30167 -0
  57. compressor/resources/nltk_data/tokenizers/punkt_tab/portuguese/sent_starters.txt +40 -0
  58. compressor/resources/nltk_data/tokenizers/punkt_tab/russian/abbrev_types.txt +1989 -0
  59. compressor/resources/nltk_data/tokenizers/punkt_tab/russian/collocations.tab +0 -0
  60. compressor/resources/nltk_data/tokenizers/punkt_tab/russian/ortho_context.tab +1 -0
  61. compressor/resources/nltk_data/tokenizers/punkt_tab/russian/sent_starters.txt +0 -0
  62. compressor/resources/nltk_data/tokenizers/punkt_tab/slovene/abbrev_types.txt +73 -0
  63. compressor/resources/nltk_data/tokenizers/punkt_tab/slovene/collocations.tab +74 -0
  64. compressor/resources/nltk_data/tokenizers/punkt_tab/slovene/ortho_context.tab +35434 -0
  65. compressor/resources/nltk_data/tokenizers/punkt_tab/slovene/sent_starters.txt +58 -0
  66. compressor/resources/nltk_data/tokenizers/punkt_tab/spanish/abbrev_types.txt +66 -0
  67. compressor/resources/nltk_data/tokenizers/punkt_tab/spanish/collocations.tab +7 -0
  68. compressor/resources/nltk_data/tokenizers/punkt_tab/spanish/ortho_context.tab +27443 -0
  69. compressor/resources/nltk_data/tokenizers/punkt_tab/spanish/sent_starters.txt +46 -0
  70. compressor/resources/nltk_data/tokenizers/punkt_tab/swedish/abbrev_types.txt +39 -0
  71. compressor/resources/nltk_data/tokenizers/punkt_tab/swedish/collocations.tab +8 -0
  72. compressor/resources/nltk_data/tokenizers/punkt_tab/swedish/ortho_context.tab +44485 -0
  73. compressor/resources/nltk_data/tokenizers/punkt_tab/swedish/sent_starters.txt +49 -0
  74. compressor/resources/nltk_data/tokenizers/punkt_tab/turkish/abbrev_types.txt +67 -0
  75. compressor/resources/nltk_data/tokenizers/punkt_tab/turkish/collocations.tab +14 -0
  76. compressor/resources/nltk_data/tokenizers/punkt_tab/turkish/ortho_context.tab +45926 -0
  77. compressor/resources/nltk_data/tokenizers/punkt_tab/turkish/sent_starters.txt +87 -0
  78. compressor/resources/nltk_data/tokenizers/punkt_tab.zip +0 -0
  79. compressor/semantic.py +37 -3
  80. {semantic_compressor-2.1.dist-info → semantic_compressor-2.3.dist-info}/METADATA +1 -1
  81. {semantic_compressor-2.1.dist-info → semantic_compressor-2.3.dist-info}/RECORD +84 -6
  82. {semantic_compressor-2.1.dist-info → semantic_compressor-2.3.dist-info}/LICENSE +0 -0
  83. {semantic_compressor-2.1.dist-info → semantic_compressor-2.3.dist-info}/WHEEL +0 -0
  84. {semantic_compressor-2.1.dist-info → semantic_compressor-2.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,63 @@
1
+ de
2
+ først
3
+ selv
4
+ vi
5
+ derfor
6
+ dessutan
7
+ njvi
8
+ dette
9
+ politiet
10
+ nji
11
+ njde
12
+ hensikten
13
+ totalt
14
+ njnår
15
+ ingen
16
+ mannen
17
+
18
+ anmeldt
19
+ foto
20
+ begge
21
+ difor
22
+ njdet
23
+ ifølge
24
+
25
+ arkiv
26
+ når
27
+ det
28
+
29
+ no
30
+ deretter
31
+ samtidig
32
+ side
33
+ men
34
+ både
35
+ njden
36
+ dersom
37
+ dessverre
38
+ årsaken
39
+ da
40
+ njmen
41
+ dessuten
42
+ denne
43
+ her
44
+ ho
45
+ hvis
46
+ tekst
47
+ problemet
48
+ dermed
49
+ etter
50
+ jeg
51
+ den
52
+ kanskje
53
+ ørdag
54
+ tanken
55
+ foruten
56
+ resultatet
57
+ njjeg
58
+ dei
59
+ ntb
60
+ nkjeg
61
+ hun
62
+ eg
63
+ han
@@ -0,0 +1,225 @@
1
+ t
2
+ w.m
3
+ o.o
4
+ n.p.m
5
+ podst
6
+ a.c
7
+ paszportów
8
+ sn
9
+ ms
10
+ j.u
11
+ st
12
+ szydercy-realisty
13
+ k.p
14
+ lit
15
+ h.b
16
+ ad.4
17
+ św
18
+ dz
19
+ pd
20
+ rys
21
+ e.s
22
+ 2e
23
+ h.s
24
+ a.w
25
+ etc
26
+ p.s
27
+ hab
28
+ rz
29
+ dn
30
+ wikipedii
31
+ itp
32
+ l.w
33
+ dyr
34
+ hrubieszowie
35
+ 8.i
36
+ k.m.b
37
+ m.st
38
+ kaszb
39
+ b.b
40
+ m.o
41
+ inc
42
+ n
43
+ szt
44
+ 7.d
45
+ przejaśnieniami
46
+ godz
47
+ j.ch
48
+ n.e
49
+ głęb
50
+ r.c
51
+ inż
52
+ o.p
53
+ z.d
54
+ episkopat
55
+ c.o
56
+ tzw
57
+ ew
58
+ c.s
59
+ habeo
60
+ ł.g
61
+ e.cz
62
+ ub
63
+ a.k.m
64
+ m.a
65
+ wf
66
+ 9.i
67
+ kpt
68
+ g3
69
+ szer
70
+ d.k.p.c
71
+ sz
72
+ 2d
73
+ 9.b
74
+ pocz
75
+ zw
76
+ b
77
+ adminem
78
+ k
79
+ łac
80
+ ryc
81
+ tad
82
+ tab
83
+ gen
84
+ p.g
85
+ r.m
86
+ j.w
87
+ d.o.c
88
+ a.l
89
+ 1971r
90
+ s.a
91
+ xiw
92
+ art
93
+ k.p.c
94
+ s
95
+ ds
96
+ 21.1.—18.2
97
+ j.c
98
+ w.w
99
+ h
100
+ af
101
+ płn
102
+ lic
103
+ s.k
104
+ tzn
105
+ j.a
106
+ s.o.s
107
+ z.z
108
+ ad.5
109
+ pn
110
+ cyt
111
+ kw
112
+ woj
113
+ red
114
+ ang
115
+ 2b
116
+ ok
117
+ sześc
118
+ ul
119
+ pow
120
+ str
121
+ a.ch
122
+ strawy
123
+ w.c
124
+ śr
125
+ bie
126
+ zm
127
+ nt
128
+
129
+ br
130
+ tow
131
+ u.s
132
+ prof
133
+ zach
134
+ ś.p
135
+ por
136
+ op
137
+ ł.s
138
+ agd
139
+ g
140
+ os
141
+ proc
142
+ reż
143
+ n.t
144
+ wsch
145
+ bm
146
+ dol
147
+ s3
148
+ j.s
149
+ p.n.e
150
+ b.r
151
+ błędem
152
+ 1b
153
+ l
154
+ m.t
155
+ h.p
156
+ mr
157
+ zn
158
+ 1d
159
+ ur
160
+ g.w
161
+ 9x
162
+ doc
163
+ 7.i
164
+ p.j
165
+ tys
166
+ offenbacha
167
+ k.i
168
+ mkw
169
+ pt
170
+ gq
171
+ s.t
172
+ kl
173
+ fot
174
+ k.k
175
+ a.k
176
+ w.h
177
+
178
+ r
179
+ mec
180
+ poł
181
+ m.in
182
+ śp
183
+ o.n
184
+ a.s
185
+ maks
186
+ mk
187
+ j
188
+ p.e.n
189
+ ub.r
190
+ cz
191
+ pw
192
+ 13.i
193
+ ks
194
+ np
195
+ pl
196
+ m.d
197
+ l.r
198
+ v.f
199
+ k.i.g
200
+ odl
201
+ poz
202
+ p.o
203
+ tj
204
+ j.k
205
+ f2
206
+ obw
207
+ f
208
+ cenowo
209
+ 2c
210
+ małg
211
+ m.k
212
+ jez
213
+ 7.b
214
+ s.c
215
+ ż
216
+ nm
217
+ luf
218
+ 3d
219
+ hm
220
+ m.b
221
+ 4a
222
+ inf
223
+ dh
224
+
225
+ th
@@ -0,0 +1,57 @@
1
+ ##number## jedność
2
+ ##number## strefę
3
+ ##number## przywrócenie
4
+ ##number## przyjmowane
5
+ ##number## minucie
6
+ ##number## funkcjonariuszowi
7
+ ##number## podaj
8
+ ##number## organ
9
+ ##number## przedsiębiorca
10
+ ##number## schematy
11
+ ##number## konferencja
12
+ ##number## likwidatorzy
13
+ m in
14
+ ##number## wojewódzka
15
+ ##number## organizację
16
+ ##number## ilekroć
17
+ ##number## członek
18
+ ##number## przełożeni
19
+ ##number## wydanie
20
+ ##number## zaliczka
21
+ c wiatr
22
+ ##number## kapitał
23
+ ##number## instruktor
24
+ ##number## ubezpieczenie
25
+ ##number## rejestr
26
+ ##number## ilu
27
+ m położ
28
+ ##number## państwowa
29
+ ##number## przyjrzyj
30
+ ##number## ujawnienie
31
+ ##number## rozpoznawanie
32
+ ##number## wdrażanie
33
+ ##number## min
34
+ ##number## rocznicy
35
+ ##number## osoba
36
+ ##number## kociarz
37
+ ##number## przewody
38
+ ##number## przepis
39
+ ##number## kompetencje
40
+ ##number## zamawiający
41
+ a leppera
42
+ ##number## edycji
43
+ ##number## upoważnia
44
+ ##number## naczelna
45
+ ##number## patrz
46
+ ##number## przepisu
47
+ u nr
48
+ ##number## pułku
49
+ ##number## ugodę
50
+ ##number## minister
51
+ ##number## poł.
52
+ ##number## medytacja
53
+ ##number## fundusz
54
+ ##number## 30godz
55
+ w n.e.
56
+ ##number## producentem
57
+ ##number## wysokość