semantic-compressor 2.1__py3-none-any.whl → 2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84) hide show
  1. compressor/resources/nltk_data/tokenizers/punkt_tab/README +98 -0
  2. compressor/resources/nltk_data/tokenizers/punkt_tab/czech/abbrev_types.txt +118 -0
  3. compressor/resources/nltk_data/tokenizers/punkt_tab/czech/collocations.tab +96 -0
  4. compressor/resources/nltk_data/tokenizers/punkt_tab/czech/ortho_context.tab +52789 -0
  5. compressor/resources/nltk_data/tokenizers/punkt_tab/czech/sent_starters.txt +54 -0
  6. compressor/resources/nltk_data/tokenizers/punkt_tab/danish/abbrev_types.txt +211 -0
  7. compressor/resources/nltk_data/tokenizers/punkt_tab/danish/collocations.tab +101 -0
  8. compressor/resources/nltk_data/tokenizers/punkt_tab/danish/ortho_context.tab +53913 -0
  9. compressor/resources/nltk_data/tokenizers/punkt_tab/danish/sent_starters.txt +64 -0
  10. compressor/resources/nltk_data/tokenizers/punkt_tab/dutch/abbrev_types.txt +99 -0
  11. compressor/resources/nltk_data/tokenizers/punkt_tab/dutch/collocations.tab +37 -0
  12. compressor/resources/nltk_data/tokenizers/punkt_tab/dutch/ortho_context.tab +32208 -0
  13. compressor/resources/nltk_data/tokenizers/punkt_tab/dutch/sent_starters.txt +54 -0
  14. compressor/resources/nltk_data/tokenizers/punkt_tab/english/abbrev_types.txt +156 -0
  15. compressor/resources/nltk_data/tokenizers/punkt_tab/english/collocations.tab +37 -0
  16. compressor/resources/nltk_data/tokenizers/punkt_tab/english/ortho_context.tab +20366 -0
  17. compressor/resources/nltk_data/tokenizers/punkt_tab/english/sent_starters.txt +39 -0
  18. compressor/resources/nltk_data/tokenizers/punkt_tab/estonian/abbrev_types.txt +48 -0
  19. compressor/resources/nltk_data/tokenizers/punkt_tab/estonian/collocations.tab +100 -0
  20. compressor/resources/nltk_data/tokenizers/punkt_tab/estonian/ortho_context.tab +68544 -0
  21. compressor/resources/nltk_data/tokenizers/punkt_tab/estonian/sent_starters.txt +89 -0
  22. compressor/resources/nltk_data/tokenizers/punkt_tab/finnish/abbrev_types.txt +81 -0
  23. compressor/resources/nltk_data/tokenizers/punkt_tab/finnish/collocations.tab +167 -0
  24. compressor/resources/nltk_data/tokenizers/punkt_tab/finnish/ortho_context.tab +79765 -0
  25. compressor/resources/nltk_data/tokenizers/punkt_tab/finnish/sent_starters.txt +86 -0
  26. compressor/resources/nltk_data/tokenizers/punkt_tab/french/abbrev_types.txt +61 -0
  27. compressor/resources/nltk_data/tokenizers/punkt_tab/french/collocations.tab +18 -0
  28. compressor/resources/nltk_data/tokenizers/punkt_tab/french/ortho_context.tab +26726 -0
  29. compressor/resources/nltk_data/tokenizers/punkt_tab/french/sent_starters.txt +48 -0
  30. compressor/resources/nltk_data/tokenizers/punkt_tab/german/abbrev_types.txt +71 -0
  31. compressor/resources/nltk_data/tokenizers/punkt_tab/german/collocations.tab +28 -0
  32. compressor/resources/nltk_data/tokenizers/punkt_tab/german/ortho_context.tab +60260 -0
  33. compressor/resources/nltk_data/tokenizers/punkt_tab/german/sent_starters.txt +107 -0
  34. compressor/resources/nltk_data/tokenizers/punkt_tab/greek/abbrev_types.txt +100 -0
  35. compressor/resources/nltk_data/tokenizers/punkt_tab/greek/collocations.tab +7 -0
  36. compressor/resources/nltk_data/tokenizers/punkt_tab/greek/ortho_context.tab +29624 -0
  37. compressor/resources/nltk_data/tokenizers/punkt_tab/greek/sent_starters.txt +54 -0
  38. compressor/resources/nltk_data/tokenizers/punkt_tab/italian/abbrev_types.txt +125 -0
  39. compressor/resources/nltk_data/tokenizers/punkt_tab/italian/collocations.tab +6 -0
  40. compressor/resources/nltk_data/tokenizers/punkt_tab/italian/ortho_context.tab +29929 -0
  41. compressor/resources/nltk_data/tokenizers/punkt_tab/italian/sent_starters.txt +40 -0
  42. compressor/resources/nltk_data/tokenizers/punkt_tab/malayalam/abbrev_types.txt +285 -0
  43. compressor/resources/nltk_data/tokenizers/punkt_tab/malayalam/collocations.tab +153 -0
  44. compressor/resources/nltk_data/tokenizers/punkt_tab/malayalam/ortho_context.tab +10520 -0
  45. compressor/resources/nltk_data/tokenizers/punkt_tab/malayalam/sent_starters.txt +14 -0
  46. compressor/resources/nltk_data/tokenizers/punkt_tab/norwegian/abbrev_types.txt +106 -0
  47. compressor/resources/nltk_data/tokenizers/punkt_tab/norwegian/collocations.tab +54 -0
  48. compressor/resources/nltk_data/tokenizers/punkt_tab/norwegian/ortho_context.tab +54125 -0
  49. compressor/resources/nltk_data/tokenizers/punkt_tab/norwegian/sent_starters.txt +63 -0
  50. compressor/resources/nltk_data/tokenizers/punkt_tab/polish/abbrev_types.txt +225 -0
  51. compressor/resources/nltk_data/tokenizers/punkt_tab/polish/collocations.tab +57 -0
  52. compressor/resources/nltk_data/tokenizers/punkt_tab/polish/ortho_context.tab +81425 -0
  53. compressor/resources/nltk_data/tokenizers/punkt_tab/polish/sent_starters.txt +71 -0
  54. compressor/resources/nltk_data/tokenizers/punkt_tab/portuguese/abbrev_types.txt +72 -0
  55. compressor/resources/nltk_data/tokenizers/punkt_tab/portuguese/collocations.tab +5 -0
  56. compressor/resources/nltk_data/tokenizers/punkt_tab/portuguese/ortho_context.tab +30167 -0
  57. compressor/resources/nltk_data/tokenizers/punkt_tab/portuguese/sent_starters.txt +40 -0
  58. compressor/resources/nltk_data/tokenizers/punkt_tab/russian/abbrev_types.txt +1989 -0
  59. compressor/resources/nltk_data/tokenizers/punkt_tab/russian/collocations.tab +0 -0
  60. compressor/resources/nltk_data/tokenizers/punkt_tab/russian/ortho_context.tab +1 -0
  61. compressor/resources/nltk_data/tokenizers/punkt_tab/russian/sent_starters.txt +0 -0
  62. compressor/resources/nltk_data/tokenizers/punkt_tab/slovene/abbrev_types.txt +73 -0
  63. compressor/resources/nltk_data/tokenizers/punkt_tab/slovene/collocations.tab +74 -0
  64. compressor/resources/nltk_data/tokenizers/punkt_tab/slovene/ortho_context.tab +35434 -0
  65. compressor/resources/nltk_data/tokenizers/punkt_tab/slovene/sent_starters.txt +58 -0
  66. compressor/resources/nltk_data/tokenizers/punkt_tab/spanish/abbrev_types.txt +66 -0
  67. compressor/resources/nltk_data/tokenizers/punkt_tab/spanish/collocations.tab +7 -0
  68. compressor/resources/nltk_data/tokenizers/punkt_tab/spanish/ortho_context.tab +27443 -0
  69. compressor/resources/nltk_data/tokenizers/punkt_tab/spanish/sent_starters.txt +46 -0
  70. compressor/resources/nltk_data/tokenizers/punkt_tab/swedish/abbrev_types.txt +39 -0
  71. compressor/resources/nltk_data/tokenizers/punkt_tab/swedish/collocations.tab +8 -0
  72. compressor/resources/nltk_data/tokenizers/punkt_tab/swedish/ortho_context.tab +44485 -0
  73. compressor/resources/nltk_data/tokenizers/punkt_tab/swedish/sent_starters.txt +49 -0
  74. compressor/resources/nltk_data/tokenizers/punkt_tab/turkish/abbrev_types.txt +67 -0
  75. compressor/resources/nltk_data/tokenizers/punkt_tab/turkish/collocations.tab +14 -0
  76. compressor/resources/nltk_data/tokenizers/punkt_tab/turkish/ortho_context.tab +45926 -0
  77. compressor/resources/nltk_data/tokenizers/punkt_tab/turkish/sent_starters.txt +87 -0
  78. compressor/resources/nltk_data/tokenizers/punkt_tab.zip +0 -0
  79. compressor/semantic.py +37 -3
  80. {semantic_compressor-2.1.dist-info → semantic_compressor-2.3.dist-info}/METADATA +1 -1
  81. {semantic_compressor-2.1.dist-info → semantic_compressor-2.3.dist-info}/RECORD +84 -6
  82. {semantic_compressor-2.1.dist-info → semantic_compressor-2.3.dist-info}/LICENSE +0 -0
  83. {semantic_compressor-2.1.dist-info → semantic_compressor-2.3.dist-info}/WHEEL +0 -0
  84. {semantic_compressor-2.1.dist-info → semantic_compressor-2.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,54 @@
1
+ het
2
+ daardoor
3
+ de
4
+ er
5
+ hoewel
6
+ wat
7
+ urlings
8
+ na
9
+ ze
10
+ alleen
11
+ dat
12
+ ik
13
+ pijls
14
+ wie
15
+ daarna
16
+ foto
17
+ als
18
+ boer
19
+ hammes
20
+ verder
21
+ ook
22
+ evers
23
+ vandaar
24
+ toen
25
+ we
26
+ langenberg
27
+ naast
28
+ want
29
+ in
30
+ wij
31
+ zo
32
+ hendrikx
33
+ daar
34
+ crouzen
35
+ dit
36
+ daarnaast
37
+ anp
38
+ zij
39
+ behalve
40
+ waarom
41
+ daarom
42
+ bovendien
43
+ hij
44
+ daarbij
45
+ nee
46
+ volgens
47
+ daarmee
48
+ bukkems
49
+ dvnl
50
+ eén
51
+ pas
52
+ tijdens
53
+ vooral
54
+ maar
@@ -0,0 +1,156 @@
1
+ ct
2
+ m.j
3
+ t
4
+ a.c
5
+ n.h
6
+ ms
7
+ p.a.m
8
+ dr
9
+ pa
10
+ p.m
11
+ u.k
12
+ st
13
+ dec
14
+ u.s.a
15
+ lt
16
+ g.k
17
+ adm
18
+ p
19
+ h.m
20
+ ga
21
+ tenn
22
+ yr
23
+ sen
24
+ n.c
25
+ j.j
26
+ d.h
27
+ s.g
28
+ inc
29
+ vs
30
+ s.p.a
31
+ a.t
32
+ n
33
+ feb
34
+ sr
35
+ jan
36
+ s.a.y
37
+ n.y
38
+ col
39
+ g.f
40
+ c.o.m.b
41
+ d
42
+ ft
43
+ va
44
+ r.k
45
+ e.f
46
+ chg
47
+ r.i
48
+ a.g
49
+ minn
50
+ a.h
51
+ k
52
+ n.j
53
+ m
54
+ l.f
55
+ f.j
56
+ gen
57
+ i.m.s
58
+ s.a
59
+ aug
60
+ j.p
61
+ okla
62
+ m.d.c
63
+ ltd
64
+ oct
65
+ s
66
+ vt
67
+ r.a
68
+ j.c
69
+ ariz
70
+ w.w
71
+ b.v
72
+ ore
73
+ h
74
+ w.r
75
+ e.h
76
+ mrs
77
+ cie
78
+ corp
79
+ w
80
+ n.v
81
+ a.d
82
+ r.j
83
+ ok
84
+ . .
85
+ e.m
86
+ w.c
87
+ ill
88
+ nov
89
+ u.s
90
+ prof
91
+ conn
92
+ u.s.s.r
93
+ mg
94
+ f.g
95
+ ph.d
96
+ g
97
+ calif
98
+ messrs
99
+ h.f
100
+ wash
101
+ tues
102
+ sw
103
+ bros
104
+ u.n
105
+ l
106
+ wis
107
+ mr
108
+ sep
109
+ d.c
110
+ ave
111
+ e.l
112
+ co
113
+ s.s
114
+ reps
115
+ c
116
+ r.t
117
+ h.c
118
+ r
119
+ wed
120
+ a.s
121
+ v
122
+ fla
123
+ jr
124
+ r.h
125
+ c.v
126
+ m.b.a
127
+ rep
128
+ a.a
129
+ e
130
+ c.i.t
131
+ l.a
132
+ b.f
133
+ j.b
134
+ d.w
135
+ j.k
136
+ ala
137
+ f
138
+ w.va
139
+ sept
140
+ mich
141
+ n.m
142
+ j.r
143
+ l.p
144
+ s.c
145
+ colo
146
+ fri
147
+ a.m
148
+ g.d
149
+ kan
150
+ maj
151
+ ky
152
+ a.m.e
153
+ n.d
154
+ t.j
155
+ cos
156
+ nev
@@ -0,0 +1,37 @@
1
+ ##number## international
2
+ ##number## rj
3
+ ##number## commodities
4
+ ##number## cooper
5
+ b stewart
6
+ ##number## genentech
7
+ ##number## wedgestone
8
+ i toussie
9
+ ##number## pepper
10
+ j fialka
11
+ o ludcke
12
+ ##number## insider
13
+ ##number## aes
14
+ i magnin
15
+ ##number## credit
16
+ ##number## corrections
17
+ ##number## financing
18
+ ##number## henley
19
+ ##number## business
20
+ ##number## pay-fone
21
+ b wigton
22
+ b edelman
23
+ b levine
24
+ ##number## leisure
25
+ b smith
26
+ j walter
27
+ ##number## pegasus
28
+ ##number## dividend
29
+ j aron
30
+ ##number## review
31
+ ##number## abreast
32
+ ##number## who
33
+ ##number## letters
34
+ ##number## colgate
35
+ ##number## cbot
36
+ ##number## notable
37
+ ##number## zimmer