rere 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.classpath +260 -0
- data/.gitignore +28 -0
- data/.project +14 -0
- data/.ruby-version +1 -0
- data/.settings/org.eclim.prefs +3 -0
- data/.settings/org.eclipse.jdt.core.prefs +5 -0
- data/.settings/org.eclipse.jdt.ui.prefs +2 -0
- data/Gemfile +7 -0
- data/LICENSE +22 -0
- data/README.md +44 -0
- data/Rakefile +65 -0
- data/bin/solr +61 -0
- data/example/config/solr.yml +23 -0
- data/example/log/.gitkeep +0 -0
- data/example/solr/README.txt +63 -0
- data/example/solr/collection1/README.txt +50 -0
- data/example/solr/collection1/conf/admin-extra.html +24 -0
- data/example/solr/collection1/conf/admin-extra.menu-bottom.html +25 -0
- data/example/solr/collection1/conf/admin-extra.menu-top.html +25 -0
- data/example/solr/collection1/conf/currency.xml +67 -0
- data/example/solr/collection1/conf/elevate.xml +38 -0
- data/example/solr/collection1/conf/lang/contractions_ca.txt +8 -0
- data/example/solr/collection1/conf/lang/contractions_fr.txt +15 -0
- data/example/solr/collection1/conf/lang/contractions_ga.txt +5 -0
- data/example/solr/collection1/conf/lang/contractions_it.txt +23 -0
- data/example/solr/collection1/conf/lang/hyphenations_ga.txt +5 -0
- data/example/solr/collection1/conf/lang/stemdict_nl.txt +6 -0
- data/example/solr/collection1/conf/lang/stoptags_ja.txt +420 -0
- data/example/solr/collection1/conf/lang/stopwords_ar.txt +125 -0
- data/example/solr/collection1/conf/lang/stopwords_bg.txt +193 -0
- data/example/solr/collection1/conf/lang/stopwords_ca.txt +220 -0
- data/example/solr/collection1/conf/lang/stopwords_cz.txt +172 -0
- data/example/solr/collection1/conf/lang/stopwords_da.txt +108 -0
- data/example/solr/collection1/conf/lang/stopwords_de.txt +292 -0
- data/example/solr/collection1/conf/lang/stopwords_el.txt +78 -0
- data/example/solr/collection1/conf/lang/stopwords_en.txt +54 -0
- data/example/solr/collection1/conf/lang/stopwords_es.txt +354 -0
- data/example/solr/collection1/conf/lang/stopwords_eu.txt +99 -0
- data/example/solr/collection1/conf/lang/stopwords_fa.txt +313 -0
- data/example/solr/collection1/conf/lang/stopwords_fi.txt +95 -0
- data/example/solr/collection1/conf/lang/stopwords_fr.txt +184 -0
- data/example/solr/collection1/conf/lang/stopwords_ga.txt +110 -0
- data/example/solr/collection1/conf/lang/stopwords_gl.txt +161 -0
- data/example/solr/collection1/conf/lang/stopwords_hi.txt +235 -0
- data/example/solr/collection1/conf/lang/stopwords_hu.txt +209 -0
- data/example/solr/collection1/conf/lang/stopwords_hy.txt +46 -0
- data/example/solr/collection1/conf/lang/stopwords_id.txt +359 -0
- data/example/solr/collection1/conf/lang/stopwords_it.txt +301 -0
- data/example/solr/collection1/conf/lang/stopwords_ja.txt +127 -0
- data/example/solr/collection1/conf/lang/stopwords_lv.txt +172 -0
- data/example/solr/collection1/conf/lang/stopwords_nl.txt +117 -0
- data/example/solr/collection1/conf/lang/stopwords_no.txt +192 -0
- data/example/solr/collection1/conf/lang/stopwords_pt.txt +251 -0
- data/example/solr/collection1/conf/lang/stopwords_ro.txt +233 -0
- data/example/solr/collection1/conf/lang/stopwords_ru.txt +241 -0
- data/example/solr/collection1/conf/lang/stopwords_sv.txt +131 -0
- data/example/solr/collection1/conf/lang/stopwords_th.txt +119 -0
- data/example/solr/collection1/conf/lang/stopwords_tr.txt +212 -0
- data/example/solr/collection1/conf/lang/userdict_ja.txt +29 -0
- data/example/solr/collection1/conf/mapping-FoldToASCII.txt +3813 -0
- data/example/solr/collection1/conf/mapping-ISOLatin1Accent.txt +246 -0
- data/example/solr/collection1/conf/protwords.txt +21 -0
- data/example/solr/collection1/conf/schema.xml +1125 -0
- data/example/solr/collection1/conf/scripts.conf +24 -0
- data/example/solr/collection1/conf/solrconfig.xml +1816 -0
- data/example/solr/collection1/conf/spellings.txt +2 -0
- data/example/solr/collection1/conf/stopwords.txt +14 -0
- data/example/solr/collection1/conf/synonyms.txt +29 -0
- data/example/solr/collection1/conf/update-script.js +53 -0
- data/example/solr/collection1/conf/velocity/VM_global_library.vm +170 -0
- data/example/solr/collection1/conf/velocity/browse.vm +50 -0
- data/example/solr/collection1/conf/velocity/cluster.vm +9 -0
- data/example/solr/collection1/conf/velocity/clusterResults.vm +12 -0
- data/example/solr/collection1/conf/velocity/debug.vm +17 -0
- data/example/solr/collection1/conf/velocity/did_you_mean.vm +4 -0
- data/example/solr/collection1/conf/velocity/facet_fields.vm +15 -0
- data/example/solr/collection1/conf/velocity/facet_pivot.vm +3 -0
- data/example/solr/collection1/conf/velocity/facet_queries.vm +3 -0
- data/example/solr/collection1/conf/velocity/facet_ranges.vm +15 -0
- data/example/solr/collection1/conf/velocity/facets.vm +5 -0
- data/example/solr/collection1/conf/velocity/footer.vm +17 -0
- data/example/solr/collection1/conf/velocity/head.vm +32 -0
- data/example/solr/collection1/conf/velocity/header.vm +3 -0
- data/example/solr/collection1/conf/velocity/hit.vm +11 -0
- data/example/solr/collection1/conf/velocity/hitGrouped.vm +24 -0
- data/example/solr/collection1/conf/velocity/join-doc.vm +4 -0
- data/example/solr/collection1/conf/velocity/jquery.autocomplete.css +48 -0
- data/example/solr/collection1/conf/velocity/jquery.autocomplete.js +763 -0
- data/example/solr/collection1/conf/velocity/layout.vm +20 -0
- data/example/solr/collection1/conf/velocity/main.css +208 -0
- data/example/solr/collection1/conf/velocity/product-doc.vm +27 -0
- data/example/solr/collection1/conf/velocity/query.vm +42 -0
- data/example/solr/collection1/conf/velocity/queryGroup.vm +19 -0
- data/example/solr/collection1/conf/velocity/querySpatial.vm +40 -0
- data/example/solr/collection1/conf/velocity/richtext-doc.vm +114 -0
- data/example/solr/collection1/conf/velocity/suggest.vm +3 -0
- data/example/solr/collection1/conf/velocity/tabs.vm +6 -0
- data/example/solr/collection1/conf/xslt/example.xsl +132 -0
- data/example/solr/collection1/conf/xslt/example_atom.xsl +67 -0
- data/example/solr/collection1/conf/xslt/example_rss.xsl +66 -0
- data/example/solr/collection1/conf/xslt/luke.xsl +337 -0
- data/example/solr/collection1/conf/xslt/updateXml.xsl +70 -0
- data/example/solr/collection1/data/index/segments.gen +0 -0
- data/example/solr/collection1/data/index/segments_1 +0 -0
- data/example/solr/data/development/index/segments.gen +0 -0
- data/example/solr/data/development/index/segments_1 +0 -0
- data/example/solr/solr.xml +53 -0
- data/example/solr/zoo.cfg +17 -0
- data/lib/rere.rb +66 -0
- data/lib/rere/railtie.rb +8 -0
- data/lib/rere/server.rb +380 -0
- data/lib/rere/tasks/solr.rake +47 -0
- data/lib/rere/version.rb +3 -0
- data/pom.xml +168 -0
- data/rere.gemspec +26 -0
- data/server/README.txt +78 -0
- data/server/cloud-scripts/zkcli.bat +12 -0
- data/server/cloud-scripts/zkcli.sh +14 -0
- data/server/contexts/solr-jetty-context.xml +8 -0
- data/server/etc/create-solrtest.keystore.sh +37 -0
- data/server/etc/jetty.xml +205 -0
- data/server/etc/logging.properties +38 -0
- data/server/etc/solrtest.keystore +0 -0
- data/server/etc/webdefault.xml +527 -0
- data/server/exampledocs/books.csv +11 -0
- data/server/exampledocs/books.json +51 -0
- data/server/exampledocs/gb18030-example.xml +32 -0
- data/server/exampledocs/hd.xml +56 -0
- data/server/exampledocs/ipod_other.xml +60 -0
- data/server/exampledocs/ipod_video.xml +40 -0
- data/server/exampledocs/manufacturers.xml +75 -0
- data/server/exampledocs/mem.xml +77 -0
- data/server/exampledocs/money.xml +65 -0
- data/server/exampledocs/monitor.xml +35 -0
- data/server/exampledocs/monitor2.xml +34 -0
- data/server/exampledocs/mp500.xml +43 -0
- data/server/exampledocs/post.jar +0 -0
- data/server/exampledocs/post.sh +30 -0
- data/server/exampledocs/sd500.xml +38 -0
- data/server/exampledocs/solr.xml +38 -0
- data/server/exampledocs/test_utf8.sh +93 -0
- data/server/exampledocs/utf8-example.xml +42 -0
- data/server/exampledocs/vidcard.xml +62 -0
- data/server/lib/ext/jcl-over-slf4j-1.6.6.jar +0 -0
- data/server/lib/ext/jul-to-slf4j-1.6.6.jar +0 -0
- data/server/lib/ext/log4j-1.2.16.jar +0 -0
- data/server/lib/ext/slf4j-api-1.6.6.jar +0 -0
- data/server/lib/ext/slf4j-log4j12-1.6.6.jar +0 -0
- data/server/lib/jetty-continuation-8.1.8.v20121106.jar +0 -0
- data/server/lib/jetty-deploy-8.1.8.v20121106.jar +0 -0
- data/server/lib/jetty-http-8.1.8.v20121106.jar +0 -0
- data/server/lib/jetty-io-8.1.8.v20121106.jar +0 -0
- data/server/lib/jetty-jmx-8.1.8.v20121106.jar +0 -0
- data/server/lib/jetty-security-8.1.8.v20121106.jar +0 -0
- data/server/lib/jetty-server-8.1.8.v20121106.jar +0 -0
- data/server/lib/jetty-servlet-8.1.8.v20121106.jar +0 -0
- data/server/lib/jetty-util-8.1.8.v20121106.jar +0 -0
- data/server/lib/jetty-webapp-8.1.8.v20121106.jar +0 -0
- data/server/lib/jetty-xml-8.1.8.v20121106.jar +0 -0
- data/server/lib/servlet-api-3.0.jar +0 -0
- data/server/resources/log4j.properties +19 -0
- data/server/solr/README.txt +63 -0
- data/server/solr/solr.xml +53 -0
- data/server/solr/zoo.cfg +17 -0
- data/server/start.jar +0 -0
- data/server/webapps/solr.war +0 -0
- data/solr/lib/solr-winds-0.1.jar +0 -0
- metadata +284 -0
|
@@ -0,0 +1,301 @@
|
|
|
1
|
+
| From svn.tartarus.org/snowball/trunk/website/algorithms/italian/stop.txt
|
|
2
|
+
| This file is distributed under the BSD License.
|
|
3
|
+
| See http://snowball.tartarus.org/license.php
|
|
4
|
+
| Also see http://www.opensource.org/licenses/bsd-license.html
|
|
5
|
+
| - Encoding was converted to UTF-8.
|
|
6
|
+
| - This notice was added.
|
|
7
|
+
|
|
8
|
+
| An Italian stop word list. Comments begin with vertical bar. Each stop
|
|
9
|
+
| word is at the start of a line.
|
|
10
|
+
|
|
11
|
+
ad | a (to) before vowel
|
|
12
|
+
al | a + il
|
|
13
|
+
allo | a + lo
|
|
14
|
+
ai | a + i
|
|
15
|
+
agli | a + gli
|
|
16
|
+
all | a + l'
|
|
17
|
+
agl | a + gl'
|
|
18
|
+
alla | a + la
|
|
19
|
+
alle | a + le
|
|
20
|
+
con | with
|
|
21
|
+
col | con + il
|
|
22
|
+
coi | con + i (forms collo, cogli etc are now very rare)
|
|
23
|
+
da | from
|
|
24
|
+
dal | da + il
|
|
25
|
+
dallo | da + lo
|
|
26
|
+
dai | da + i
|
|
27
|
+
dagli | da + gli
|
|
28
|
+
dall | da + l'
|
|
29
|
+
dagl | da + gl'
|
|
30
|
+
dalla | da + la
|
|
31
|
+
dalle | da + le
|
|
32
|
+
di | of
|
|
33
|
+
del | di + il
|
|
34
|
+
dello | di + lo
|
|
35
|
+
dei | di + i
|
|
36
|
+
degli | di + gli
|
|
37
|
+
dell | di + l'
|
|
38
|
+
degl | di + gl'
|
|
39
|
+
della | di + la
|
|
40
|
+
delle | di + le
|
|
41
|
+
in | in
|
|
42
|
+
nel | in + il
|
|
43
|
+
nello | in + lo
|
|
44
|
+
nei | in + i
|
|
45
|
+
negli | in + gli
|
|
46
|
+
nell | in + l'
|
|
47
|
+
negl | in + gl'
|
|
48
|
+
nella | in + la
|
|
49
|
+
nelle | in + le
|
|
50
|
+
su | on
|
|
51
|
+
sul | su + il
|
|
52
|
+
sullo | su + lo
|
|
53
|
+
sui | su + i
|
|
54
|
+
sugli | su + gli
|
|
55
|
+
sull | su + l'
|
|
56
|
+
sugl | su + gl'
|
|
57
|
+
sulla | su + la
|
|
58
|
+
sulle | su + le
|
|
59
|
+
per | through, by
|
|
60
|
+
tra | among
|
|
61
|
+
contro | against
|
|
62
|
+
io | I
|
|
63
|
+
tu | thou
|
|
64
|
+
lui | he
|
|
65
|
+
lei | she
|
|
66
|
+
noi | we
|
|
67
|
+
voi | you
|
|
68
|
+
loro | they
|
|
69
|
+
mio | my
|
|
70
|
+
mia |
|
|
71
|
+
miei |
|
|
72
|
+
mie |
|
|
73
|
+
tuo |
|
|
74
|
+
tua |
|
|
75
|
+
tuoi | thy
|
|
76
|
+
tue |
|
|
77
|
+
suo |
|
|
78
|
+
sua |
|
|
79
|
+
suoi | his, her
|
|
80
|
+
sue |
|
|
81
|
+
nostro | our
|
|
82
|
+
nostra |
|
|
83
|
+
nostri |
|
|
84
|
+
nostre |
|
|
85
|
+
vostro | your
|
|
86
|
+
vostra |
|
|
87
|
+
vostri |
|
|
88
|
+
vostre |
|
|
89
|
+
mi | me
|
|
90
|
+
ti | thee
|
|
91
|
+
ci | us, there
|
|
92
|
+
vi | you, there
|
|
93
|
+
lo | him, the
|
|
94
|
+
la | her, the
|
|
95
|
+
li | them
|
|
96
|
+
le | them, the
|
|
97
|
+
gli | to him, the
|
|
98
|
+
ne | from there etc
|
|
99
|
+
il | the
|
|
100
|
+
un | a
|
|
101
|
+
uno | a
|
|
102
|
+
una | a
|
|
103
|
+
ma | but
|
|
104
|
+
ed | and
|
|
105
|
+
se | if
|
|
106
|
+
perché | why, because
|
|
107
|
+
anche | also
|
|
108
|
+
come | how
|
|
109
|
+
dov | where (as dov')
|
|
110
|
+
dove | where
|
|
111
|
+
che | who, that
|
|
112
|
+
chi | who
|
|
113
|
+
cui | whom
|
|
114
|
+
non | not
|
|
115
|
+
più | more
|
|
116
|
+
quale | who, that
|
|
117
|
+
quanto | how much
|
|
118
|
+
quanti |
|
|
119
|
+
quanta |
|
|
120
|
+
quante |
|
|
121
|
+
quello | that
|
|
122
|
+
quelli |
|
|
123
|
+
quella |
|
|
124
|
+
quelle |
|
|
125
|
+
questo | this
|
|
126
|
+
questi |
|
|
127
|
+
questa |
|
|
128
|
+
queste |
|
|
129
|
+
si | yes
|
|
130
|
+
tutto | all
|
|
131
|
+
tutti | all
|
|
132
|
+
|
|
133
|
+
| single letter forms:
|
|
134
|
+
|
|
135
|
+
a | at
|
|
136
|
+
c | as c' for ce or ci
|
|
137
|
+
e | and
|
|
138
|
+
i | the
|
|
139
|
+
l | as l'
|
|
140
|
+
o | or
|
|
141
|
+
|
|
142
|
+
| forms of avere, to have (not including the infinitive):
|
|
143
|
+
|
|
144
|
+
ho
|
|
145
|
+
hai
|
|
146
|
+
ha
|
|
147
|
+
abbiamo
|
|
148
|
+
avete
|
|
149
|
+
hanno
|
|
150
|
+
abbia
|
|
151
|
+
abbiate
|
|
152
|
+
abbiano
|
|
153
|
+
avrò
|
|
154
|
+
avrai
|
|
155
|
+
avrà
|
|
156
|
+
avremo
|
|
157
|
+
avrete
|
|
158
|
+
avranno
|
|
159
|
+
avrei
|
|
160
|
+
avresti
|
|
161
|
+
avrebbe
|
|
162
|
+
avremmo
|
|
163
|
+
avreste
|
|
164
|
+
avrebbero
|
|
165
|
+
avevo
|
|
166
|
+
avevi
|
|
167
|
+
aveva
|
|
168
|
+
avevamo
|
|
169
|
+
avevate
|
|
170
|
+
avevano
|
|
171
|
+
ebbi
|
|
172
|
+
avesti
|
|
173
|
+
ebbe
|
|
174
|
+
avemmo
|
|
175
|
+
aveste
|
|
176
|
+
ebbero
|
|
177
|
+
avessi
|
|
178
|
+
avesse
|
|
179
|
+
avessimo
|
|
180
|
+
avessero
|
|
181
|
+
avendo
|
|
182
|
+
avuto
|
|
183
|
+
avuta
|
|
184
|
+
avuti
|
|
185
|
+
avute
|
|
186
|
+
|
|
187
|
+
| forms of essere, to be (not including the infinitive):
|
|
188
|
+
sono
|
|
189
|
+
sei
|
|
190
|
+
è
|
|
191
|
+
siamo
|
|
192
|
+
siete
|
|
193
|
+
sia
|
|
194
|
+
siate
|
|
195
|
+
siano
|
|
196
|
+
sarò
|
|
197
|
+
sarai
|
|
198
|
+
sarà
|
|
199
|
+
saremo
|
|
200
|
+
sarete
|
|
201
|
+
saranno
|
|
202
|
+
sarei
|
|
203
|
+
saresti
|
|
204
|
+
sarebbe
|
|
205
|
+
saremmo
|
|
206
|
+
sareste
|
|
207
|
+
sarebbero
|
|
208
|
+
ero
|
|
209
|
+
eri
|
|
210
|
+
era
|
|
211
|
+
eravamo
|
|
212
|
+
eravate
|
|
213
|
+
erano
|
|
214
|
+
fui
|
|
215
|
+
fosti
|
|
216
|
+
fu
|
|
217
|
+
fummo
|
|
218
|
+
foste
|
|
219
|
+
furono
|
|
220
|
+
fossi
|
|
221
|
+
fosse
|
|
222
|
+
fossimo
|
|
223
|
+
fossero
|
|
224
|
+
essendo
|
|
225
|
+
|
|
226
|
+
| forms of fare, to do (not including the infinitive, fa, fat-):
|
|
227
|
+
faccio
|
|
228
|
+
fai
|
|
229
|
+
facciamo
|
|
230
|
+
fanno
|
|
231
|
+
faccia
|
|
232
|
+
facciate
|
|
233
|
+
facciano
|
|
234
|
+
farò
|
|
235
|
+
farai
|
|
236
|
+
farà
|
|
237
|
+
faremo
|
|
238
|
+
farete
|
|
239
|
+
faranno
|
|
240
|
+
farei
|
|
241
|
+
faresti
|
|
242
|
+
farebbe
|
|
243
|
+
faremmo
|
|
244
|
+
fareste
|
|
245
|
+
farebbero
|
|
246
|
+
facevo
|
|
247
|
+
facevi
|
|
248
|
+
faceva
|
|
249
|
+
facevamo
|
|
250
|
+
facevate
|
|
251
|
+
facevano
|
|
252
|
+
feci
|
|
253
|
+
facesti
|
|
254
|
+
fece
|
|
255
|
+
facemmo
|
|
256
|
+
faceste
|
|
257
|
+
fecero
|
|
258
|
+
facessi
|
|
259
|
+
facesse
|
|
260
|
+
facessimo
|
|
261
|
+
facessero
|
|
262
|
+
facendo
|
|
263
|
+
|
|
264
|
+
| forms of stare, to be (not including the infinitive):
|
|
265
|
+
sto
|
|
266
|
+
stai
|
|
267
|
+
sta
|
|
268
|
+
stiamo
|
|
269
|
+
stanno
|
|
270
|
+
stia
|
|
271
|
+
stiate
|
|
272
|
+
stiano
|
|
273
|
+
starò
|
|
274
|
+
starai
|
|
275
|
+
starà
|
|
276
|
+
staremo
|
|
277
|
+
starete
|
|
278
|
+
staranno
|
|
279
|
+
starei
|
|
280
|
+
staresti
|
|
281
|
+
starebbe
|
|
282
|
+
staremmo
|
|
283
|
+
stareste
|
|
284
|
+
starebbero
|
|
285
|
+
stavo
|
|
286
|
+
stavi
|
|
287
|
+
stava
|
|
288
|
+
stavamo
|
|
289
|
+
stavate
|
|
290
|
+
stavano
|
|
291
|
+
stetti
|
|
292
|
+
stesti
|
|
293
|
+
stette
|
|
294
|
+
stemmo
|
|
295
|
+
steste
|
|
296
|
+
stettero
|
|
297
|
+
stessi
|
|
298
|
+
stesse
|
|
299
|
+
stessimo
|
|
300
|
+
stessero
|
|
301
|
+
stando
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
#
|
|
2
|
+
# This file defines a stopword set for Japanese.
|
|
3
|
+
#
|
|
4
|
+
# This set is made up of hand-picked frequent terms from segmented Japanese Wikipedia.
|
|
5
|
+
# Punctuation characters and frequent kanji have mostly been left out. See LUCENE-3745
|
|
6
|
+
# for frequency lists, etc. that can be useful for making your own set (if desired)
|
|
7
|
+
#
|
|
8
|
+
# Note that there is an overlap between these stopwords and the terms stopped when used
|
|
9
|
+
# in combination with the JapanesePartOfSpeechStopFilter. When editing this file, note
|
|
10
|
+
# that comments are not allowed on the same line as stopwords.
|
|
11
|
+
#
|
|
12
|
+
# Also note that stopping is done in a case-insensitive manner. Change your StopFilter
|
|
13
|
+
# configuration if you need case-sensitive stopping. Lastly, note that stopping is done
|
|
14
|
+
# using the same character width as the entries in this file. Since this StopFilter is
|
|
15
|
+
# normally done after a CJKWidthFilter in your chain, you would usually want your romaji
|
|
16
|
+
# entries to be in half-width and your kana entries to be in full-width.
|
|
17
|
+
#
|
|
18
|
+
の
|
|
19
|
+
に
|
|
20
|
+
は
|
|
21
|
+
を
|
|
22
|
+
た
|
|
23
|
+
が
|
|
24
|
+
で
|
|
25
|
+
て
|
|
26
|
+
と
|
|
27
|
+
し
|
|
28
|
+
れ
|
|
29
|
+
さ
|
|
30
|
+
ある
|
|
31
|
+
いる
|
|
32
|
+
も
|
|
33
|
+
する
|
|
34
|
+
から
|
|
35
|
+
な
|
|
36
|
+
こと
|
|
37
|
+
として
|
|
38
|
+
い
|
|
39
|
+
や
|
|
40
|
+
れる
|
|
41
|
+
など
|
|
42
|
+
なっ
|
|
43
|
+
ない
|
|
44
|
+
この
|
|
45
|
+
ため
|
|
46
|
+
その
|
|
47
|
+
あっ
|
|
48
|
+
よう
|
|
49
|
+
また
|
|
50
|
+
もの
|
|
51
|
+
という
|
|
52
|
+
あり
|
|
53
|
+
まで
|
|
54
|
+
られ
|
|
55
|
+
なる
|
|
56
|
+
へ
|
|
57
|
+
か
|
|
58
|
+
だ
|
|
59
|
+
これ
|
|
60
|
+
によって
|
|
61
|
+
により
|
|
62
|
+
おり
|
|
63
|
+
より
|
|
64
|
+
による
|
|
65
|
+
ず
|
|
66
|
+
なり
|
|
67
|
+
られる
|
|
68
|
+
において
|
|
69
|
+
ば
|
|
70
|
+
なかっ
|
|
71
|
+
なく
|
|
72
|
+
しかし
|
|
73
|
+
について
|
|
74
|
+
せ
|
|
75
|
+
だっ
|
|
76
|
+
その後
|
|
77
|
+
できる
|
|
78
|
+
それ
|
|
79
|
+
う
|
|
80
|
+
ので
|
|
81
|
+
なお
|
|
82
|
+
のみ
|
|
83
|
+
でき
|
|
84
|
+
き
|
|
85
|
+
つ
|
|
86
|
+
における
|
|
87
|
+
および
|
|
88
|
+
いう
|
|
89
|
+
さらに
|
|
90
|
+
でも
|
|
91
|
+
ら
|
|
92
|
+
たり
|
|
93
|
+
その他
|
|
94
|
+
に関する
|
|
95
|
+
たち
|
|
96
|
+
ます
|
|
97
|
+
ん
|
|
98
|
+
なら
|
|
99
|
+
に対して
|
|
100
|
+
特に
|
|
101
|
+
せる
|
|
102
|
+
及び
|
|
103
|
+
これら
|
|
104
|
+
とき
|
|
105
|
+
では
|
|
106
|
+
にて
|
|
107
|
+
ほか
|
|
108
|
+
ながら
|
|
109
|
+
うち
|
|
110
|
+
そして
|
|
111
|
+
とともに
|
|
112
|
+
ただし
|
|
113
|
+
かつて
|
|
114
|
+
それぞれ
|
|
115
|
+
または
|
|
116
|
+
お
|
|
117
|
+
ほど
|
|
118
|
+
ものの
|
|
119
|
+
に対する
|
|
120
|
+
ほとんど
|
|
121
|
+
と共に
|
|
122
|
+
といった
|
|
123
|
+
です
|
|
124
|
+
とも
|
|
125
|
+
ところ
|
|
126
|
+
ここ
|
|
127
|
+
##### End of file
|
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
# Set of Latvian stopwords from A Stemming Algorithm for Latvian, Karlis Kreslins
|
|
2
|
+
# the original list of over 800 forms was refined:
|
|
3
|
+
# pronouns, adverbs, interjections were removed
|
|
4
|
+
#
|
|
5
|
+
# prepositions
|
|
6
|
+
aiz
|
|
7
|
+
ap
|
|
8
|
+
ar
|
|
9
|
+
apakš
|
|
10
|
+
ārpus
|
|
11
|
+
augšpus
|
|
12
|
+
bez
|
|
13
|
+
caur
|
|
14
|
+
dēļ
|
|
15
|
+
gar
|
|
16
|
+
iekš
|
|
17
|
+
iz
|
|
18
|
+
kopš
|
|
19
|
+
labad
|
|
20
|
+
lejpus
|
|
21
|
+
līdz
|
|
22
|
+
no
|
|
23
|
+
otrpus
|
|
24
|
+
pa
|
|
25
|
+
par
|
|
26
|
+
pār
|
|
27
|
+
pēc
|
|
28
|
+
pie
|
|
29
|
+
pirms
|
|
30
|
+
pret
|
|
31
|
+
priekš
|
|
32
|
+
starp
|
|
33
|
+
šaipus
|
|
34
|
+
uz
|
|
35
|
+
viņpus
|
|
36
|
+
virs
|
|
37
|
+
virspus
|
|
38
|
+
zem
|
|
39
|
+
apakšpus
|
|
40
|
+
# Conjunctions
|
|
41
|
+
un
|
|
42
|
+
bet
|
|
43
|
+
jo
|
|
44
|
+
ja
|
|
45
|
+
ka
|
|
46
|
+
lai
|
|
47
|
+
tomēr
|
|
48
|
+
tikko
|
|
49
|
+
turpretī
|
|
50
|
+
arī
|
|
51
|
+
kaut
|
|
52
|
+
gan
|
|
53
|
+
tādēļ
|
|
54
|
+
tā
|
|
55
|
+
ne
|
|
56
|
+
tikvien
|
|
57
|
+
vien
|
|
58
|
+
kā
|
|
59
|
+
ir
|
|
60
|
+
te
|
|
61
|
+
vai
|
|
62
|
+
kamēr
|
|
63
|
+
# Particles
|
|
64
|
+
ar
|
|
65
|
+
diezin
|
|
66
|
+
droši
|
|
67
|
+
diemžēl
|
|
68
|
+
nebūt
|
|
69
|
+
ik
|
|
70
|
+
it
|
|
71
|
+
taču
|
|
72
|
+
nu
|
|
73
|
+
pat
|
|
74
|
+
tiklab
|
|
75
|
+
iekšpus
|
|
76
|
+
nedz
|
|
77
|
+
tik
|
|
78
|
+
nevis
|
|
79
|
+
turpretim
|
|
80
|
+
jeb
|
|
81
|
+
iekam
|
|
82
|
+
iekām
|
|
83
|
+
iekāms
|
|
84
|
+
kolīdz
|
|
85
|
+
līdzko
|
|
86
|
+
tiklīdz
|
|
87
|
+
jebšu
|
|
88
|
+
tālab
|
|
89
|
+
tāpēc
|
|
90
|
+
nekā
|
|
91
|
+
itin
|
|
92
|
+
jā
|
|
93
|
+
jau
|
|
94
|
+
jel
|
|
95
|
+
nē
|
|
96
|
+
nezin
|
|
97
|
+
tad
|
|
98
|
+
tikai
|
|
99
|
+
vis
|
|
100
|
+
tak
|
|
101
|
+
iekams
|
|
102
|
+
vien
|
|
103
|
+
# modal verbs
|
|
104
|
+
būt
|
|
105
|
+
biju
|
|
106
|
+
biji
|
|
107
|
+
bija
|
|
108
|
+
bijām
|
|
109
|
+
bijāt
|
|
110
|
+
esmu
|
|
111
|
+
esi
|
|
112
|
+
esam
|
|
113
|
+
esat
|
|
114
|
+
būšu
|
|
115
|
+
būsi
|
|
116
|
+
būs
|
|
117
|
+
būsim
|
|
118
|
+
būsiet
|
|
119
|
+
tikt
|
|
120
|
+
tiku
|
|
121
|
+
tiki
|
|
122
|
+
tika
|
|
123
|
+
tikām
|
|
124
|
+
tikāt
|
|
125
|
+
tieku
|
|
126
|
+
tiec
|
|
127
|
+
tiek
|
|
128
|
+
tiekam
|
|
129
|
+
tiekat
|
|
130
|
+
tikšu
|
|
131
|
+
tiks
|
|
132
|
+
tiksim
|
|
133
|
+
tiksiet
|
|
134
|
+
tapt
|
|
135
|
+
tapi
|
|
136
|
+
tapāt
|
|
137
|
+
topat
|
|
138
|
+
tapšu
|
|
139
|
+
tapsi
|
|
140
|
+
taps
|
|
141
|
+
tapsim
|
|
142
|
+
tapsiet
|
|
143
|
+
kļūt
|
|
144
|
+
kļuvu
|
|
145
|
+
kļuvi
|
|
146
|
+
kļuva
|
|
147
|
+
kļuvām
|
|
148
|
+
kļuvāt
|
|
149
|
+
kļūstu
|
|
150
|
+
kļūsti
|
|
151
|
+
kļūst
|
|
152
|
+
kļūstam
|
|
153
|
+
kļūstat
|
|
154
|
+
kļūšu
|
|
155
|
+
kļūsi
|
|
156
|
+
kļūs
|
|
157
|
+
kļūsim
|
|
158
|
+
kļūsiet
|
|
159
|
+
# verbs
|
|
160
|
+
varēt
|
|
161
|
+
varēju
|
|
162
|
+
varējām
|
|
163
|
+
varēšu
|
|
164
|
+
varēsim
|
|
165
|
+
var
|
|
166
|
+
varēji
|
|
167
|
+
varējāt
|
|
168
|
+
varēsi
|
|
169
|
+
varēsiet
|
|
170
|
+
varat
|
|
171
|
+
varēja
|
|
172
|
+
varēs
|