xapian-fu 1.7.0 → 1.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. checksums.yaml +4 -4
  2. data/README.rdoc +2 -1
  3. data/lib/xapian_fu/stopper_factory.rb +1 -4
  4. data/lib/xapian_fu/stopwords/af.txt +51 -0
  5. data/lib/xapian_fu/stopwords/ar.txt +480 -0
  6. data/lib/xapian_fu/stopwords/bg.txt +259 -0
  7. data/lib/xapian_fu/stopwords/bn.txt +398 -0
  8. data/lib/xapian_fu/stopwords/br.txt +1203 -0
  9. data/lib/xapian_fu/stopwords/ca.txt +278 -0
  10. data/lib/xapian_fu/stopwords/cs.txt +423 -0
  11. data/lib/xapian_fu/stopwords/da.txt +170 -0
  12. data/lib/xapian_fu/stopwords/danish.txt +1 -0
  13. data/lib/xapian_fu/stopwords/de.txt +620 -0
  14. data/lib/xapian_fu/stopwords/dutch.txt +1 -0
  15. data/lib/xapian_fu/stopwords/el.txt +847 -0
  16. data/lib/xapian_fu/stopwords/en.txt +1298 -0
  17. data/lib/xapian_fu/stopwords/english.txt +1 -0
  18. data/lib/xapian_fu/stopwords/eo.txt +173 -0
  19. data/lib/xapian_fu/stopwords/es.txt +732 -0
  20. data/lib/xapian_fu/stopwords/et.txt +35 -0
  21. data/lib/xapian_fu/stopwords/eu.txt +98 -0
  22. data/lib/xapian_fu/stopwords/fa.txt +799 -0
  23. data/lib/xapian_fu/stopwords/fi.txt +847 -0
  24. data/lib/xapian_fu/stopwords/finnish.txt +1 -0
  25. data/lib/xapian_fu/stopwords/fr.txt +691 -0
  26. data/lib/xapian_fu/stopwords/french.txt +1 -0
  27. data/lib/xapian_fu/stopwords/ga.txt +109 -0
  28. data/lib/xapian_fu/stopwords/german.txt +1 -0
  29. data/lib/xapian_fu/stopwords/gl.txt +160 -0
  30. data/lib/xapian_fu/stopwords/gu.txt +224 -0
  31. data/lib/xapian_fu/stopwords/ha.txt +39 -0
  32. data/lib/xapian_fu/stopwords/he.txt +194 -0
  33. data/lib/xapian_fu/stopwords/hi.txt +225 -0
  34. data/lib/xapian_fu/stopwords/hr.txt +179 -0
  35. data/lib/xapian_fu/stopwords/hu.txt +789 -0
  36. data/lib/xapian_fu/stopwords/hungarian.txt +1 -0
  37. data/lib/xapian_fu/stopwords/hy.txt +45 -0
  38. data/lib/xapian_fu/stopwords/id.txt +758 -0
  39. data/lib/xapian_fu/stopwords/it.txt +632 -0
  40. data/lib/xapian_fu/stopwords/italian.txt +1 -0
  41. data/lib/xapian_fu/stopwords/ja.txt +134 -0
  42. data/lib/xapian_fu/stopwords/ko.txt +679 -0
  43. data/lib/xapian_fu/stopwords/ku.txt +62 -0
  44. data/lib/xapian_fu/stopwords/la.txt +49 -0
  45. data/lib/xapian_fu/stopwords/lt.txt +474 -0
  46. data/lib/xapian_fu/stopwords/lv.txt +161 -0
  47. data/lib/xapian_fu/stopwords/mr.txt +99 -0
  48. data/lib/xapian_fu/stopwords/ms.txt +475 -0
  49. data/lib/xapian_fu/stopwords/nl.txt +413 -0
  50. data/lib/xapian_fu/stopwords/no.txt +221 -0
  51. data/lib/xapian_fu/stopwords/norwegian.txt +1 -0
  52. data/lib/xapian_fu/stopwords/pl.txt +329 -0
  53. data/lib/xapian_fu/stopwords/portuguese.txt +1 -0
  54. data/lib/xapian_fu/stopwords/pt.txt +560 -0
  55. data/lib/xapian_fu/stopwords/ro.txt +434 -0
  56. data/lib/xapian_fu/stopwords/ru.txt +559 -0
  57. data/lib/xapian_fu/stopwords/russian.txt +1 -0
  58. data/lib/xapian_fu/stopwords/sk.txt +418 -0
  59. data/lib/xapian_fu/stopwords/sl.txt +446 -0
  60. data/lib/xapian_fu/stopwords/so.txt +30 -0
  61. data/lib/xapian_fu/stopwords/spanish.txt +1 -0
  62. data/lib/xapian_fu/stopwords/st.txt +31 -0
  63. data/lib/xapian_fu/stopwords/sv.txt +418 -0
  64. data/lib/xapian_fu/stopwords/sw.txt +74 -0
  65. data/lib/xapian_fu/stopwords/swedish.txt +1 -0
  66. data/lib/xapian_fu/stopwords/th.txt +116 -0
  67. data/lib/xapian_fu/stopwords/tl.txt +147 -0
  68. data/lib/xapian_fu/stopwords/tr.txt +504 -0
  69. data/lib/xapian_fu/stopwords/uk.txt +73 -0
  70. data/lib/xapian_fu/stopwords/update.rb +10 -3
  71. data/lib/xapian_fu/stopwords/ur.txt +517 -0
  72. data/lib/xapian_fu/stopwords/vi.txt +645 -0
  73. data/lib/xapian_fu/stopwords/yo.txt +60 -0
  74. data/lib/xapian_fu/stopwords/zh.txt +794 -0
  75. data/lib/xapian_fu/stopwords/zu.txt +29 -0
  76. data/lib/xapian_fu/version.rb +1 -1
  77. data/lib/xapian_fu/xapian_db.rb +3 -0
  78. data/spec/xapian_doc_spec.rb +1 -1
  79. metadata +109 -51
  80. data/lib/xapian_fu/stopwords/danish.txt +0 -102
  81. data/lib/xapian_fu/stopwords/dutch.txt +0 -113
  82. data/lib/xapian_fu/stopwords/english.txt +0 -312
  83. data/lib/xapian_fu/stopwords/finnish.txt +0 -89
  84. data/lib/xapian_fu/stopwords/french.txt +0 -168
  85. data/lib/xapian_fu/stopwords/german.txt +0 -286
  86. data/lib/xapian_fu/stopwords/hungarian.txt +0 -203
  87. data/lib/xapian_fu/stopwords/italian.txt +0 -295
  88. data/lib/xapian_fu/stopwords/norwegian.txt +0 -186
  89. data/lib/xapian_fu/stopwords/portuguese.txt +0 -245
  90. data/lib/xapian_fu/stopwords/russian.txt +0 -236
  91. data/lib/xapian_fu/stopwords/spanish.txt +0 -348
  92. data/lib/xapian_fu/stopwords/swedish.txt +0 -125
@@ -1,312 +0,0 @@
1
-
2
- | An English stop word list. Comments begin with vertical bar. Each stop
3
- | word is at the start of a line.
4
-
5
- | Many of the forms below are quite rare (e.g. "yourselves") but included for
6
- | completeness.
7
-
8
- | PRONOUNS FORMS
9
- | 1st person sing
10
-
11
- i | subject, always in upper case of course
12
-
13
- me | object
14
- my | possessive adjective
15
- | the possessive pronoun `mine' is best suppressed, because of the
16
- | sense of coal-mine etc.
17
- myself | reflexive
18
- | 1st person plural
19
- we | subject
20
-
21
- | us | object
22
- | care is required here because US = United States. It is usually
23
- | safe to remove it if it is in lower case.
24
- our | possessive adjective
25
- ours | possessive pronoun
26
- ourselves | reflexive
27
- | second person (archaic `thou' forms not included)
28
- you | subject and object
29
- your | possessive adjective
30
- yours | possessive pronoun
31
- yourself | reflexive (singular)
32
- yourselves | reflexive (plural)
33
- | third person singular
34
- he | subject
35
- him | object
36
- his | possessive adjective and pronoun
37
- himself | reflexive
38
-
39
- she | subject
40
- her | object and possessive adjective
41
- hers | possessive pronoun
42
- herself | reflexive
43
-
44
- it | subject and object
45
- its | possessive adjective
46
- itself | reflexive
47
- | third person plural
48
- they | subject
49
- them | object
50
- their | possessive adjective
51
- theirs | possessive pronoun
52
- themselves | reflexive
53
- | other forms (demonstratives, interrogatives)
54
- what
55
- which
56
- who
57
- whom
58
- this
59
- that
60
- these
61
- those
62
-
63
- | VERB FORMS (using F.R. Palmer's nomenclature)
64
- | BE
65
- am | 1st person, present
66
- is | -s form (3rd person, present)
67
- are | present
68
- was | 1st person, past
69
- were | past
70
- be | infinitive
71
- been | past participle
72
- being | -ing form
73
- | HAVE
74
- have | simple
75
- has | -s form
76
- had | past
77
- having | -ing form
78
- | DO
79
- do | simple
80
- does | -s form
81
- did | past
82
- doing | -ing form
83
-
84
- | The forms below are, I believe, best omitted, because of the significant
85
- | homonym forms:
86
-
87
- | He made a WILL
88
- | old tin CAN
89
- | merry month of MAY
90
- | a smell of MUST
91
- | fight the good fight with all thy MIGHT
92
-
93
- | would, could, should, ought might however be included
94
-
95
- | | AUXILIARIES
96
- | | WILL
97
- |will
98
-
99
- would
100
-
101
- | | SHALL
102
- |shall
103
-
104
- should
105
-
106
- | | CAN
107
- |can
108
-
109
- could
110
-
111
- | | MAY
112
- |may
113
- |might
114
- | | MUST
115
- |must
116
- | | OUGHT
117
-
118
- ought
119
-
120
- | COMPOUND FORMS, increasingly encountered nowadays in 'formal' writing
121
- | pronoun + verb
122
-
123
- i'm
124
- you're
125
- he's
126
- she's
127
- it's
128
- we're
129
- they're
130
- i've
131
- you've
132
- we've
133
- they've
134
- i'd
135
- you'd
136
- he'd
137
- she'd
138
- we'd
139
- they'd
140
- i'll
141
- you'll
142
- he'll
143
- she'll
144
- we'll
145
- they'll
146
-
147
- | verb + negation
148
-
149
- isn't
150
- aren't
151
- wasn't
152
- weren't
153
- hasn't
154
- haven't
155
- hadn't
156
- doesn't
157
- don't
158
- didn't
159
-
160
- | auxiliary + negation
161
-
162
- won't
163
- wouldn't
164
- shan't
165
- shouldn't
166
- can't
167
- cannot
168
- couldn't
169
- mustn't
170
-
171
- | miscellaneous forms
172
-
173
- let's
174
- that's
175
- who's
176
- what's
177
- here's
178
- there's
179
- when's
180
- where's
181
- why's
182
- how's
183
-
184
- | rarer forms
185
-
186
- | daren't needn't
187
-
188
- | doubtful forms
189
-
190
- | oughtn't mightn't
191
-
192
- | ARTICLES
193
- a
194
- an
195
- the
196
-
197
- | THE REST (Overlap among prepositions, conjunctions, adverbs etc is so
198
- | high, that classification is pointless.)
199
- and
200
- but
201
- if
202
- or
203
- because
204
- as
205
- until
206
- while
207
-
208
- of
209
- at
210
- by
211
- for
212
- with
213
- about
214
- against
215
- between
216
- into
217
- through
218
- during
219
- before
220
- after
221
- above
222
- below
223
- to
224
- from
225
- up
226
- down
227
- in
228
- out
229
- on
230
- off
231
- over
232
- under
233
-
234
- again
235
- further
236
- then
237
- once
238
-
239
- here
240
- there
241
- when
242
- where
243
- why
244
- how
245
-
246
- all
247
- any
248
- both
249
- each
250
- few
251
- more
252
- most
253
- other
254
- some
255
- such
256
-
257
- no
258
- nor
259
- not
260
- only
261
- own
262
- same
263
- so
264
- than
265
- too
266
- very
267
-
268
- | Just for the record, the following words are among the commonest in English
269
-
270
- | one
271
- | every
272
- | least
273
- | less
274
- | many
275
- | now
276
- | ever
277
- | never
278
- | say
279
- | says
280
- | said
281
- | also
282
- | get
283
- | go
284
- | goes
285
- | just
286
- | made
287
- | make
288
- | put
289
- | see
290
- | seen
291
- | whether
292
- | like
293
- | well
294
- | back
295
- | even
296
- | still
297
- | way
298
- | take
299
- | since
300
- | another
301
- | however
302
- | two
303
- | three
304
- | four
305
- | five
306
- | first
307
- | second
308
- | new
309
- | old
310
- | high
311
- | long
312
-
@@ -1,89 +0,0 @@
1
-
2
- | forms of BE
3
-
4
- olla
5
- olen
6
- olet
7
- on
8
- olemme
9
- olette
10
- ovat
11
- ole | negative form
12
-
13
- oli
14
- olisi
15
- olisit
16
- olisin
17
- olisimme
18
- olisitte
19
- olisivat
20
- olit
21
- olin
22
- olimme
23
- olitte
24
- olivat
25
- ollut
26
- olleet
27
-
28
- en | negation
29
- et
30
- ei
31
- emme
32
- ette
33
- eivät
34
-
35
- |Nom Gen Acc Part Iness Elat Illat Adess Ablat Allat Ess Trans
36
- minä minun minut minua minussa minusta minuun minulla minulta minulle | I
37
- sinä sinun sinut sinua sinussa sinusta sinuun sinulla sinulta sinulle | you
38
- hän hänen hänet häntä hänessä hänestä häneen hänellä häneltä hänelle | he she
39
- me meidän meidät meitä meissä meistä meihin meillä meiltä meille | we
40
- te teidän teidät teitä teissä teistä teihin teillä teiltä teille | you
41
- he heidän heidät heitä heissä heistä heihin heillä heiltä heille | they
42
-
43
- tämä tämän tätä tässä tästä tähän tallä tältä tälle tänä täksi | this
44
- tuo tuon tuotä tuossa tuosta tuohon tuolla tuolta tuolle tuona tuoksi | that
45
- se sen sitä siinä siitä siihen sillä siltä sille sinä siksi | it
46
- nämä näiden näitä näissä näistä näihin näillä näiltä näille näinä näiksi | these
47
- nuo noiden noita noissa noista noihin noilla noilta noille noina noiksi | those
48
- ne niiden niitä niissä niistä niihin niillä niiltä niille niinä niiksi | they
49
-
50
- kuka kenen kenet ketä kenessä kenestä keneen kenellä keneltä kenelle kenenä keneksi| who
51
- ketkä keiden ketkä keitä keissä keistä keihin keillä keiltä keille keinä keiksi | (pl)
52
- mikä minkä minkä mitä missä mistä mihin millä miltä mille minä miksi | which what
53
- mitkä | (pl)
54
-
55
- joka jonka jota jossa josta johon jolla jolta jolle jona joksi | who which
56
- jotka joiden joita joissa joista joihin joilla joilta joille joina joiksi | (pl)
57
-
58
- | conjunctions
59
-
60
- että | that
61
- ja | and
62
- jos | if
63
- koska | because
64
- kuin | than
65
- mutta | but
66
- niin | so
67
- sekä | and
68
- sillä | for
69
- tai | or
70
- vaan | but
71
- vai | or
72
- vaikka | although
73
-
74
-
75
- | prepositions
76
-
77
- kanssa | with
78
- mukaan | according to
79
- noin | about
80
- poikki | across
81
- yli | over, across
82
-
83
- | other
84
-
85
- kun | when
86
- niin | so
87
- nyt | now
88
- itse | self
89
-
@@ -1,168 +0,0 @@
1
-
2
- | A French stop word list. Comments begin with vertical bar. Each stop
3
- | word is at the start of a line.
4
-
5
- au | a + le
6
- aux | a + les
7
- avec | with
8
- ce | this
9
- ces | these
10
- dans | with
11
- de | of
12
- des | de + les
13
- du | de + le
14
- elle | she
15
- en | `of them' etc
16
- et | and
17
- eux | them
18
- il | he
19
- je | I
20
- la | the
21
- le | the
22
- leur | their
23
- lui | him
24
- ma | my (fem)
25
- mais | but
26
- me | me
27
- même | same; as in moi-même (myself) etc
28
- mes | me (pl)
29
- moi | me
30
- mon | my (masc)
31
- ne | not
32
- nos | our (pl)
33
- notre | our
34
- nous | we
35
- on | one
36
- ou | where
37
- par | by
38
- pas | not
39
- pour | for
40
- qu | que before vowel
41
- que | that
42
- qui | who
43
- sa | his, her (fem)
44
- se | oneself
45
- ses | his (pl)
46
- son | his, her (masc)
47
- sur | on
48
- ta | thy (fem)
49
- te | thee
50
- tes | thy (pl)
51
- toi | thee
52
- ton | thy (masc)
53
- tu | thou
54
- un | a
55
- une | a
56
- vos | your (pl)
57
- votre | your
58
- vous | you
59
-
60
- | single letter forms
61
-
62
- c | c'
63
- d | d'
64
- j | j'
65
- l | l'
66
- à | to, at
67
- m | m'
68
- n | n'
69
- s | s'
70
- t | t'
71
- y | there
72
-
73
- | forms of être (not including the infinitive):
74
- été
75
- étée
76
- étées
77
- étés
78
- étant
79
- étante
80
- étants
81
- étantes
82
- suis
83
- es
84
- est
85
- sommes
86
- êtes
87
- sont
88
- serai
89
- seras
90
- sera
91
- serons
92
- serez
93
- seront
94
- serais
95
- serait
96
- serions
97
- seriez
98
- seraient
99
- étais
100
- était
101
- étions
102
- étiez
103
- étaient
104
- fus
105
- fut
106
- fûmes
107
- fûtes
108
- furent
109
- sois
110
- soit
111
- soyons
112
- soyez
113
- soient
114
- fusse
115
- fusses
116
- fût
117
- fussions
118
- fussiez
119
- fussent
120
-
121
- | forms of avoir (not including the infinitive):
122
- ayant
123
- ayante
124
- ayantes
125
- ayants
126
- eu
127
- eue
128
- eues
129
- eus
130
- ai
131
- as
132
- avons
133
- avez
134
- ont
135
- aurai
136
- auras
137
- aura
138
- aurons
139
- aurez
140
- auront
141
- aurais
142
- aurait
143
- aurions
144
- auriez
145
- auraient
146
- avais
147
- avait
148
- avions
149
- aviez
150
- avaient
151
- eut
152
- eûmes
153
- eûtes
154
- eurent
155
- aie
156
- aies
157
- ait
158
- ayons
159
- ayez
160
- aient
161
- eusse
162
- eusses
163
- eût
164
- eussions
165
- eussiez
166
- eussent
167
-
168
-