stemmers 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rubocop.yml +13 -0
- data/CHANGELOG.md +5 -0
- data/CODE_OF_CONDUCT.md +132 -0
- data/Cargo.lock +547 -0
- data/Cargo.toml +7 -0
- data/LICENSE.txt +21 -0
- data/README.md +113 -0
- data/Rakefile +23 -0
- data/ext/stemmers/Cargo.toml +16 -0
- data/ext/stemmers/extconf.rb +6 -0
- data/ext/stemmers/src/lib.rs +105 -0
- data/lib/stemmers/stopwords/af.json +53 -0
- data/lib/stemmers/stopwords/ar.json +482 -0
- data/lib/stemmers/stopwords/bg.json +261 -0
- data/lib/stemmers/stopwords/bn.json +400 -0
- data/lib/stemmers/stopwords/br.json +1205 -0
- data/lib/stemmers/stopwords/ca.json +280 -0
- data/lib/stemmers/stopwords/cs.json +425 -0
- data/lib/stemmers/stopwords/da.json +172 -0
- data/lib/stemmers/stopwords/de.json +622 -0
- data/lib/stemmers/stopwords/el.json +849 -0
- data/lib/stemmers/stopwords/en.json +1300 -0
- data/lib/stemmers/stopwords/eo.json +175 -0
- data/lib/stemmers/stopwords/es.json +734 -0
- data/lib/stemmers/stopwords/et.json +37 -0
- data/lib/stemmers/stopwords/eu.json +100 -0
- data/lib/stemmers/stopwords/fa.json +801 -0
- data/lib/stemmers/stopwords/fi.json +849 -0
- data/lib/stemmers/stopwords/fr.json +693 -0
- data/lib/stemmers/stopwords/ga.json +111 -0
- data/lib/stemmers/stopwords/gl.json +162 -0
- data/lib/stemmers/stopwords/gu.json +226 -0
- data/lib/stemmers/stopwords/ha.json +41 -0
- data/lib/stemmers/stopwords/he.json +196 -0
- data/lib/stemmers/stopwords/hi.json +227 -0
- data/lib/stemmers/stopwords/hr.json +181 -0
- data/lib/stemmers/stopwords/hu.json +791 -0
- data/lib/stemmers/stopwords/hy.json +47 -0
- data/lib/stemmers/stopwords/id.json +760 -0
- data/lib/stemmers/stopwords/it.json +634 -0
- data/lib/stemmers/stopwords/ja.json +136 -0
- data/lib/stemmers/stopwords/ko.json +681 -0
- data/lib/stemmers/stopwords/ku.json +64 -0
- data/lib/stemmers/stopwords/la.json +51 -0
- data/lib/stemmers/stopwords/lt.json +476 -0
- data/lib/stemmers/stopwords/lv.json +163 -0
- data/lib/stemmers/stopwords/mr.json +101 -0
- data/lib/stemmers/stopwords/ms.json +477 -0
- data/lib/stemmers/stopwords/nl.json +415 -0
- data/lib/stemmers/stopwords/no.json +223 -0
- data/lib/stemmers/stopwords/pl.json +331 -0
- data/lib/stemmers/stopwords/pt.json +562 -0
- data/lib/stemmers/stopwords/ro.json +436 -0
- data/lib/stemmers/stopwords/ru.json +561 -0
- data/lib/stemmers/stopwords/sk.json +420 -0
- data/lib/stemmers/stopwords/sl.json +448 -0
- data/lib/stemmers/stopwords/so.json +32 -0
- data/lib/stemmers/stopwords/st.json +33 -0
- data/lib/stemmers/stopwords/sv.json +420 -0
- data/lib/stemmers/stopwords/sw.json +76 -0
- data/lib/stemmers/stopwords/th.json +118 -0
- data/lib/stemmers/stopwords/tl.json +149 -0
- data/lib/stemmers/stopwords/tr.json +506 -0
- data/lib/stemmers/stopwords/uk.json +75 -0
- data/lib/stemmers/stopwords/ur.json +519 -0
- data/lib/stemmers/stopwords/vi.json +647 -0
- data/lib/stemmers/stopwords/yo.json +62 -0
- data/lib/stemmers/stopwords/zh.json +796 -0
- data/lib/stemmers/stopwords/zu.json +31 -0
- data/lib/stemmers/version.rb +5 -0
- data/lib/stemmers.rb +91 -0
- data/sig/stemmers.rbs +4 -0
- metadata +131 -0
@@ -0,0 +1,477 @@
|
|
1
|
+
[
|
2
|
+
"abdul",
|
3
|
+
"abdullah",
|
4
|
+
"acara",
|
5
|
+
"ada",
|
6
|
+
"adalah",
|
7
|
+
"ahmad",
|
8
|
+
"air",
|
9
|
+
"akan",
|
10
|
+
"akhbar",
|
11
|
+
"akhir",
|
12
|
+
"aktiviti",
|
13
|
+
"alam",
|
14
|
+
"amat",
|
15
|
+
"amerika",
|
16
|
+
"anak",
|
17
|
+
"anggota",
|
18
|
+
"antara",
|
19
|
+
"antarabangsa",
|
20
|
+
"apa",
|
21
|
+
"apabila",
|
22
|
+
"april",
|
23
|
+
"as",
|
24
|
+
"asas",
|
25
|
+
"asean",
|
26
|
+
"asia",
|
27
|
+
"asing",
|
28
|
+
"atas",
|
29
|
+
"atau",
|
30
|
+
"australia",
|
31
|
+
"awal",
|
32
|
+
"awam",
|
33
|
+
"bagaimanapun",
|
34
|
+
"bagi",
|
35
|
+
"bahagian",
|
36
|
+
"bahan",
|
37
|
+
"baharu",
|
38
|
+
"bahawa",
|
39
|
+
"baik",
|
40
|
+
"bandar",
|
41
|
+
"bank",
|
42
|
+
"banyak",
|
43
|
+
"barangan",
|
44
|
+
"baru",
|
45
|
+
"baru-baru",
|
46
|
+
"bawah",
|
47
|
+
"beberapa",
|
48
|
+
"bekas",
|
49
|
+
"beliau",
|
50
|
+
"belum",
|
51
|
+
"berada",
|
52
|
+
"berakhir",
|
53
|
+
"berbanding",
|
54
|
+
"berdasarkan",
|
55
|
+
"berharap",
|
56
|
+
"berikutan",
|
57
|
+
"berjaya",
|
58
|
+
"berjumlah",
|
59
|
+
"berkaitan",
|
60
|
+
"berkata",
|
61
|
+
"berkenaan",
|
62
|
+
"berlaku",
|
63
|
+
"bermula",
|
64
|
+
"bernama",
|
65
|
+
"bernilai",
|
66
|
+
"bersama",
|
67
|
+
"berubah",
|
68
|
+
"besar",
|
69
|
+
"bhd",
|
70
|
+
"bidang",
|
71
|
+
"bilion",
|
72
|
+
"bn",
|
73
|
+
"boleh",
|
74
|
+
"bukan",
|
75
|
+
"bulan",
|
76
|
+
"bursa",
|
77
|
+
"cadangan",
|
78
|
+
"china",
|
79
|
+
"dagangan",
|
80
|
+
"dalam",
|
81
|
+
"dan",
|
82
|
+
"dana",
|
83
|
+
"dapat",
|
84
|
+
"dari",
|
85
|
+
"daripada",
|
86
|
+
"dasar",
|
87
|
+
"datang",
|
88
|
+
"datuk",
|
89
|
+
"demikian",
|
90
|
+
"dengan",
|
91
|
+
"depan",
|
92
|
+
"derivatives",
|
93
|
+
"dewan",
|
94
|
+
"di",
|
95
|
+
"diadakan",
|
96
|
+
"dibuka",
|
97
|
+
"dicatatkan",
|
98
|
+
"dijangka",
|
99
|
+
"diniagakan",
|
100
|
+
"dis",
|
101
|
+
"disember",
|
102
|
+
"ditutup",
|
103
|
+
"dolar",
|
104
|
+
"dr",
|
105
|
+
"dua",
|
106
|
+
"dunia",
|
107
|
+
"ekonomi",
|
108
|
+
"eksekutif",
|
109
|
+
"eksport",
|
110
|
+
"empat",
|
111
|
+
"enam",
|
112
|
+
"faedah",
|
113
|
+
"feb",
|
114
|
+
"global",
|
115
|
+
"hadapan",
|
116
|
+
"hanya",
|
117
|
+
"harga",
|
118
|
+
"hari",
|
119
|
+
"hasil",
|
120
|
+
"hingga",
|
121
|
+
"hubungan",
|
122
|
+
"ia",
|
123
|
+
"iaitu",
|
124
|
+
"ialah",
|
125
|
+
"indeks",
|
126
|
+
"india",
|
127
|
+
"indonesia",
|
128
|
+
"industri",
|
129
|
+
"ini",
|
130
|
+
"islam",
|
131
|
+
"isnin",
|
132
|
+
"isu",
|
133
|
+
"itu",
|
134
|
+
"jabatan",
|
135
|
+
"jalan",
|
136
|
+
"jan",
|
137
|
+
"jawatan",
|
138
|
+
"jawatankuasa",
|
139
|
+
"jepun",
|
140
|
+
"jika",
|
141
|
+
"jualan",
|
142
|
+
"juga",
|
143
|
+
"julai",
|
144
|
+
"jumaat",
|
145
|
+
"jumlah",
|
146
|
+
"jun",
|
147
|
+
"juta",
|
148
|
+
"kadar",
|
149
|
+
"kalangan",
|
150
|
+
"kali",
|
151
|
+
"kami",
|
152
|
+
"kata",
|
153
|
+
"katanya",
|
154
|
+
"kaunter",
|
155
|
+
"kawasan",
|
156
|
+
"ke",
|
157
|
+
"keadaan",
|
158
|
+
"kecil",
|
159
|
+
"kedua",
|
160
|
+
"kedua-dua",
|
161
|
+
"kedudukan",
|
162
|
+
"kekal",
|
163
|
+
"kementerian",
|
164
|
+
"kemudahan",
|
165
|
+
"kenaikan",
|
166
|
+
"kenyataan",
|
167
|
+
"kepada",
|
168
|
+
"kepentingan",
|
169
|
+
"keputusan",
|
170
|
+
"kerajaan",
|
171
|
+
"kerana",
|
172
|
+
"kereta",
|
173
|
+
"kerja",
|
174
|
+
"kerjasama",
|
175
|
+
"kes",
|
176
|
+
"keselamatan",
|
177
|
+
"keseluruhan",
|
178
|
+
"kesihatan",
|
179
|
+
"ketika",
|
180
|
+
"ketua",
|
181
|
+
"keuntungan",
|
182
|
+
"kewangan",
|
183
|
+
"khamis",
|
184
|
+
"kini",
|
185
|
+
"kira-kira",
|
186
|
+
"kita",
|
187
|
+
"klci",
|
188
|
+
"klibor",
|
189
|
+
"komposit",
|
190
|
+
"kontrak",
|
191
|
+
"kos",
|
192
|
+
"kuala",
|
193
|
+
"kuasa",
|
194
|
+
"kukuh",
|
195
|
+
"kumpulan",
|
196
|
+
"lagi",
|
197
|
+
"lain",
|
198
|
+
"langkah",
|
199
|
+
"laporan",
|
200
|
+
"lebih",
|
201
|
+
"lepas",
|
202
|
+
"lima",
|
203
|
+
"lot",
|
204
|
+
"luar",
|
205
|
+
"lumpur",
|
206
|
+
"mac",
|
207
|
+
"mahkamah",
|
208
|
+
"mahu",
|
209
|
+
"majlis",
|
210
|
+
"makanan",
|
211
|
+
"maklumat",
|
212
|
+
"malam",
|
213
|
+
"malaysia",
|
214
|
+
"mana",
|
215
|
+
"manakala",
|
216
|
+
"masa",
|
217
|
+
"masalah",
|
218
|
+
"masih",
|
219
|
+
"masing-masing",
|
220
|
+
"masyarakat",
|
221
|
+
"mata",
|
222
|
+
"media",
|
223
|
+
"mei",
|
224
|
+
"melalui",
|
225
|
+
"melihat",
|
226
|
+
"memandangkan",
|
227
|
+
"memastikan",
|
228
|
+
"membantu",
|
229
|
+
"membawa",
|
230
|
+
"memberi",
|
231
|
+
"memberikan",
|
232
|
+
"membolehkan",
|
233
|
+
"membuat",
|
234
|
+
"mempunyai",
|
235
|
+
"menambah",
|
236
|
+
"menarik",
|
237
|
+
"menawarkan",
|
238
|
+
"mencapai",
|
239
|
+
"mencatatkan",
|
240
|
+
"mendapat",
|
241
|
+
"mendapatkan",
|
242
|
+
"menerima",
|
243
|
+
"menerusi",
|
244
|
+
"mengadakan",
|
245
|
+
"mengambil",
|
246
|
+
"mengenai",
|
247
|
+
"menggalakkan",
|
248
|
+
"menggunakan",
|
249
|
+
"mengikut",
|
250
|
+
"mengumumkan",
|
251
|
+
"mengurangkan",
|
252
|
+
"meningkat",
|
253
|
+
"meningkatkan",
|
254
|
+
"menjadi",
|
255
|
+
"menjelang",
|
256
|
+
"menokok",
|
257
|
+
"menteri",
|
258
|
+
"menunjukkan",
|
259
|
+
"menurut",
|
260
|
+
"menyaksikan",
|
261
|
+
"menyediakan",
|
262
|
+
"mereka",
|
263
|
+
"merosot",
|
264
|
+
"merupakan",
|
265
|
+
"mesyuarat",
|
266
|
+
"minat",
|
267
|
+
"minggu",
|
268
|
+
"minyak",
|
269
|
+
"modal",
|
270
|
+
"mohd",
|
271
|
+
"mudah",
|
272
|
+
"mungkin",
|
273
|
+
"naik",
|
274
|
+
"najib",
|
275
|
+
"nasional",
|
276
|
+
"negara",
|
277
|
+
"negara-negara",
|
278
|
+
"negeri",
|
279
|
+
"niaga",
|
280
|
+
"nilai",
|
281
|
+
"nov",
|
282
|
+
"ogos",
|
283
|
+
"okt",
|
284
|
+
"oleh",
|
285
|
+
"operasi",
|
286
|
+
"orang",
|
287
|
+
"pada",
|
288
|
+
"pagi",
|
289
|
+
"paling",
|
290
|
+
"pameran",
|
291
|
+
"papan",
|
292
|
+
"para",
|
293
|
+
"paras",
|
294
|
+
"parlimen",
|
295
|
+
"parti",
|
296
|
+
"pasaran",
|
297
|
+
"pasukan",
|
298
|
+
"pegawai",
|
299
|
+
"pejabat",
|
300
|
+
"pekerja",
|
301
|
+
"pelabur",
|
302
|
+
"pelaburan",
|
303
|
+
"pelancongan",
|
304
|
+
"pelanggan",
|
305
|
+
"pelbagai",
|
306
|
+
"peluang",
|
307
|
+
"pembangunan",
|
308
|
+
"pemberita",
|
309
|
+
"pembinaan",
|
310
|
+
"pemimpin",
|
311
|
+
"pendapatan",
|
312
|
+
"pendidikan",
|
313
|
+
"penduduk",
|
314
|
+
"penerbangan",
|
315
|
+
"pengarah",
|
316
|
+
"pengeluaran",
|
317
|
+
"pengerusi",
|
318
|
+
"pengguna",
|
319
|
+
"pengurusan",
|
320
|
+
"peniaga",
|
321
|
+
"peningkatan",
|
322
|
+
"penting",
|
323
|
+
"peratus",
|
324
|
+
"perdagangan",
|
325
|
+
"perdana",
|
326
|
+
"peringkat",
|
327
|
+
"perjanjian",
|
328
|
+
"perkara",
|
329
|
+
"perkhidmatan",
|
330
|
+
"perladangan",
|
331
|
+
"perlu",
|
332
|
+
"permintaan",
|
333
|
+
"perniagaan",
|
334
|
+
"persekutuan",
|
335
|
+
"persidangan",
|
336
|
+
"pertama",
|
337
|
+
"pertubuhan",
|
338
|
+
"pertumbuhan",
|
339
|
+
"perusahaan",
|
340
|
+
"peserta",
|
341
|
+
"petang",
|
342
|
+
"pihak",
|
343
|
+
"pilihan",
|
344
|
+
"pinjaman",
|
345
|
+
"polis",
|
346
|
+
"politik",
|
347
|
+
"presiden",
|
348
|
+
"prestasi",
|
349
|
+
"produk",
|
350
|
+
"program",
|
351
|
+
"projek",
|
352
|
+
"proses",
|
353
|
+
"proton",
|
354
|
+
"pukul",
|
355
|
+
"pula",
|
356
|
+
"pusat",
|
357
|
+
"rabu",
|
358
|
+
"rakan",
|
359
|
+
"rakyat",
|
360
|
+
"ramai",
|
361
|
+
"rantau",
|
362
|
+
"raya",
|
363
|
+
"rendah",
|
364
|
+
"ringgit",
|
365
|
+
"rumah",
|
366
|
+
"sabah",
|
367
|
+
"sahaja",
|
368
|
+
"saham",
|
369
|
+
"sama",
|
370
|
+
"sarawak",
|
371
|
+
"satu",
|
372
|
+
"sawit",
|
373
|
+
"saya",
|
374
|
+
"sdn",
|
375
|
+
"sebagai",
|
376
|
+
"sebahagian",
|
377
|
+
"sebanyak",
|
378
|
+
"sebarang",
|
379
|
+
"sebelum",
|
380
|
+
"sebelumnya",
|
381
|
+
"sebuah",
|
382
|
+
"secara",
|
383
|
+
"sedang",
|
384
|
+
"segi",
|
385
|
+
"sehingga",
|
386
|
+
"sejak",
|
387
|
+
"sekarang",
|
388
|
+
"sektor",
|
389
|
+
"sekuriti",
|
390
|
+
"selain",
|
391
|
+
"selama",
|
392
|
+
"selasa",
|
393
|
+
"selatan",
|
394
|
+
"selepas",
|
395
|
+
"seluruh",
|
396
|
+
"semakin",
|
397
|
+
"semalam",
|
398
|
+
"semasa",
|
399
|
+
"sementara",
|
400
|
+
"semua",
|
401
|
+
"semula",
|
402
|
+
"sen",
|
403
|
+
"sendiri",
|
404
|
+
"seorang",
|
405
|
+
"sepanjang",
|
406
|
+
"seperti",
|
407
|
+
"sept",
|
408
|
+
"september",
|
409
|
+
"serantau",
|
410
|
+
"seri",
|
411
|
+
"serta",
|
412
|
+
"sesi",
|
413
|
+
"setiap",
|
414
|
+
"setiausaha",
|
415
|
+
"sidang",
|
416
|
+
"singapura",
|
417
|
+
"sini",
|
418
|
+
"sistem",
|
419
|
+
"sokongan",
|
420
|
+
"sri",
|
421
|
+
"sudah",
|
422
|
+
"sukan",
|
423
|
+
"suku",
|
424
|
+
"sumber",
|
425
|
+
"supaya",
|
426
|
+
"susut",
|
427
|
+
"syarikat",
|
428
|
+
"syed",
|
429
|
+
"tahap",
|
430
|
+
"tahun",
|
431
|
+
"tan",
|
432
|
+
"tanah",
|
433
|
+
"tanpa",
|
434
|
+
"tawaran",
|
435
|
+
"teknologi",
|
436
|
+
"telah",
|
437
|
+
"tempat",
|
438
|
+
"tempatan",
|
439
|
+
"tempoh",
|
440
|
+
"tenaga",
|
441
|
+
"tengah",
|
442
|
+
"tentang",
|
443
|
+
"terbaik",
|
444
|
+
"terbang",
|
445
|
+
"terbesar",
|
446
|
+
"terbuka",
|
447
|
+
"terdapat",
|
448
|
+
"terhadap",
|
449
|
+
"termasuk",
|
450
|
+
"tersebut",
|
451
|
+
"terus",
|
452
|
+
"tetapi",
|
453
|
+
"thailand",
|
454
|
+
"tiada",
|
455
|
+
"tidak",
|
456
|
+
"tiga",
|
457
|
+
"timbalan",
|
458
|
+
"timur",
|
459
|
+
"tindakan",
|
460
|
+
"tinggi",
|
461
|
+
"tun",
|
462
|
+
"tunai",
|
463
|
+
"turun",
|
464
|
+
"turut",
|
465
|
+
"umno",
|
466
|
+
"unit",
|
467
|
+
"untuk",
|
468
|
+
"untung",
|
469
|
+
"urus",
|
470
|
+
"usaha",
|
471
|
+
"utama",
|
472
|
+
"walaupun",
|
473
|
+
"wang",
|
474
|
+
"wanita",
|
475
|
+
"wilayah",
|
476
|
+
"yang"
|
477
|
+
]
|