webpeel 0.15.2 → 0.16.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. package/README.md +2 -2
  2. package/dist/cli-auth.d.ts.map +1 -1
  3. package/dist/cli-auth.js +5 -0
  4. package/dist/cli-auth.js.map +1 -1
  5. package/dist/cli.js +43 -11
  6. package/dist/cli.js.map +1 -1
  7. package/dist/core/crawler.d.ts +2 -0
  8. package/dist/core/crawler.d.ts.map +1 -1
  9. package/dist/core/crawler.js +12 -3
  10. package/dist/core/crawler.js.map +1 -1
  11. package/dist/core/pipeline.d.ts +1 -0
  12. package/dist/core/pipeline.d.ts.map +1 -1
  13. package/dist/core/pipeline.js +63 -2
  14. package/dist/core/pipeline.js.map +1 -1
  15. package/dist/core/quick-answer.d.ts +26 -0
  16. package/dist/core/quick-answer.d.ts.map +1 -1
  17. package/dist/core/quick-answer.js +451 -84
  18. package/dist/core/quick-answer.js.map +1 -1
  19. package/dist/core/search-provider.d.ts +47 -4
  20. package/dist/core/search-provider.d.ts.map +1 -1
  21. package/dist/core/search-provider.js +278 -7
  22. package/dist/core/search-provider.js.map +1 -1
  23. package/dist/core/stemmer.d.ts +39 -0
  24. package/dist/core/stemmer.d.ts.map +1 -0
  25. package/dist/core/stemmer.js +510 -0
  26. package/dist/core/stemmer.js.map +1 -0
  27. package/dist/core/synonyms.d.ts +43 -0
  28. package/dist/core/synonyms.d.ts.map +1 -0
  29. package/dist/core/synonyms.js +185 -0
  30. package/dist/core/synonyms.js.map +1 -0
  31. package/dist/mcp/server.js +109 -4
  32. package/dist/mcp/server.js.map +1 -1
  33. package/dist/server/app.d.ts +1 -0
  34. package/dist/server/app.d.ts.map +1 -1
  35. package/dist/server/app.js +76 -10
  36. package/dist/server/app.js.map +1 -1
  37. package/dist/server/middleware/auth.d.ts +2 -1
  38. package/dist/server/middleware/auth.d.ts.map +1 -1
  39. package/dist/server/middleware/auth.js +25 -12
  40. package/dist/server/middleware/auth.js.map +1 -1
  41. package/dist/server/middleware/rate-limit.d.ts +1 -0
  42. package/dist/server/middleware/rate-limit.d.ts.map +1 -1
  43. package/dist/server/middleware/rate-limit.js +20 -11
  44. package/dist/server/middleware/rate-limit.js.map +1 -1
  45. package/dist/server/routes/agent.d.ts +4 -0
  46. package/dist/server/routes/agent.d.ts.map +1 -1
  47. package/dist/server/routes/agent.js +196 -9
  48. package/dist/server/routes/agent.js.map +1 -1
  49. package/dist/server/routes/batch.d.ts.map +1 -1
  50. package/dist/server/routes/batch.js +126 -1
  51. package/dist/server/routes/batch.js.map +1 -1
  52. package/dist/server/routes/fetch.d.ts +1 -0
  53. package/dist/server/routes/fetch.d.ts.map +1 -1
  54. package/dist/server/routes/fetch.js +193 -55
  55. package/dist/server/routes/fetch.js.map +1 -1
  56. package/dist/server/routes/jobs.d.ts.map +1 -1
  57. package/dist/server/routes/jobs.js +115 -2
  58. package/dist/server/routes/jobs.js.map +1 -1
  59. package/dist/server/routes/mcp.d.ts +1 -0
  60. package/dist/server/routes/mcp.d.ts.map +1 -1
  61. package/dist/server/routes/mcp.js +113 -6
  62. package/dist/server/routes/mcp.js.map +1 -1
  63. package/dist/server/routes/search.js +1 -1
  64. package/dist/server/routes/search.js.map +1 -1
  65. package/dist/server/types.d.ts +16 -0
  66. package/dist/server/types.d.ts.map +1 -0
  67. package/dist/server/types.js +8 -0
  68. package/dist/server/types.js.map +1 -0
  69. package/dist/server/utils/response.d.ts +45 -0
  70. package/dist/server/utils/response.d.ts.map +1 -0
  71. package/dist/server/utils/response.js +70 -0
  72. package/dist/server/utils/response.js.map +1 -0
  73. package/dist/server/utils/sse.d.ts +23 -0
  74. package/dist/server/utils/sse.d.ts.map +1 -0
  75. package/dist/server/utils/sse.js +39 -0
  76. package/dist/server/utils/sse.js.map +1 -0
  77. package/dist/types.d.ts +2 -0
  78. package/dist/types.d.ts.map +1 -1
  79. package/dist/types.js.map +1 -1
  80. package/package.json +1 -1
@@ -0,0 +1,39 @@
1
+ /**
2
+ * Porter Stemmer — Lightweight implementation of the Porter stemming algorithm.
3
+ *
4
+ * Based on: Martin Porter, "An algorithm for suffix stripping", 1980.
5
+ * Reference: https://tartarus.org/martin/PorterStemmer/
6
+ *
7
+ * This is a well-tested, deterministic implementation with no external dependencies.
8
+ * It correctly handles all standard Porter stemmer rules including steps 1a-5b.
9
+ */
10
+ /**
11
+ * Irregular verb forms → base form.
12
+ * Porter stemmer only handles regular morphology (-ed, -ing, -s).
13
+ * English has ~200 irregular verbs; we cover the most common ones.
14
+ * This table normalizes irregular forms before stemming so that
15
+ * "built" is mapped to "build" first, so stem("built") === stem("build").
16
+ *
17
+ * Ambiguous words are intentionally excluded:
18
+ * "found" — could be find (past) OR establish (base form "found a company")
19
+ * "left" — could be leave (past) OR direction
20
+ * "bore"/"borne"/"born" — could be bear (past) OR bore=boring OR born=birth
21
+ * "bound" — could be bind (past) OR boundary (noun)
22
+ */
23
+ export declare const IRREGULAR_FORMS: Record<string, string>;
24
+ /**
25
+ * Stem a single word using the Porter stemming algorithm.
26
+ *
27
+ * Returns the stemmed word (lowercase). Input is also lowercased.
28
+ * Words shorter than 3 characters are lowercased but otherwise left unstemmed.
29
+ *
30
+ * Irregular verb forms (e.g. "built", "ran", "spoke") are first normalized
31
+ * to their base form before Porter steps are applied, ensuring that
32
+ * stem("built") === stem("build"), stem("spoke") === stem("speak"), etc.
33
+ */
34
+ export declare function stem(word: string): string;
35
+ /**
36
+ * Stem an array of tokens.
37
+ */
38
+ export declare function stemTokens(tokens: string[]): string[];
39
+ //# sourceMappingURL=stemmer.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"stemmer.d.ts","sourceRoot":"","sources":["../../src/core/stemmer.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAmTH;;;;;;;;;;;;GAYG;AACH,eAAO,MAAM,eAAe,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAyKlD,CAAC;AAMF;;;;;;;;;GASG;AACH,wBAAgB,IAAI,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAqBzC;AAED;;GAEG;AACH,wBAAgB,UAAU,CAAC,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,EAAE,CAErD"}
@@ -0,0 +1,510 @@
1
/**
 * Porter Stemmer — Lightweight implementation of the Porter stemming algorithm.
 *
 * Based on: Martin Porter, "An algorithm for suffix stripping", 1980.
 * Reference: https://tartarus.org/martin/PorterStemmer/
 *
 * Deterministic, dependency-free implementation of steps 1a-5b, preceded by
 * an irregular-verb normalization pass (see IRREGULAR_FORMS).
 */
// ---------------------------------------------------------------------------
// Vowel / consonant helpers
// ---------------------------------------------------------------------------
/**
 * Returns true if the character at position i in word is a vowel.
 * 'y' is treated as a vowel when preceded by a consonant.
 *
 * @param {string} word - Lowercase word being inspected.
 * @param {number} i - Index of the character to classify.
 * @returns {boolean}
 */
function isVowelAt(word, i) {
    const c = word[i];
    if ('aeiou'.includes(c))
        return true;
    // A 'y' at index 0 is always a consonant; otherwise it depends on its
    // predecessor, which is classified recursively.
    if (c === 'y' && i > 0 && !isVowelAt(word, i - 1))
        return true;
    return false;
}
/**
 * Compute the "measure" m of a candidate stem.
 * m = number of VC (vowel-run followed by consonant) transitions.
 * The pattern is: [C](VC)^m[V]
 *
 * @param {string} base - Candidate stem.
 * @returns {number}
 */
function getMeasure(base) {
    let m = 0;
    let inVowel = false;
    for (let i = 0; i < base.length; i++) {
        const v = isVowelAt(base, i);
        if (inVowel && !v) {
            // Leaving a vowel run for a consonant completes one VC pair.
            m++;
            inVowel = false;
        }
        else if (!inVowel && v) {
            inVowel = true;
        }
    }
    return m;
}
/** Returns true if the candidate stem contains at least one vowel. */
function containsVowel(base) {
    for (let i = 0; i < base.length; i++) {
        if (isVowelAt(base, i))
            return true;
    }
    return false;
}
/** Returns true if the candidate stem ends in a double consonant (same consonant twice). */
function endsDoubleConsonant(base) {
    const n = base.length;
    if (n < 2)
        return false;
    return base[n - 1] === base[n - 2] && !isVowelAt(base, n - 1);
}
/**
 * Returns true if the candidate stem ends in CVC where the final C is not
 * W, X, or Y. This is the "*o" condition in Porter's paper.
 */
function endsCVC(base) {
    const n = base.length;
    if (n < 3)
        return false;
    const last = base[n - 1];
    return (!isVowelAt(base, n - 1) &&
        isVowelAt(base, n - 2) &&
        !isVowelAt(base, n - 3) &&
        last !== 'w' && last !== 'x' && last !== 'y');
}
// ---------------------------------------------------------------------------
// Step 1a — Plurals
// ---------------------------------------------------------------------------
function step1a(word) {
    if (word.endsWith('sses')) {
        return word.slice(0, -2); // caresses → caress
    }
    if (word.endsWith('ies')) {
        return word.slice(0, -2); // ponies → poni
    }
    if (word.endsWith('ss')) {
        return word; // caress → caress (no change)
    }
    if (word.endsWith('s') && word.length > 1) {
        return word.slice(0, -1); // cats → cat
    }
    return word;
}
// ---------------------------------------------------------------------------
// Step 1b — Past tenses / gerunds
// ---------------------------------------------------------------------------
/**
 * Clean-up applied after step 1b has stripped an -ED or -ING suffix.
 */
function step1bFixup(word) {
    // AT → ATE
    if (word.endsWith('at'))
        return word + 'e'; // conflat(ed) → conflate
    // BL → BLE
    if (word.endsWith('bl'))
        return word + 'e'; // troubl(ed) → trouble
    // IZ → IZE
    if (word.endsWith('iz'))
        return word + 'e'; // siz(ed) → size
    // Double consonant (not L, S, Z) → remove one
    if (endsDoubleConsonant(word) &&
        !word.endsWith('ll') &&
        !word.endsWith('ss') &&
        !word.endsWith('zz')) {
        return word.slice(0, -1); // hopp(ing) → hop, tapp(ing) → tap
    }
    // m=1 and CVC (*o) → add E
    // fil(ing) → file, hop(ing) → hope; fail(ing) stays "fail" because it
    // ends vowel-vowel-consonant rather than CVC.
    if (getMeasure(word) === 1 && endsCVC(word)) {
        return word + 'e';
    }
    return word;
}
function step1b(word) {
    // (m>0) EED → EE
    if (word.endsWith('eed')) {
        const base = word.slice(0, -3);
        if (getMeasure(base) > 0) {
            return word.slice(0, -1); // agreed → agree
        }
        return word; // feed → feed (m = 0)
    }
    // (*v*) ED → delete + fixup
    if (word.endsWith('ed')) {
        const base = word.slice(0, -2);
        if (containsVowel(base)) {
            return step1bFixup(base);
        }
        return word;
    }
    // (*v*) ING → delete + fixup
    if (word.endsWith('ing')) {
        const base = word.slice(0, -3);
        if (containsVowel(base)) {
            return step1bFixup(base);
        }
        return word;
    }
    return word;
}
// ---------------------------------------------------------------------------
// Step 1c — y → i
// ---------------------------------------------------------------------------
function step1c(word) {
    if (word.endsWith('y') && word.length > 2) {
        const base = word.slice(0, -1);
        if (containsVowel(base)) {
            return base + 'i'; // happy → happi
        }
    }
    return word;
}
// ---------------------------------------------------------------------------
// Step 2 — Suffix removal (m > 0)
// ---------------------------------------------------------------------------
// Order matters: the first matching suffix wins, so longer suffixes are
// listed before shorter suffixes they contain (e.g. 'ational' before 'tional').
const STEP2_RULES = [
    ['ational', 'ate'],
    ['tional', 'tion'],
    ['enci', 'ence'],
    ['anci', 'ance'],
    ['izer', 'ize'],
    ['abli', 'able'],
    ['alli', 'al'],
    ['entli', 'ent'],
    ['eli', 'e'],
    ['ousli', 'ous'],
    ['ization', 'ize'],
    ['ation', 'ate'],
    ['ator', 'ate'],
    ['alism', 'al'],
    ['iveness', 'ive'],
    ['fulness', 'ful'],
    ['ousness', 'ous'],
    ['aliti', 'al'],
    ['iviti', 'ive'],
    ['biliti', 'ble'],
];
function step2(word) {
    for (const [suffix, replacement] of STEP2_RULES) {
        if (word.endsWith(suffix)) {
            const base = word.slice(0, -suffix.length);
            if (getMeasure(base) > 0) {
                return base + replacement;
            }
            // Matched suffix but failed the measure test: stop here.
            return word;
        }
    }
    return word;
}
// ---------------------------------------------------------------------------
// Step 3 — Suffix removal (m > 0)
// ---------------------------------------------------------------------------
const STEP3_RULES = [
    ['icate', 'ic'],
    ['ative', ''],
    ['alize', 'al'],
    ['iciti', 'ic'],
    ['ical', 'ic'],
    ['ful', ''],
    ['ness', ''],
];
function step3(word) {
    for (const [suffix, replacement] of STEP3_RULES) {
        if (word.endsWith(suffix)) {
            const base = word.slice(0, -suffix.length);
            if (getMeasure(base) > 0) {
                return base + replacement;
            }
            return word;
        }
    }
    return word;
}
// ---------------------------------------------------------------------------
// Step 4 — Suffix removal (m > 1)
// ---------------------------------------------------------------------------
const STEP4_RULES = [
    ['ement', ''],
    ['ment', ''],
    ['ance', ''],
    ['ence', ''],
    ['able', ''],
    ['ible', ''],
    ['ism', ''],
    ['ate', ''],
    ['iti', ''],
    ['ous', ''],
    ['ive', ''],
    ['ize', ''],
    ['ant', ''],
    ['ent', ''],
    ['al', ''],
    ['er', ''],
    ['ic', ''],
    ['ou', ''],
];
function step4(word) {
    // Special case: ION — stem must end in S or T
    if (word.endsWith('ion')) {
        const base = word.slice(0, -3);
        if (getMeasure(base) > 1 && (base.endsWith('s') || base.endsWith('t'))) {
            return base;
        }
        return word;
    }
    for (const [suffix, replacement] of STEP4_RULES) {
        if (word.endsWith(suffix)) {
            const base = word.slice(0, -suffix.length);
            if (getMeasure(base) > 1) {
                return base + replacement;
            }
            return word;
        }
    }
    return word;
}
// ---------------------------------------------------------------------------
// Step 5a — Final E removal
// ---------------------------------------------------------------------------
function step5a(word) {
    if (word.endsWith('e')) {
        const base = word.slice(0, -1);
        const m = getMeasure(base);
        if (m > 1)
            return base;
        if (m === 1 && !endsCVC(base))
            return base;
    }
    return word;
}
// ---------------------------------------------------------------------------
// Step 5b — Double L removal
// ---------------------------------------------------------------------------
function step5b(word) {
    if (word.endsWith('ll') && getMeasure(word) > 1) {
        return word.slice(0, -1);
    }
    return word;
}
// ---------------------------------------------------------------------------
// Irregular verb forms table
// ---------------------------------------------------------------------------
/**
 * Irregular verb forms → base form.
 * Porter stemmer only handles regular morphology (-ed, -ing, -s).
 * English has ~200 irregular verbs; we cover the most common ones.
 * Each irregular form is replaced by its base form before the Porter steps
 * run, so that e.g. stem("built") === stem("build").
 *
 * Ambiguous words are intentionally excluded:
 * "found" — could be find (past) OR establish (base form "found a company")
 * "left" — could be leave (past) OR direction
 * "bore"/"borne"/"born" — could be bear (past) OR bore=boring OR born=birth
 * "bound" — could be bind (past) OR boundary (noun)
 *
 * NOTE(review): some listed forms are also ordinary words ("bit", "saw",
 * "rose", "led"); confirm that collapsing them is intended.
 */
export const IRREGULAR_FORMS = {
    // build
    'built': 'build',
    // run
    'ran': 'run',
    // make
    'made': 'make',
    // write
    'wrote': 'write', 'written': 'write',
    // begin
    'began': 'begin', 'begun': 'begin',
    // give
    'gave': 'give', 'given': 'give',
    // take
    'took': 'take', 'taken': 'take',
    // go
    'went': 'go', 'gone': 'go',
    // come
    'came': 'come',
    // see
    'saw': 'see', 'seen': 'see',
    // know
    'knew': 'know', 'known': 'know',
    // think
    'thought': 'think',
    // tell
    'told': 'tell',
    // say
    'said': 'say',
    // get
    'got': 'get', 'gotten': 'get',
    // buy
    'bought': 'buy',
    // bring
    'brought': 'bring',
    // send
    'sent': 'send',
    // spend
    'spent': 'spend',
    // keep
    'kept': 'keep',
    // hold
    'held': 'hold',
    // stand
    'stood': 'stand',
    // lose
    'lost': 'lose',
    // pay
    'paid': 'pay',
    // meet
    'met': 'meet',
    // lead
    'led': 'lead',
    // grow
    'grew': 'grow', 'grown': 'grow',
    // draw
    'drew': 'draw', 'drawn': 'draw',
    // break
    'broke': 'break', 'broken': 'break',
    // speak
    'spoke': 'speak', 'spoken': 'speak',
    // choose
    'chose': 'choose', 'chosen': 'choose',
    // fall
    'fell': 'fall', 'fallen': 'fall',
    // drive
    'drove': 'drive', 'driven': 'drive',
    // rise
    'rose': 'rise', 'risen': 'rise',
    // fly
    'flew': 'fly', 'flown': 'fly',
    // throw
    'threw': 'throw', 'thrown': 'throw',
    // wear
    'wore': 'wear', 'worn': 'wear',
    // hide
    'hid': 'hide', 'hidden': 'hide',
    // sit
    'sat': 'sit',
    // swim
    'swam': 'swim', 'swum': 'swim',
    // sing
    'sang': 'sing', 'sung': 'sing',
    // ring
    'rang': 'ring', 'rung': 'ring',
    // drink
    'drank': 'drink', 'drunk': 'drink',
    // wake
    'woke': 'wake', 'woken': 'wake',
    // freeze
    'froze': 'freeze', 'frozen': 'freeze',
    // steal
    'stole': 'steal', 'stolen': 'steal',
    // tear
    'tore': 'tear', 'torn': 'tear',
    // shake
    'shook': 'shake', 'shaken': 'shake',
    // forgive
    'forgave': 'forgive', 'forgiven': 'forgive',
    // forget
    'forgot': 'forget', 'forgotten': 'forget',
    // bite
    'bit': 'bite', 'bitten': 'bite',
    // blow
    'blew': 'blow', 'blown': 'blow',
    // catch
    'caught': 'catch',
    // teach
    'taught': 'teach',
    // fight
    'fought': 'fight',
    // seek
    'sought': 'seek',
    // sell
    'sold': 'sell',
    // win
    'won': 'win',
    // feed
    'fed': 'feed',
    // feel
    'felt': 'feel',
    // mean
    'meant': 'mean',
    // lend
    'lent': 'lend',
    // bend
    'bent': 'bend',
    // dig
    'dug': 'dig',
    // stick
    'stuck': 'stick',
    // strike
    'struck': 'strike', 'stricken': 'strike',
    // swear
    'swore': 'swear', 'sworn': 'swear',
    // spin
    'spun': 'spin',
    // hang
    'hung': 'hang',
    // slide
    'slid': 'slide',
    // shine
    'shone': 'shine',
    // shoot
    'shot': 'shoot',
    // sleep
    'slept': 'sleep',
    // sweep
    'swept': 'sweep',
    // creep
    'crept': 'creep',
    // weep
    'wept': 'weep',
    // deal
    'dealt': 'deal',
    // dream (irregular British)
    'dreamt': 'dream',
    // learn (irregular British)
    'learnt': 'learn',
    // burn (irregular British)
    'burnt': 'burn',
    // lean
    'leant': 'lean',
    // leap
    'leapt': 'leap',
    // spell
    'spelt': 'spell',
    // spill
    'spilt': 'spill',
};
// ---------------------------------------------------------------------------
// Main stem function
// ---------------------------------------------------------------------------
/**
 * Stem a single word using the Porter stemming algorithm.
 *
 * The input is lowercased and the result is lowercase. Falsy input (e.g. the
 * empty string) is returned unchanged, and words shorter than 3 characters
 * are lowercased but otherwise left unstemmed.
 *
 * Irregular verb forms (e.g. "built", "ran", "spoke") are first normalized
 * to their base form before Porter steps are applied, ensuring that
 * stem("built") === stem("build"), stem("spoke") === stem("speak"), etc.
 *
 * @param {string} word - Token to stem.
 * @returns {string} The stemmed token.
 */
export function stem(word) {
    if (!word)
        return word;
    const lower = word.toLowerCase();
    // Short words: don't stem
    if (lower.length <= 2)
        return lower;
    // Normalize irregular verb forms to base before stemming.
    // Only own properties count: a bare IRREGULAR_FORMS[lower] would resolve
    // inherited keys such as "constructor" or "__proto__" to non-string values
    // from Object.prototype and crash the string-based steps below.
    const mapped = Object.prototype.hasOwnProperty.call(IRREGULAR_FORMS, lower)
        ? IRREGULAR_FORMS[lower]
        : undefined;
    const normalized = mapped ?? lower;
    let w = normalized;
    w = step1a(w);
    w = step1b(w);
    w = step1c(w);
    w = step2(w);
    w = step3(w);
    w = step4(w);
    w = step5a(w);
    w = step5b(w);
    return w;
}
/**
 * Stem an array of tokens.
 *
 * @param {string[]} tokens - Tokens to stem.
 * @returns {string[]} A new array holding stem(token) for each input token.
 */
export function stemTokens(tokens) {
    return tokens.map(stem);
}
510
+ //# sourceMappingURL=stemmer.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"stemmer.js","sourceRoot":"","sources":["../../src/core/stemmer.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,8EAA8E;AAC9E,4BAA4B;AAC5B,8EAA8E;AAE9E;;;GAGG;AACH,SAAS,SAAS,CAAC,IAAY,EAAE,CAAS;IACxC,MAAM,CAAC,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,IAAI,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC;QAAE,OAAO,IAAI,CAAC;IACrC,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC,GAAG,CAAC,CAAC;QAAE,OAAO,IAAI,CAAC;IAC/D,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;;;GAIG;AACH,SAAS,UAAU,CAAC,IAAY;IAC9B,IAAI,CAAC,GAAG,CAAC,CAAC;IACV,IAAI,OAAO,GAAG,KAAK,CAAC;IACpB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACrC,MAAM,CAAC,GAAG,SAAS,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;QAC7B,IAAI,OAAO,IAAI,CAAC,CAAC,EAAE,CAAC;YAClB,CAAC,EAAE,CAAC;YACJ,OAAO,GAAG,KAAK,CAAC;QAClB,CAAC;aAAM,IAAI,CAAC,OAAO,IAAI,CAAC,EAAE,CAAC;YACzB,OAAO,GAAG,IAAI,CAAC;QACjB,CAAC;IACH,CAAC;IACD,OAAO,CAAC,CAAC;AACX,CAAC;AAED,4DAA4D;AAC5D,SAAS,aAAa,CAAC,IAAY;IACjC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACrC,IAAI,SAAS,CAAC,IAAI,EAAE,CAAC,CAAC;YAAE,OAAO,IAAI,CAAC;IACtC,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,kFAAkF;AAClF,SAAS,mBAAmB,CAAC,IAAY;IACvC,MAAM,CAAC,GAAG,IAAI,CAAC,MAAM,CAAC;IACtB,IAAI,CAAC,GAAG,CAAC;QAAE,OAAO,KAAK,CAAC;IACxB,OAAO,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC;AAChE,CAAC;AAED;;;GAGG;AACH,SAAS,OAAO,CAAC,IAAY;IAC3B,MAAM,CAAC,GAAG,IAAI,CAAC,MAAM,CAAC;IACtB,IAAI,CAAC,GAAG,CAAC;QAAE,OAAO,KAAK,CAAC;IACxB,MAAM,EAAE,GAAG,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IACvB,OAAO,CACL,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC,GAAG,CAAC,CAAC;QACvB,SAAS,CAAC,IAAI,EAAE,CAAC,GAAG,CAAC,CAAC;QACtB,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC,GAAG,CAAC,CAAC;QACvB,EAAE,KAAK,GAAG,IAAI,EAAE,KAAK,GAAG,IAAI,EAAE,KAAK,GAAG,CACvC,CAAC;AACJ,CAAC;AAED,8EAA8E;AAC9E,oBAAoB;AACpB,8EAA8E;AAE9E,SAAS,MAAM,CAAC,IAAY;IAC1B,IAAI,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;QAC1B,OAAO,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,CA
AC,CAAC,CAAC,CAAC,CAAC,oBAAoB;IAChD,CAAC;IACD,IAAI,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;QACzB,OAAO,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,gBAAgB;IAC5C,CAAC;IACD,IAAI,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;QACxB,OAAO,IAAI,CAAC,CAAC,8BAA8B;IAC7C,CAAC;IACD,IAAI,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC1C,OAAO,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,aAAa;IACzC,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,8EAA8E;AAC9E,kCAAkC;AAClC,8EAA8E;AAE9E,SAAS,WAAW,CAAC,IAAY;IAC/B,WAAW;IACX,IAAI,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC;QAAE,OAAO,IAAI,GAAG,GAAG,CAAC,CAAC,uBAAuB;IACnE,WAAW;IACX,IAAI,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC;QAAE,OAAO,IAAI,GAAG,GAAG,CAAC,CAAC,qBAAqB;IACjE,WAAW;IACX,IAAI,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC;QAAE,OAAO,IAAI,GAAG,GAAG,CAAC,CAAC,eAAe;IAE3D,8CAA8C;IAC9C,IACE,mBAAmB,CAAC,IAAI,CAAC;QACzB,CAAC,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC;QACpB,CAAC,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC;QACpB,CAAC,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,EACpB,CAAC;QACD,OAAO,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,+BAA+B;IAC3D,CAAC;IAED,2BAA2B;IAC3B,IAAI,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC;QAC5C,OAAO,IAAI,GAAG,GAAG,CAAC,CAAC,6CAA6C;QAChE,+EAA+E;IACjF,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,MAAM,CAAC,IAAY;IAC1B,iBAAiB;IACjB,IAAI,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;QACzB,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;QAC/B,IAAI,UAAU,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;YACzB,OAAO,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,6BAA6B;QACzD,CAAC;QACD,OAAO,IAAI,CAAC;IACd,CAAC;IAED,4BAA4B;IAC5B,IAAI,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;QACxB,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;QAC/B,IAAI,aAAa,CAAC,IAAI,CAAC,EAAE,CAAC;YACxB,OAAO,WAAW,CAAC,IAAI,CAAC,CAAC;QAC3B,CAAC;QACD,OAAO,IAAI,CAAC;IACd,CAAC;IAED,6BAA6B;IAC7B,IAAI,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;QACzB,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;QAC/B,IAAI,aAAa,CAAC,IAAI,CAAC,EAAE,CAA
C;YACxB,OAAO,WAAW,CAAC,IAAI,CAAC,CAAC;QAC3B,CAAC;QACD,OAAO,IAAI,CAAC;IACd,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAED,8EAA8E;AAC9E,kBAAkB;AAClB,8EAA8E;AAE9E,SAAS,MAAM,CAAC,IAAY;IAC1B,IAAI,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC1C,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;QAC/B,IAAI,aAAa,CAAC,IAAI,CAAC,EAAE,CAAC;YACxB,OAAO,IAAI,GAAG,GAAG,CAAC,CAAC,gBAAgB;QACrC,CAAC;IACH,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,8EAA8E;AAC9E,kCAAkC;AAClC,8EAA8E;AAE9E,MAAM,WAAW,GAA4B;IAC3C,CAAC,SAAS,EAAE,KAAK,CAAC;IAClB,CAAC,QAAQ,EAAE,MAAM,CAAC;IAClB,CAAC,MAAM,EAAE,MAAM,CAAC;IAChB,CAAC,MAAM,EAAE,MAAM,CAAC;IAChB,CAAC,MAAM,EAAE,KAAK,CAAC;IACf,CAAC,MAAM,EAAE,MAAM,CAAC;IAChB,CAAC,MAAM,EAAE,IAAI,CAAC;IACd,CAAC,OAAO,EAAE,KAAK,CAAC;IAChB,CAAC,KAAK,EAAE,GAAG,CAAC;IACZ,CAAC,OAAO,EAAE,KAAK,CAAC;IAChB,CAAC,SAAS,EAAE,KAAK,CAAC;IAClB,CAAC,OAAO,EAAE,KAAK,CAAC;IAChB,CAAC,MAAM,EAAE,KAAK,CAAC;IACf,CAAC,OAAO,EAAE,IAAI,CAAC;IACf,CAAC,SAAS,EAAE,KAAK,CAAC;IAClB,CAAC,SAAS,EAAE,KAAK,CAAC;IAClB,CAAC,SAAS,EAAE,KAAK,CAAC;IAClB,CAAC,OAAO,EAAE,IAAI,CAAC;IACf,CAAC,OAAO,EAAE,KAAK,CAAC;IAChB,CAAC,QAAQ,EAAE,KAAK,CAAC;CAClB,CAAC;AAEF,SAAS,KAAK,CAAC,IAAY;IACzB,KAAK,MAAM,CAAC,MAAM,EAAE,WAAW,CAAC,IAAI,WAAW,EAAE,CAAC;QAChD,IAAI,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;YAC1B,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;YAC3C,IAAI,UAAU,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;gBACzB,OAAO,IAAI,GAAG,WAAW,CAAC;YAC5B,CAAC;YACD,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,8EAA8E;AAC9E,kCAAkC;AAClC,8EAA8E;AAE9E,MAAM,WAAW,GAA4B;IAC3C,CAAC,OAAO,EAAE,IAAI,CAAC;IACf,CAAC,OAAO,EAAE,EAAE,CAAC;IACb,CAAC,OAAO,EAAE,IAAI,CAAC;IACf,CAAC,OAAO,EAAE,IAAI,CAAC;IACf,CAAC,MAAM,EAAE,IAAI,CAAC;IACd,CAAC,KAAK,EAAE,EAAE,CAAC;IACX,CAAC,MAAM,EAAE,EAAE,CAAC;CACb,CAAC;AAEF,SAAS,KAAK,CAAC,IAAY;IACzB,KAAK,MAAM,CAAC,MAAM,EAAE,WAAW,CAAC,IAAI,WAAW,EAAE,CAAC;QAChD,IAAI,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;YAC1B,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC
,MAAM,CAAC,MAAM,CAAC,CAAC;YAC3C,IAAI,UAAU,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;gBACzB,OAAO,IAAI,GAAG,WAAW,CAAC;YAC5B,CAAC;YACD,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,8EAA8E;AAC9E,kCAAkC;AAClC,8EAA8E;AAE9E,MAAM,WAAW,GAA4B;IAC3C,CAAC,OAAO,EAAE,EAAE,CAAC;IACb,CAAC,MAAM,EAAE,EAAE,CAAC;IACZ,CAAC,MAAM,EAAE,EAAE,CAAC;IACZ,CAAC,MAAM,EAAE,EAAE,CAAC;IACZ,CAAC,MAAM,EAAE,EAAE,CAAC;IACZ,CAAC,MAAM,EAAE,EAAE,CAAC;IACZ,CAAC,KAAK,EAAE,EAAE,CAAC;IACX,CAAC,KAAK,EAAE,EAAE,CAAC;IACX,CAAC,KAAK,EAAE,EAAE,CAAC;IACX,CAAC,KAAK,EAAE,EAAE,CAAC;IACX,CAAC,KAAK,EAAE,EAAE,CAAC;IACX,CAAC,KAAK,EAAE,EAAE,CAAC;IACX,CAAC,KAAK,EAAE,EAAE,CAAC;IACX,CAAC,KAAK,EAAE,EAAE,CAAC;IACX,CAAC,IAAI,EAAE,EAAE,CAAC;IACV,CAAC,IAAI,EAAE,EAAE,CAAC;IACV,CAAC,IAAI,EAAE,EAAE,CAAC;IACV,CAAC,IAAI,EAAE,EAAE,CAAC;CACX,CAAC;AAEF,SAAS,KAAK,CAAC,IAAY;IACzB,8CAA8C;IAC9C,IAAI,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;QACzB,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;QAC/B,IAAI,UAAU,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC;YACvE,OAAO,IAAI,CAAC;QACd,CAAC;QACD,OAAO,IAAI,CAAC;IACd,CAAC;IAED,KAAK,MAAM,CAAC,MAAM,EAAE,WAAW,CAAC,IAAI,WAAW,EAAE,CAAC;QAChD,IAAI,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;YAC1B,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;YAC3C,IAAI,UAAU,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;gBACzB,OAAO,IAAI,GAAG,WAAW,CAAC;YAC5B,CAAC;YACD,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,8EAA8E;AAC9E,4BAA4B;AAC5B,8EAA8E;AAE9E,SAAS,MAAM,CAAC,IAAY;IAC1B,IAAI,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;QACvB,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;QAC/B,MAAM,CAAC,GAAG,UAAU,CAAC,IAAI,CAAC,CAAC;QAC3B,IAAI,CAAC,GAAG,CAAC;YAAE,OAAO,IAAI,CAAC;QACvB,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC;YAAE,OAAO,IAAI,CAAC;IAC7C,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,8EAA8E;AAC9E,6BAA6B;AAC7B,8EAA8E;AAE9E,SAAS,MAAM,CAAC,IAAY;IAC1B,IAAI,IAAI,CAAC,QAAQ,CAAC,I
AAI,CAAC,IAAI,UAAU,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;QAChD,OAAO,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;IAC3B,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,8EAA8E;AAC9E,6BAA6B;AAC7B,8EAA8E;AAE9E;;;;;;;;;;;;GAYG;AACH,MAAM,CAAC,MAAM,eAAe,GAA2B;IACrD,QAAQ;IACR,OAAO,EAAE,OAAO;IAChB,MAAM;IACN,KAAK,EAAE,KAAK;IACZ,OAAO;IACP,MAAM,EAAE,MAAM;IACd,QAAQ;IACR,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE,OAAO;IACpC,QAAQ;IACR,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,OAAO;IAClC,OAAO;IACP,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM;IAC/B,OAAO;IACP,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM;IAC/B,KAAK;IACL,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI;IAC1B,OAAO;IACP,MAAM,EAAE,MAAM;IACd,MAAM;IACN,KAAK,EAAE,KAAK,EAAE,MAAM,EAAE,KAAK;IAC3B,OAAO;IACP,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM;IAC/B,QAAQ;IACR,SAAS,EAAE,OAAO;IAClB,OAAO;IACP,MAAM,EAAE,MAAM;IACd,MAAM;IACN,MAAM,EAAE,KAAK;IACb,MAAM;IACN,KAAK,EAAE,KAAK,EAAE,QAAQ,EAAE,KAAK;IAC7B,MAAM;IACN,QAAQ,EAAE,KAAK;IACf,QAAQ;IACR,SAAS,EAAE,OAAO;IAClB,OAAO;IACP,MAAM,EAAE,MAAM;IACd,QAAQ;IACR,OAAO,EAAE,OAAO;IAChB,OAAO;IACP,MAAM,EAAE,MAAM;IACd,OAAO;IACP,MAAM,EAAE,MAAM;IACd,QAAQ;IACR,OAAO,EAAE,OAAO;IAChB,OAAO;IACP,MAAM,EAAE,MAAM;IACd,MAAM;IACN,MAAM,EAAE,KAAK;IACb,OAAO;IACP,KAAK,EAAE,MAAM;IACb,OAAO;IACP,KAAK,EAAE,MAAM;IACb,OAAO;IACP,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM;IAC/B,OAAO;IACP,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM;IAC/B,QAAQ;IACR,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,OAAO;IACnC,QAAQ;IACR,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,OAAO;IACnC,SAAS;IACT,OAAO,EAAE,QAAQ,EAAE,QAAQ,EAAE,QAAQ;IACrC,OAAO;IACP,MAAM,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM;IAChC,QAAQ;IACR,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,OAAO;IACnC,OAAO;IACP,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM;IAC/B,MAAM;IACN,MAAM,EAAE,KAAK,EAAE,OAAO,EAAE,KAAK;IAC7B,QAAQ;IACR,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,OAAO;IACnC,OAAO;IACP,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM;IAC9B,OAAO;IACP,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM;IAC/B,MAAM;IACN,KAAK,EAAE,KAAK;IACZ,OAAO;IACP,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM;IAC9B,OAAO;IACP,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM;IAC9B,OAAO;IACP,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM;IAC9B
,QAAQ;IACR,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,OAAO;IAClC,OAAO;IACP,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM;IAC/B,SAAS;IACT,OAAO,EAAE,QAAQ,EAAE,QAAQ,EAAE,QAAQ;IACrC,QAAQ;IACR,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,OAAO;IACnC,OAAO;IACP,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM;IAC9B,QAAQ;IACR,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,OAAO;IACnC,UAAU;IACV,SAAS,EAAE,SAAS,EAAE,UAAU,EAAE,SAAS;IAC3C,SAAS;IACT,QAAQ,EAAE,QAAQ,EAAE,WAAW,EAAE,QAAQ;IACzC,OAAO;IACP,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM;IAC/B,OAAO;IACP,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM;IAC/B,QAAQ;IACR,QAAQ,EAAE,OAAO;IACjB,QAAQ;IACR,QAAQ,EAAE,OAAO;IACjB,QAAQ;IACR,QAAQ,EAAE,OAAO;IACjB,OAAO;IACP,QAAQ,EAAE,MAAM;IAChB,OAAO;IACP,MAAM,EAAE,MAAM;IACd,MAAM;IACN,KAAK,EAAE,KAAK;IACZ,OAAO;IACP,KAAK,EAAE,MAAM;IACb,OAAO;IACP,MAAM,EAAE,MAAM;IACd,OAAO;IACP,OAAO,EAAE,MAAM;IACf,OAAO;IACP,MAAM,EAAE,MAAM;IACd,OAAO;IACP,MAAM,EAAE,MAAM;IACd,MAAM;IACN,KAAK,EAAE,KAAK;IACZ,QAAQ;IACR,OAAO,EAAE,OAAO;IAChB,SAAS;IACT,QAAQ,EAAE,QAAQ,EAAE,UAAU,EAAE,QAAQ;IACxC,QAAQ;IACR,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,OAAO;IAClC,OAAO;IACP,MAAM,EAAE,MAAM;IACd,OAAO;IACP,MAAM,EAAE,MAAM;IACd,QAAQ;IACR,MAAM,EAAE,OAAO;IACf,QAAQ;IACR,OAAO,EAAE,OAAO;IAChB,QAAQ;IACR,MAAM,EAAE,OAAO;IACf,QAAQ;IACR,OAAO,EAAE,OAAO;IAChB,QAAQ;IACR,OAAO,EAAE,OAAO;IAChB,QAAQ;IACR,OAAO,EAAE,OAAO;IAChB,OAAO;IACP,MAAM,EAAE,MAAM;IACd,OAAO;IACP,OAAO,EAAE,MAAM;IACf,4BAA4B;IAC5B,QAAQ,EAAE,OAAO;IACjB,4BAA4B;IAC5B,QAAQ,EAAE,OAAO;IACjB,2BAA2B;IAC3B,OAAO,EAAE,MAAM;IACf,OAAO;IACP,OAAO,EAAE,MAAM;IACf,OAAO;IACP,OAAO,EAAE,MAAM;IACf,QAAQ;IACR,OAAO,EAAE,OAAO;IAChB,QAAQ;IACR,OAAO,EAAE,OAAO;CACjB,CAAC;AAEF,8EAA8E;AAC9E,qBAAqB;AACrB,8EAA8E;AAE9E;;;;;;;;;GASG;AACH,MAAM,UAAU,IAAI,CAAC,IAAY;IAC/B,IAAI,CAAC,IAAI;QAAE,OAAO,IAAI,CAAC;IACvB,MAAM,KAAK,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC;IAEjC,0BAA0B;IAC1B,IAAI,KAAK,CAAC,MAAM,IAAI,CAAC;QAAE,OAAO,KAAK,CAAC;IAEpC,yDAAyD;IACzD,MAAM,UAAU,GAAG,eAAe,CAAC,KAAK,CAAC,IAAI,KAAK,CAAC;IAEnD,IAAI,CAAC,GAAG,UAAU,CAAC;IACnB,CAAC,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;IACd,CAAC,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;IACd,CAAC,GAAG,MAAM,CAAC,CAAC,CAAC,C
AAC;IACd,CAAC,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;IACb,CAAC,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;IACb,CAAC,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;IACb,CAAC,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;IACd,CAAC,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;IAEd,OAAO,CAAC,CAAC;AACX,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,UAAU,CAAC,MAAgB;IACzC,OAAO,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC"}
@@ -0,0 +1,43 @@
1
+ /**
2
+ * Synonym expansion for query broadening.
3
+ *
4
+ * Provides stemmed synonym groups and a function to expand a set of stemmed
5
+ * query tokens with related synonyms (at a lower weight).
6
+ *
7
+ * Usage:
8
+ * const queryTerms = tokenizeQuestion(question); // already stemmed
9
+ * const expanded = expandWithSynonyms(queryTerms);
10
+ * // expanded includes originals (weight=1.0) + synonyms (weight=0.5)
11
+ */
12
+ /**
13
+ * Raw synonym groups. Each group is a set of words with equivalent or near-
14
+ * equivalent meaning in the context of software/web documentation queries.
15
+ *
16
+ * These are stored in unstemmed form for readability; the build process stems
17
+ * them into STEMMED_SYNONYM_GROUPS and builds an index.
18
+ */
19
+ export declare const SYNONYM_GROUPS: string[][];
20
+ /**
21
+ * Stemmed synonym groups.
22
+ * Each word in each group has been run through the Porter stemmer.
23
+ * Duplicate stems within a group are deduplicated.
24
+ */
25
+ export declare const STEMMED_SYNONYM_GROUPS: string[][];
26
+ export interface ExpandedTerm {
27
+ /** The stemmed term */
28
+ term: string;
29
+ /** 1.0 for original query terms, 0.5 for synonym expansions */
30
+ weight: number;
31
+ /** True if this term came from the original query */
32
+ isOriginal: boolean;
33
+ }
34
+ /**
35
+ * Expand a list of stemmed query tokens with their synonyms.
36
+ *
37
+ * @param terms - Already-stemmed tokens from the query
38
+ * @returns Array of ExpandedTerm objects. Original terms have weight=1.0,
39
+ * synonym expansions have weight=0.5.
40
+ * The returned array preserves originals first, then synonyms.
41
+ */
42
+ export declare function expandWithSynonyms(terms: string[]): ExpandedTerm[];
43
+ //# sourceMappingURL=synonyms.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"synonyms.d.ts","sourceRoot":"","sources":["../../src/core/synonyms.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAQH;;;;;;GAMG;AACH,eAAO,MAAM,cAAc,EAAE,MAAM,EAAE,EA8GpC,CAAC;AAMF;;;;GAIG;AACH,eAAO,MAAM,sBAAsB,EAAE,MAAM,EAAE,EAI3C,CAAC;AAsBH,MAAM,WAAW,YAAY;IAC3B,uBAAuB;IACvB,IAAI,EAAE,MAAM,CAAC;IACb,+DAA+D;IAC/D,MAAM,EAAE,MAAM,CAAC;IACf,qDAAqD;IACrD,UAAU,EAAE,OAAO,CAAC;CACrB;AAED;;;;;;;GAOG;AACH,wBAAgB,kBAAkB,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,YAAY,EAAE,CA0BlE"}