sparse-encode 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. package/README.md +102 -0
  2. package/dist/__tests__/bm25.test.d.ts +2 -0
  3. package/dist/__tests__/bm25.test.d.ts.map +1 -0
  4. package/dist/__tests__/bm25.test.js +89 -0
  5. package/dist/__tests__/bm25.test.js.map +1 -0
  6. package/dist/__tests__/tfidf.test.d.ts +2 -0
  7. package/dist/__tests__/tfidf.test.d.ts.map +1 -0
  8. package/dist/__tests__/tfidf.test.js +110 -0
  9. package/dist/__tests__/tfidf.test.js.map +1 -0
  10. package/dist/__tests__/tokenizer.test.d.ts +2 -0
  11. package/dist/__tests__/tokenizer.test.d.ts.map +1 -0
  12. package/dist/__tests__/tokenizer.test.js +51 -0
  13. package/dist/__tests__/tokenizer.test.js.map +1 -0
  14. package/dist/bm25.d.ts +3 -0
  15. package/dist/bm25.d.ts.map +1 -0
  16. package/dist/bm25.js +99 -0
  17. package/dist/bm25.js.map +1 -0
  18. package/dist/index.d.ts +4 -0
  19. package/dist/index.d.ts.map +1 -0
  20. package/dist/index.js +9 -0
  21. package/dist/index.js.map +1 -0
  22. package/dist/porter-stemmer.d.ts +2 -0
  23. package/dist/porter-stemmer.d.ts.map +1 -0
  24. package/dist/porter-stemmer.js +195 -0
  25. package/dist/porter-stemmer.js.map +1 -0
  26. package/dist/tfidf.d.ts +3 -0
  27. package/dist/tfidf.d.ts.map +1 -0
  28. package/dist/tfidf.js +98 -0
  29. package/dist/tfidf.js.map +1 -0
  30. package/dist/tokenizer.d.ts +8 -0
  31. package/dist/tokenizer.d.ts.map +1 -0
  32. package/dist/tokenizer.js +43 -0
  33. package/dist/tokenizer.js.map +1 -0
  34. package/dist/types.d.ts +41 -0
  35. package/dist/types.d.ts.map +1 -0
  36. package/dist/types.js +3 -0
  37. package/dist/types.js.map +1 -0
  38. package/dist/vocab.d.ts +11 -0
  39. package/dist/vocab.d.ts.map +1 -0
  40. package/dist/vocab.js +38 -0
  41. package/dist/vocab.js.map +1 -0
  42. package/package.json +33 -0
package/README.md ADDED
@@ -0,0 +1,102 @@
1
+ # sparse-encode
2
+
3
+ Generate BM25 and TF-IDF sparse vectors in JavaScript/TypeScript. Designed for use with sparse vector search engines (e.g., Pinecone sparse indexes, Qdrant sparse vectors).
4
+
5
+ ## Install
6
+
7
+ ```bash
8
+ npm install sparse-encode
9
+ ```
10
+
11
+ ## Quick Start
12
+
13
+ ```typescript
14
+ import { createBM25, createTFIDF } from 'sparse-encode'
15
+
16
+ const docs = [
17
+ 'the quick brown fox jumps over the lazy dog',
18
+ 'the dog barked loudly at the fox',
19
+ 'cats and dogs are common pets',
20
+ ]
21
+
22
+ // BM25
23
+ const bm25 = createBM25({ k1: 1.5, b: 0.75 })
24
+ bm25.fit(docs)
25
+ const vec = bm25.encode('quick fox')
26
+ // { indices: [2, 5, ...], values: [0.82, 1.23, ...] }
27
+
28
+ const queryVec = bm25.encodeQuery('fox')
29
+
30
+ // TF-IDF
31
+ const tfidf = createTFIDF({ sublinearTf: false })
32
+ tfidf.fit(docs)
33
+ const tVec = tfidf.encode('quick fox')
34
+ // L2-normalized: sum of squares ≈ 1.0
35
+ ```
36
+
37
+ ## BM25 Encoder
38
+
39
+ ```typescript
40
+ const enc = createBM25(options?)
41
+ enc.fit(documents: string[]) // build vocabulary + IDF statistics
42
+ enc.encode(text: string): SparseVector // encode a document
43
+ enc.encodeQuery(text: string): SparseVector // encode a query (no length norm)
44
+ enc.encodeBatch(texts: string[]): SparseVector[]
45
+ enc.getStats(): FitStats // { N, avgdl, vocabSize, totalTokens }
46
+ enc.serialize(): string // JSON snapshot (N, avgdl, totalTokens, df, vocab, options)
47
+ ```
48
+
49
+ ### Options
50
+
51
+ | Option | Default | Description |
52
+ |--------|---------|-------------|
53
+ | `k1` | `1.5` | Term frequency saturation parameter |
54
+ | `b` | `0.75` | Length normalization parameter |
55
+ | `stem` | `true` | Apply Porter stemmer |
56
+ | `stopwords` | `[]` | Additional stopwords to remove |
57
+ | `tokenizer` | built-in | Custom tokenizer function |
58
+
59
+ ## TF-IDF Encoder
60
+
61
+ ```typescript
62
+ const enc = createTFIDF(options?)
63
+ enc.fit(documents: string[])
64
+ enc.encode(text: string): SparseVector // L2-normalized TF-IDF vector
65
+ enc.encodeQuery(text: string): SparseVector
66
+ enc.encodeBatch(texts: string[]): SparseVector[]
67
+ enc.getStats(): FitStats
68
+ enc.serialize(): string
69
+ ```
70
+
71
+ ### Options
72
+
73
+ | Option | Default | Description |
74
+ |--------|---------|-------------|
75
+ | `stem` | `true` | Apply Porter stemmer |
76
+ | `stopwords` | `[]` | Additional stopwords to remove |
77
+ | `sublinearTf` | `false` | Use `1 + log(tf)` instead of `tf / doc_length` |
78
+ | `tokenizer` | built-in | Custom tokenizer function |
79
+
80
+ ## SparseVector Format
81
+
82
+ ```typescript
83
+ interface SparseVector {
84
+ indices: number[] // term IDs (sorted ascending)
85
+ values: number[] // corresponding scores
86
+ }
87
+ ```
88
+
89
+ Each position `i` maps `indices[i]` → `values[i]`. Zero values are omitted.
90
+
91
+ ## Tokenizer Pipeline
92
+
93
+ The built-in tokenizer:
94
+ 1. Lowercases the input
95
+ 2. Splits on non-word characters
96
+ 3. Removes pure numbers
97
+ 4. Removes common English stopwords
98
+ 5. Applies the Porter stemmer (can be disabled with `stem: false`)
99
+
100
+ ## License
101
+
102
+ MIT
@@ -0,0 +1,2 @@
1
+ export {};
2
+ //# sourceMappingURL=bm25.test.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"bm25.test.d.ts","sourceRoot":"","sources":["../../src/__tests__/bm25.test.ts"],"names":[],"mappings":""}
@@ -0,0 +1,89 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ const vitest_1 = require("vitest");
4
+ const bm25_1 = require("../bm25");
5
// Behavioural tests for the BM25 encoder returned by createBM25().
(0, vitest_1.describe)('createBM25', () => {
    // Fixture corpus shared by all tests; every test fits its own fresh encoder on it.
    const docs = [
        'the quick brown fox jumps over the lazy dog',
        'the dog barked loudly at the fox',
        'cats and dogs are common pets',
    ];
    // Total score mass of a sparse vector.
    const sumValues = (vec) => vec.values.reduce((acc, v) => acc + v, 0);
    (0, vitest_1.it)('throws if encode called before fit', () => {
        const enc = (0, bm25_1.createBM25)();
        (0, vitest_1.expect)(() => enc.encode('test')).toThrow();
    });
    (0, vitest_1.it)('returns a SparseVector with indices and values', () => {
        const enc = (0, bm25_1.createBM25)();
        enc.fit(docs);
        const vec = enc.encode('fox');
        (0, vitest_1.expect)(Array.isArray(vec.indices)).toBe(true);
        (0, vitest_1.expect)(Array.isArray(vec.values)).toBe(true);
        (0, vitest_1.expect)(vec.indices.length).toBe(vec.values.length);
    });
    (0, vitest_1.it)('encodes to non-empty vector for known term', () => {
        const enc = (0, bm25_1.createBM25)();
        enc.fit(docs);
        const vec = enc.encode('fox');
        (0, vitest_1.expect)(vec.indices.length).toBeGreaterThan(0);
    });
    (0, vitest_1.it)('indices are sorted ascending', () => {
        const enc = (0, bm25_1.createBM25)();
        enc.fit(docs);
        const vec = enc.encode('quick brown fox');
        // Require at least two entries, otherwise the ordering loop below checks nothing.
        (0, vitest_1.expect)(vec.indices.length).toBeGreaterThan(1);
        for (let i = 1; i < vec.indices.length; i++) {
            (0, vitest_1.expect)(vec.indices[i]).toBeGreaterThan(vec.indices[i - 1]);
        }
    });
    (0, vitest_1.it)('term in doc scores higher than term not in corpus', () => {
        const enc = (0, bm25_1.createBM25)();
        enc.fit(docs);
        const known = enc.encode('fox');
        // 'zebra' never occurs in the fixture corpus, so it has no vocabulary id.
        const unknown = enc.encode('zebra');
        (0, vitest_1.expect)(known.values.length).toBeGreaterThan(0);
        (0, vitest_1.expect)(known.values.every(v => v > 0)).toBe(true);
        // Out-of-vocabulary terms are skipped by the encoder and contribute no score.
        (0, vitest_1.expect)(unknown.indices).toHaveLength(0);
        (0, vitest_1.expect)(unknown.values).toHaveLength(0);
        (0, vitest_1.expect)(sumValues(known)).toBeGreaterThan(sumValues(unknown));
    });
    (0, vitest_1.it)('getStats returns correct N and vocabSize', () => {
        const enc = (0, bm25_1.createBM25)();
        enc.fit(docs);
        const stats = enc.getStats();
        (0, vitest_1.expect)(stats.N).toBe(3);
        (0, vitest_1.expect)(stats.vocabSize).toBeGreaterThan(0);
        (0, vitest_1.expect)(stats.avgdl).toBeGreaterThan(0);
        (0, vitest_1.expect)(stats.totalTokens).toBeGreaterThan(0);
    });
    (0, vitest_1.it)('encodeBatch returns one vector per text', () => {
        const enc = (0, bm25_1.createBM25)();
        enc.fit(docs);
        const vecs = enc.encodeBatch(['fox', 'dog', 'cat']);
        (0, vitest_1.expect)(vecs).toHaveLength(3);
    });
    (0, vitest_1.it)('encodeQuery returns a SparseVector', () => {
        const enc = (0, bm25_1.createBM25)();
        enc.fit(docs);
        const vec = enc.encodeQuery('brown fox');
        (0, vitest_1.expect)(Array.isArray(vec.indices)).toBe(true);
        (0, vitest_1.expect)(Array.isArray(vec.values)).toBe(true);
    });
    (0, vitest_1.it)('serialize returns valid JSON', () => {
        const enc = (0, bm25_1.createBM25)();
        enc.fit(docs);
        const json = enc.serialize();
        (0, vitest_1.expect)(() => JSON.parse(json)).not.toThrow();
        const parsed = JSON.parse(json);
        (0, vitest_1.expect)(parsed.N).toBe(3);
        (0, vitest_1.expect)(parsed.vocab).toBeDefined();
    });
    (0, vitest_1.it)('custom k1/b options affect scores', () => {
        const enc1 = (0, bm25_1.createBM25)({ k1: 1.0, b: 0.5 });
        const enc2 = (0, bm25_1.createBM25)({ k1: 2.0, b: 0.9 });
        enc1.fit(docs);
        enc2.fit(docs);
        const v1 = enc1.encode('fox');
        const v2 = enc2.encode('fox');
        // Same corpus and text, different parameters: the total score must differ.
        (0, vitest_1.expect)(sumValues(v1)).not.toBeCloseTo(sumValues(v2), 5);
    });
});
89
+ //# sourceMappingURL=bm25.test.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"bm25.test.js","sourceRoot":"","sources":["../../src/__tests__/bm25.test.ts"],"names":[],"mappings":";;AAAA,mCAA6C;AAC7C,kCAAoC;AAEpC,IAAA,iBAAQ,EAAC,YAAY,EAAE,GAAG,EAAE;IAC1B,MAAM,IAAI,GAAG;QACX,6CAA6C;QAC7C,kCAAkC;QAClC,+BAA+B;KAChC,CAAA;IAED,IAAA,WAAE,EAAC,oCAAoC,EAAE,GAAG,EAAE;QAC5C,MAAM,GAAG,GAAG,IAAA,iBAAU,GAAE,CAAA;QACxB,IAAA,eAAM,EAAC,GAAG,EAAE,CAAC,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,OAAO,EAAE,CAAA;IAC5C,CAAC,CAAC,CAAA;IAEF,IAAA,WAAE,EAAC,gDAAgD,EAAE,GAAG,EAAE;QACxD,MAAM,GAAG,GAAG,IAAA,iBAAU,GAAE,CAAA;QACxB,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,CAAA;QACb,MAAM,GAAG,GAAG,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,CAAA;QAC7B,IAAA,eAAM,EAAC,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;QAC7C,IAAA,eAAM,EAAC,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;QAC5C,IAAA,eAAM,EAAC,GAAG,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,CAAA;IACpD,CAAC,CAAC,CAAA;IAEF,IAAA,WAAE,EAAC,4CAA4C,EAAE,GAAG,EAAE;QACpD,MAAM,GAAG,GAAG,IAAA,iBAAU,GAAE,CAAA;QACxB,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,CAAA;QACb,MAAM,GAAG,GAAG,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,CAAA;QAC7B,IAAA,eAAM,EAAC,GAAG,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAA;IAC/C,CAAC,CAAC,CAAA;IAEF,IAAA,WAAE,EAAC,8BAA8B,EAAE,GAAG,EAAE;QACtC,MAAM,GAAG,GAAG,IAAA,iBAAU,GAAE,CAAA;QACxB,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,CAAA;QACb,MAAM,GAAG,GAAG,GAAG,CAAC,MAAM,CAAC,iBAAiB,CAAC,CAAA;QACzC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAC5C,IAAA,eAAM,EAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,eAAe,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAA;QAC5D,CAAC;IACH,CAAC,CAAC,CAAA;IAEF,IAAA,WAAE,EAAC,mDAAmD,EAAE,GAAG,EAAE;QAC3D,MAAM,GAAG,GAAG,IAAA,iBAAU,GAAE,CAAA;QACxB,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,CAAA;QACb,MAAM,GAAG,GAAG,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,CAAA;QAC7B,MAAM,MAAM,GAAG,GAAG,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAA;QACjD,iDAAiD;QACjD,IAAA,eAAM,EAAC,GAAG,CAAC,MAAM,CAAC,MAAM,C
AAC,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAA;IAC/C,CAAC,CAAC,CAAA;IAEF,IAAA,WAAE,EAAC,0CAA0C,EAAE,GAAG,EAAE;QAClD,MAAM,GAAG,GAAG,IAAA,iBAAU,GAAE,CAAA;QACxB,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,CAAA;QACb,MAAM,KAAK,GAAG,GAAG,CAAC,QAAQ,EAAE,CAAA;QAC5B,IAAA,eAAM,EAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;QACvB,IAAA,eAAM,EAAC,KAAK,CAAC,SAAS,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAA;QAC1C,IAAA,eAAM,EAAC,KAAK,CAAC,KAAK,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAA;QACtC,IAAA,eAAM,EAAC,KAAK,CAAC,WAAW,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAA;IAC9C,CAAC,CAAC,CAAA;IAEF,IAAA,WAAE,EAAC,yCAAyC,EAAE,GAAG,EAAE;QACjD,MAAM,GAAG,GAAG,IAAA,iBAAU,GAAE,CAAA;QACxB,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,CAAA;QACb,MAAM,IAAI,GAAG,GAAG,CAAC,WAAW,CAAC,CAAC,KAAK,EAAE,KAAK,EAAE,KAAK,CAAC,CAAC,CAAA;QACnD,IAAA,eAAM,EAAC,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;IAC9B,CAAC,CAAC,CAAA;IAEF,IAAA,WAAE,EAAC,oCAAoC,EAAE,GAAG,EAAE;QAC5C,MAAM,GAAG,GAAG,IAAA,iBAAU,GAAE,CAAA;QACxB,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,CAAA;QACb,MAAM,GAAG,GAAG,GAAG,CAAC,WAAW,CAAC,WAAW,CAAC,CAAA;QACxC,IAAA,eAAM,EAAC,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;QAC7C,IAAA,eAAM,EAAC,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;IAC9C,CAAC,CAAC,CAAA;IAEF,IAAA,WAAE,EAAC,8BAA8B,EAAE,GAAG,EAAE;QACtC,MAAM,GAAG,GAAG,IAAA,iBAAU,GAAE,CAAA;QACxB,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,CAAA;QACb,MAAM,IAAI,GAAG,GAAG,CAAC,SAAS,EAAE,CAAA;QAC5B,IAAA,eAAM,EAAC,GAAG,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,EAAE,CAAA;QAC5C,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAA;QAC/B,IAAA,eAAM,EAAC,MAAM,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;QACxB,IAAA,eAAM,EAAC,MAAM,CAAC,KAAK,CAAC,CAAC,WAAW,EAAE,CAAA;IACpC,CAAC,CAAC,CAAA;IAEF,IAAA,WAAE,EAAC,mCAAmC,EAAE,GAAG,EAAE;QAC3C,MAAM,IAAI,GAAG,IAAA,iBAAU,EAAC,EAAE,EAAE,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,CAAA;QAC5C,MAAM,IAAI,GAAG,IAAA,iBAAU,EAAC,EAAE,EAAE,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,CAAA;QAC5C,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,CAAA;QACd,IAAI,CAAC,GAAG,CAAC,IAAI,CAA
C,CAAA;QACd,MAAM,EAAE,GAAG,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,CAAA;QAC7B,MAAM,EAAE,GAAG,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,CAAA;QAC7B,0CAA0C;QAC1C,MAAM,IAAI,GAAG,EAAE,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAA;QACjD,MAAM,IAAI,GAAG,EAAE,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAA;QACjD,IAAA,eAAM,EAAC,IAAI,CAAC,CAAC,GAAG,CAAC,WAAW,CAAC,IAAI,EAAE,CAAC,CAAC,CAAA;IACvC,CAAC,CAAC,CAAA;AACJ,CAAC,CAAC,CAAA"}
@@ -0,0 +1,2 @@
1
+ export {};
2
+ //# sourceMappingURL=tfidf.test.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"tfidf.test.d.ts","sourceRoot":"","sources":["../../src/__tests__/tfidf.test.ts"],"names":[],"mappings":""}
@@ -0,0 +1,110 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ const vitest_1 = require("vitest");
4
+ const tfidf_1 = require("../tfidf");
5
// Behavioural tests for the TF-IDF encoder returned by createTFIDF().
(0, vitest_1.describe)('createTFIDF', () => {
    // Fixture corpus shared by all tests; every test fits its own fresh encoder on it.
    const docs = [
        'the quick brown fox jumps over the lazy dog',
        'the dog barked loudly at the fox',
        'cats and dogs are common pets',
    ];
    // Squared L2 norm of a sparse vector; 1.0 for a unit-length vector.
    const sumOfSquares = (vec) => vec.values.reduce((acc, v) => acc + v * v, 0);
    (0, vitest_1.it)('throws if encode called before fit', () => {
        const enc = (0, tfidf_1.createTFIDF)();
        (0, vitest_1.expect)(() => enc.encode('test')).toThrow();
    });
    (0, vitest_1.it)('returns a SparseVector with indices and values', () => {
        const enc = (0, tfidf_1.createTFIDF)();
        enc.fit(docs);
        const vec = enc.encode('fox');
        (0, vitest_1.expect)(Array.isArray(vec.indices)).toBe(true);
        (0, vitest_1.expect)(Array.isArray(vec.values)).toBe(true);
        (0, vitest_1.expect)(vec.indices.length).toBe(vec.values.length);
    });
    (0, vitest_1.it)('encodes to non-empty vector for known term', () => {
        const enc = (0, tfidf_1.createTFIDF)();
        enc.fit(docs);
        const vec = enc.encode('fox');
        (0, vitest_1.expect)(vec.indices.length).toBeGreaterThan(0);
    });
    (0, vitest_1.it)('indices are sorted ascending', () => {
        const enc = (0, tfidf_1.createTFIDF)();
        enc.fit(docs);
        const vec = enc.encode('quick brown fox');
        // Require at least two entries, otherwise the ordering loop below checks nothing.
        (0, vitest_1.expect)(vec.indices.length).toBeGreaterThan(1);
        for (let i = 1; i < vec.indices.length; i++) {
            (0, vitest_1.expect)(vec.indices[i]).toBeGreaterThan(vec.indices[i - 1]);
        }
    });
    (0, vitest_1.it)('output vector is unit-length (L2 norm ≈ 1)', () => {
        const enc = (0, tfidf_1.createTFIDF)();
        enc.fit(docs);
        const vec = enc.encode('quick brown fox');
        (0, vitest_1.expect)(sumOfSquares(vec)).toBeCloseTo(1.0, 5);
    });
    (0, vitest_1.it)('unit-length holds for single-term query', () => {
        const enc = (0, tfidf_1.createTFIDF)();
        enc.fit(docs);
        const vec = enc.encode('fox');
        // Assert non-emptiness explicitly: a silent `if` guard would let an empty vector pass.
        (0, vitest_1.expect)(vec.values.length).toBeGreaterThan(0);
        (0, vitest_1.expect)(sumOfSquares(vec)).toBeCloseTo(1.0, 5);
    });
    (0, vitest_1.it)('getStats returns correct N and vocabSize', () => {
        const enc = (0, tfidf_1.createTFIDF)();
        enc.fit(docs);
        const stats = enc.getStats();
        (0, vitest_1.expect)(stats.N).toBe(3);
        (0, vitest_1.expect)(stats.vocabSize).toBeGreaterThan(0);
        (0, vitest_1.expect)(stats.totalTokens).toBeGreaterThan(0);
    });
    (0, vitest_1.it)('encodeBatch returns one vector per text', () => {
        const enc = (0, tfidf_1.createTFIDF)();
        enc.fit(docs);
        const vecs = enc.encodeBatch(['fox', 'dog', 'cat']);
        (0, vitest_1.expect)(vecs).toHaveLength(3);
    });
    (0, vitest_1.it)('encodeQuery returns unit-length SparseVector', () => {
        const enc = (0, tfidf_1.createTFIDF)();
        enc.fit(docs);
        const vec = enc.encodeQuery('brown fox');
        // Both query terms occur in the fixture corpus, so the vector must not be empty.
        (0, vitest_1.expect)(vec.values.length).toBeGreaterThan(0);
        (0, vitest_1.expect)(sumOfSquares(vec)).toBeCloseTo(1.0, 5);
    });
    (0, vitest_1.it)('serialize returns valid JSON', () => {
        const enc = (0, tfidf_1.createTFIDF)();
        enc.fit(docs);
        const json = enc.serialize();
        (0, vitest_1.expect)(() => JSON.parse(json)).not.toThrow();
        const parsed = JSON.parse(json);
        (0, vitest_1.expect)(parsed.N).toBe(3);
        (0, vitest_1.expect)(parsed.vocab).toBeDefined();
    });
    (0, vitest_1.it)('sublinearTf option changes scores', () => {
        const enc1 = (0, tfidf_1.createTFIDF)({ sublinearTf: false });
        const enc2 = (0, tfidf_1.createTFIDF)({ sublinearTf: true });
        enc1.fit(docs);
        enc2.fit(docs);
        // Use a text with repeated terms to make sublinear difference visible
        const text = 'fox fox fox quick';
        const v1 = enc1.encode(text);
        const v2 = enc2.encode(text);
        (0, vitest_1.expect)(v1.values.length).toBeGreaterThan(0);
        (0, vitest_1.expect)(v2.values.length).toBeGreaterThan(0);
        // Both should be unit vectors...
        (0, vitest_1.expect)(sumOfSquares(v1)).toBeCloseTo(1.0, 5);
        (0, vitest_1.expect)(sumOfSquares(v2)).toBeCloseTo(1.0, 5);
        // ...over the same terms, but with a different weight distribution.
        (0, vitest_1.expect)(v1.indices).toEqual(v2.indices);
        (0, vitest_1.expect)(v1.values).not.toEqual(v2.values);
    });
    (0, vitest_1.it)('empty text returns empty vector', () => {
        const enc = (0, tfidf_1.createTFIDF)();
        enc.fit(docs);
        const vec = enc.encode('');
        (0, vitest_1.expect)(vec.indices).toHaveLength(0);
        (0, vitest_1.expect)(vec.values).toHaveLength(0);
    });
});
110
+ //# sourceMappingURL=tfidf.test.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"tfidf.test.js","sourceRoot":"","sources":["../../src/__tests__/tfidf.test.ts"],"names":[],"mappings":";;AAAA,mCAA6C;AAC7C,oCAAsC;AAEtC,IAAA,iBAAQ,EAAC,aAAa,EAAE,GAAG,EAAE;IAC3B,MAAM,IAAI,GAAG;QACX,6CAA6C;QAC7C,kCAAkC;QAClC,+BAA+B;KAChC,CAAA;IAED,IAAA,WAAE,EAAC,oCAAoC,EAAE,GAAG,EAAE;QAC5C,MAAM,GAAG,GAAG,IAAA,mBAAW,GAAE,CAAA;QACzB,IAAA,eAAM,EAAC,GAAG,EAAE,CAAC,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,OAAO,EAAE,CAAA;IAC5C,CAAC,CAAC,CAAA;IAEF,IAAA,WAAE,EAAC,gDAAgD,EAAE,GAAG,EAAE;QACxD,MAAM,GAAG,GAAG,IAAA,mBAAW,GAAE,CAAA;QACzB,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,CAAA;QACb,MAAM,GAAG,GAAG,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,CAAA;QAC7B,IAAA,eAAM,EAAC,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;QAC7C,IAAA,eAAM,EAAC,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;QAC5C,IAAA,eAAM,EAAC,GAAG,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,CAAA;IACpD,CAAC,CAAC,CAAA;IAEF,IAAA,WAAE,EAAC,4CAA4C,EAAE,GAAG,EAAE;QACpD,MAAM,GAAG,GAAG,IAAA,mBAAW,GAAE,CAAA;QACzB,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,CAAA;QACb,MAAM,GAAG,GAAG,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,CAAA;QAC7B,IAAA,eAAM,EAAC,GAAG,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAA;IAC/C,CAAC,CAAC,CAAA;IAEF,IAAA,WAAE,EAAC,8BAA8B,EAAE,GAAG,EAAE;QACtC,MAAM,GAAG,GAAG,IAAA,mBAAW,GAAE,CAAA;QACzB,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,CAAA;QACb,MAAM,GAAG,GAAG,GAAG,CAAC,MAAM,CAAC,iBAAiB,CAAC,CAAA;QACzC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAC5C,IAAA,eAAM,EAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,eAAe,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAA;QAC5D,CAAC;IACH,CAAC,CAAC,CAAA;IAEF,IAAA,WAAE,EAAC,4CAA4C,EAAE,GAAG,EAAE;QACpD,MAAM,GAAG,GAAG,IAAA,mBAAW,GAAE,CAAA;QACzB,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,CAAA;QACb,MAAM,GAAG,GAAG,GAAG,CAAC,MAAM,CAAC,iBAAiB,CAAC,CAAA;QACzC,MAAM,KAAK,GAAG,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAA;QAC3D,IAAA,eAAM,EAAC,
KAAK,CAAC,CAAC,WAAW,CAAC,GAAG,EAAE,CAAC,CAAC,CAAA;IACnC,CAAC,CAAC,CAAA;IAEF,IAAA,WAAE,EAAC,yCAAyC,EAAE,GAAG,EAAE;QACjD,MAAM,GAAG,GAAG,IAAA,mBAAW,GAAE,CAAA;QACzB,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,CAAA;QACb,MAAM,GAAG,GAAG,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,CAAA;QAC7B,IAAI,GAAG,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC1B,MAAM,KAAK,GAAG,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAA;YAC3D,IAAA,eAAM,EAAC,KAAK,CAAC,CAAC,WAAW,CAAC,GAAG,EAAE,CAAC,CAAC,CAAA;QACnC,CAAC;IACH,CAAC,CAAC,CAAA;IAEF,IAAA,WAAE,EAAC,0CAA0C,EAAE,GAAG,EAAE;QAClD,MAAM,GAAG,GAAG,IAAA,mBAAW,GAAE,CAAA;QACzB,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,CAAA;QACb,MAAM,KAAK,GAAG,GAAG,CAAC,QAAQ,EAAE,CAAA;QAC5B,IAAA,eAAM,EAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;QACvB,IAAA,eAAM,EAAC,KAAK,CAAC,SAAS,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAA;QAC1C,IAAA,eAAM,EAAC,KAAK,CAAC,WAAW,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAA;IAC9C,CAAC,CAAC,CAAA;IAEF,IAAA,WAAE,EAAC,yCAAyC,EAAE,GAAG,EAAE;QACjD,MAAM,GAAG,GAAG,IAAA,mBAAW,GAAE,CAAA;QACzB,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,CAAA;QACb,MAAM,IAAI,GAAG,GAAG,CAAC,WAAW,CAAC,CAAC,KAAK,EAAE,KAAK,EAAE,KAAK,CAAC,CAAC,CAAA;QACnD,IAAA,eAAM,EAAC,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;IAC9B,CAAC,CAAC,CAAA;IAEF,IAAA,WAAE,EAAC,8CAA8C,EAAE,GAAG,EAAE;QACtD,MAAM,GAAG,GAAG,IAAA,mBAAW,GAAE,CAAA;QACzB,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,CAAA;QACb,MAAM,GAAG,GAAG,GAAG,CAAC,WAAW,CAAC,WAAW,CAAC,CAAA;QACxC,IAAI,GAAG,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC1B,MAAM,KAAK,GAAG,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAA;YAC3D,IAAA,eAAM,EAAC,KAAK,CAAC,CAAC,WAAW,CAAC,GAAG,EAAE,CAAC,CAAC,CAAA;QACnC,CAAC;IACH,CAAC,CAAC,CAAA;IAEF,IAAA,WAAE,EAAC,8BAA8B,EAAE,GAAG,EAAE;QACtC,MAAM,GAAG,GAAG,IAAA,mBAAW,GAAE,CAAA;QACzB,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,CAAA;QACb,MAAM,IAAI,GAAG,GAAG,CAAC,SAAS,EAAE,CAAA;QAC5B,IAAA,eAAM,EAAC,GAAG,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,EAAE,CAAA;QAC5C,MAAM,MAAM,GAAG,IAA
I,CAAC,KAAK,CAAC,IAAI,CAAC,CAAA;QAC/B,IAAA,eAAM,EAAC,MAAM,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;QACxB,IAAA,eAAM,EAAC,MAAM,CAAC,KAAK,CAAC,CAAC,WAAW,EAAE,CAAA;IACpC,CAAC,CAAC,CAAA;IAEF,IAAA,WAAE,EAAC,mCAAmC,EAAE,GAAG,EAAE;QAC3C,MAAM,IAAI,GAAG,IAAA,mBAAW,EAAC,EAAE,WAAW,EAAE,KAAK,EAAE,CAAC,CAAA;QAChD,MAAM,IAAI,GAAG,IAAA,mBAAW,EAAC,EAAE,WAAW,EAAE,IAAI,EAAE,CAAC,CAAA;QAC/C,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,CAAA;QACd,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,CAAA;QACd,sEAAsE;QACtE,MAAM,IAAI,GAAG,mBAAmB,CAAA;QAChC,MAAM,EAAE,GAAG,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAA;QAC5B,MAAM,EAAE,GAAG,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAA;QAC5B,+DAA+D;QAC/D,IAAI,EAAE,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,IAAI,EAAE,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACjD,MAAM,EAAE,GAAG,EAAE,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAA;YACnD,MAAM,EAAE,GAAG,EAAE,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAA;YACnD,IAAA,eAAM,EAAC,EAAE,CAAC,CAAC,WAAW,CAAC,GAAG,EAAE,CAAC,CAAC,CAAA;YAC9B,IAAA,eAAM,EAAC,EAAE,CAAC,CAAC,WAAW,CAAC,GAAG,EAAE,CAAC,CAAC,CAAA;QAChC,CAAC;IACH,CAAC,CAAC,CAAA;IAEF,IAAA,WAAE,EAAC,iCAAiC,EAAE,GAAG,EAAE;QACzC,MAAM,GAAG,GAAG,IAAA,mBAAW,GAAE,CAAA;QACzB,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,CAAA;QACb,MAAM,GAAG,GAAG,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAA;QAC1B,IAAA,eAAM,EAAC,GAAG,CAAC,OAAO,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;QACnC,IAAA,eAAM,EAAC,GAAG,CAAC,MAAM,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;IACpC,CAAC,CAAC,CAAA;AACJ,CAAC,CAAC,CAAA"}
@@ -0,0 +1,2 @@
1
+ export {};
2
+ //# sourceMappingURL=tokenizer.test.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"tokenizer.test.d.ts","sourceRoot":"","sources":["../../src/__tests__/tokenizer.test.ts"],"names":[],"mappings":""}
@@ -0,0 +1,51 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ const vitest_1 = require("vitest");
4
+ const tokenizer_1 = require("../tokenizer");
5
// Tests for the built-in tokenizer pipeline exposed as defaultTokenizer().
(0, vitest_1.describe)('defaultTokenizer', () => {
    const { it, expect } = vitest_1;
    const { defaultTokenizer } = tokenizer_1;
    it('lowercases input', () => {
        const allLowercase = defaultTokenizer('Hello World').every(tok => tok === tok.toLowerCase());
        expect(allLowercase).toBe(true);
    });
    it('removes stopwords', () => {
        const result = defaultTokenizer('the cat sat on the mat');
        expect(result).not.toContain('the');
        expect(result).not.toContain('on');
    });
    it('removes pure numbers', () => {
        expect(defaultTokenizer('I have 42 cats')).not.toContain('42');
    });
    it('splits on non-word characters', () => {
        const result = defaultTokenizer('hello-world foo.bar');
        expect(result.length).toBeGreaterThanOrEqual(2);
    });
    it('applies Porter stemmer (running → run)', () => {
        // The stemmed form of "running" is expected to be "run".
        expect(defaultTokenizer('running')).toContain('run');
    });
    it('applies Porter stemmer (cats → cat)', () => {
        expect(defaultTokenizer('cats')).toContain('cat');
    });
});
33
// Tests for tokenize(), the option-driven entry point (custom tokenizer, extra stopwords, stemming toggle).
(0, vitest_1.describe)('tokenize', () => {
    const { it, expect } = vitest_1;
    const { tokenize } = tokenizer_1;
    it('uses custom tokenizer when provided', () => {
        // A caller-supplied tokenizer: plain comma split.
        function splitOnCommas(text) {
            return text.split(',');
        }
        const result = tokenize('a,b,c', { tokenizer: splitOnCommas });
        expect(result).toEqual(['a', 'b', 'c']);
    });
    it('applies extra stopwords', () => {
        const opts = { stem: false, stopwords: ['dog'] };
        const result = tokenize('cat dog bird', opts);
        expect(result).not.toContain('dog');
        expect(result).toContain('cat');
        expect(result).toContain('bird');
    });
    it('skips stemming when stem=false', () => {
        const result = tokenize('running cats', { stem: false });
        // With stemming off the surface forms must come through unchanged.
        expect(result).toContain('running');
        expect(result).toContain('cats');
    });
});
51
+ //# sourceMappingURL=tokenizer.test.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"tokenizer.test.js","sourceRoot":"","sources":["../../src/__tests__/tokenizer.test.ts"],"names":[],"mappings":";;AAAA,mCAA6C;AAC7C,4CAAyD;AAEzD,IAAA,iBAAQ,EAAC,kBAAkB,EAAE,GAAG,EAAE;IAChC,IAAA,WAAE,EAAC,kBAAkB,EAAE,GAAG,EAAE;QAC1B,MAAM,MAAM,GAAG,IAAA,4BAAgB,EAAC,aAAa,CAAC,CAAA;QAC9C,IAAA,eAAM,EAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;IAC7D,CAAC,CAAC,CAAA;IAEF,IAAA,WAAE,EAAC,mBAAmB,EAAE,GAAG,EAAE;QAC3B,MAAM,MAAM,GAAG,IAAA,4BAAgB,EAAC,wBAAwB,CAAC,CAAA;QACzD,IAAA,eAAM,EAAC,MAAM,CAAC,CAAC,GAAG,CAAC,SAAS,CAAC,KAAK,CAAC,CAAA;QACnC,IAAA,eAAM,EAAC,MAAM,CAAC,CAAC,GAAG,CAAC,SAAS,CAAC,IAAI,CAAC,CAAA;IACpC,CAAC,CAAC,CAAA;IAEF,IAAA,WAAE,EAAC,sBAAsB,EAAE,GAAG,EAAE;QAC9B,MAAM,MAAM,GAAG,IAAA,4BAAgB,EAAC,gBAAgB,CAAC,CAAA;QACjD,IAAA,eAAM,EAAC,MAAM,CAAC,CAAC,GAAG,CAAC,SAAS,CAAC,IAAI,CAAC,CAAA;IACpC,CAAC,CAAC,CAAA;IAEF,IAAA,WAAE,EAAC,+BAA+B,EAAE,GAAG,EAAE;QACvC,MAAM,MAAM,GAAG,IAAA,4BAAgB,EAAC,qBAAqB,CAAC,CAAA;QACtD,IAAA,eAAM,EAAC,MAAM,CAAC,MAAM,CAAC,CAAC,sBAAsB,CAAC,CAAC,CAAC,CAAA;IACjD,CAAC,CAAC,CAAA;IAEF,IAAA,WAAE,EAAC,wCAAwC,EAAE,GAAG,EAAE;QAChD,MAAM,MAAM,GAAG,IAAA,4BAAgB,EAAC,SAAS,CAAC,CAAA;QAC1C,oCAAoC;QACpC,IAAA,eAAM,EAAC,MAAM,CAAC,CAAC,SAAS,CAAC,KAAK,CAAC,CAAA;IACjC,CAAC,CAAC,CAAA;IAEF,IAAA,WAAE,EAAC,qCAAqC,EAAE,GAAG,EAAE;QAC7C,MAAM,MAAM,GAAG,IAAA,4BAAgB,EAAC,MAAM,CAAC,CAAA;QACvC,IAAA,eAAM,EAAC,MAAM,CAAC,CAAC,SAAS,CAAC,KAAK,CAAC,CAAA;IACjC,CAAC,CAAC,CAAA;AACJ,CAAC,CAAC,CAAA;AAEF,IAAA,iBAAQ,EAAC,UAAU,EAAE,GAAG,EAAE;IACxB,IAAA,WAAE,EAAC,qCAAqC,EAAE,GAAG,EAAE;QAC7C,MAAM,MAAM,GAAG,CAAC,CAAS,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAA;QAC1C,MAAM,MAAM,GAAG,IAAA,oBAAQ,EAAC,OAAO,EAAE,EAAE,SAAS,EAAE,MAAM,EAAE,CAAC,CAAA;QACvD,IAAA,eAAM,EAAC,MAAM,CAAC,CAAC,OAAO,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,CAAA;IACzC,CAAC,CAAC,CAAA;IAEF,IAAA,WAAE,EAAC,yBAAyB,EAAE,GAAG,EAAE;QACjC,MAAM,MAAM,GAAG,IAAA,oBAAQ,EAAC,cAAc,EAAE,EAAE,IAAI,EAAE,KAAK,EAAE,SAAS,EAAE,CAAC,KAAK,CAAC,EAAE,CAAC,CAAA;QAC5E,IA
AA,eAAM,EAAC,MAAM,CAAC,CAAC,GAAG,CAAC,SAAS,CAAC,KAAK,CAAC,CAAA;QACnC,IAAA,eAAM,EAAC,MAAM,CAAC,CAAC,SAAS,CAAC,KAAK,CAAC,CAAA;QAC/B,IAAA,eAAM,EAAC,MAAM,CAAC,CAAC,SAAS,CAAC,MAAM,CAAC,CAAA;IAClC,CAAC,CAAC,CAAA;IAEF,IAAA,WAAE,EAAC,gCAAgC,EAAE,GAAG,EAAE;QACxC,MAAM,MAAM,GAAG,IAAA,oBAAQ,EAAC,cAAc,EAAE,EAAE,IAAI,EAAE,KAAK,EAAE,CAAC,CAAA;QACxD,IAAA,eAAM,EAAC,MAAM,CAAC,CAAC,SAAS,CAAC,SAAS,CAAC,CAAA;QACnC,IAAA,eAAM,EAAC,MAAM,CAAC,CAAC,SAAS,CAAC,MAAM,CAAC,CAAA;IAClC,CAAC,CAAC,CAAA;AACJ,CAAC,CAAC,CAAA"}
package/dist/bm25.d.ts ADDED
@@ -0,0 +1,3 @@
1
+ import type { BM25Options, BM25Encoder } from './types';
2
+ export declare function createBM25(options?: BM25Options): BM25Encoder;
3
+ //# sourceMappingURL=bm25.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"bm25.d.ts","sourceRoot":"","sources":["../src/bm25.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,WAAW,EAAE,WAAW,EAA0B,MAAM,SAAS,CAAA;AAE/E,wBAAgB,UAAU,CAAC,OAAO,CAAC,EAAE,WAAW,GAAG,WAAW,CA0G7D"}
package/dist/bm25.js ADDED
@@ -0,0 +1,99 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.createBM25 = createBM25;
4
+ const tokenizer_1 = require("./tokenizer");
5
+ const vocab_1 = require("./vocab");
6
+ function createBM25(options) {
7
+ const k1 = options?.k1 ?? 1.5;
8
+ const b = options?.b ?? 0.75;
9
+ const stemOpt = options?.stem ?? true;
10
+ const tokenizerFn = options?.tokenizer;
11
+ const extraStopwords = options?.stopwords ?? [];
12
+ const vocab = new vocab_1.Vocabulary();
13
+ // df[termId] = number of documents containing that term
14
+ const df = new Map();
15
+ let N = 0;
16
+ let avgdl = 0;
17
+ let totalTokens = 0;
18
+ let fitted = false;
19
+ function tokenizeText(text) {
20
+ return (0, tokenizer_1.tokenize)(text, { stem: stemOpt, tokenizer: tokenizerFn, stopwords: extraStopwords });
21
+ }
22
+ function fit(documents) {
23
+ N = documents.length;
24
+ let totalLen = 0;
25
+ const tokenizedDocs = [];
26
+ for (const doc of documents) {
27
+ const tokens = tokenizeText(doc);
28
+ tokenizedDocs.push(tokens);
29
+ totalLen += tokens.length;
30
+ totalTokens += tokens.length;
31
+ // Register all terms in vocab first
32
+ const seen = new Set();
33
+ for (const token of tokens) {
34
+ const id = vocab.getOrAdd(token);
35
+ if (!seen.has(id)) {
36
+ seen.add(id);
37
+ df.set(id, (df.get(id) ?? 0) + 1);
38
+ }
39
+ }
40
+ }
41
+ avgdl = N > 0 ? totalLen / N : 0;
42
+ fitted = true;
43
+ }
44
+ function computeBM25(tokens, dl) {
45
+ if (!fitted)
46
+ throw new Error('BM25Encoder must be fit() before encode()');
47
+ // Count term frequencies
48
+ const tf = new Map();
49
+ for (const token of tokens) {
50
+ const id = vocab.getId(token);
51
+ if (id !== undefined) {
52
+ tf.set(id, (tf.get(id) ?? 0) + 1);
53
+ }
54
+ }
55
+ const entries = [];
56
+ for (const [termId, termTf] of tf) {
57
+ const termDf = df.get(termId) ?? 0;
58
+ const idf = Math.log((N - termDf + 0.5) / (termDf + 0.5) + 1);
59
+ const score = idf * (termTf * (k1 + 1)) / (termTf + k1 * (1 - b + b * dl / avgdl));
60
+ if (score > 0) {
61
+ entries.push({ idx: termId, val: score });
62
+ }
63
+ }
64
+ entries.sort((a, c) => a.idx - c.idx);
65
+ return {
66
+ indices: entries.map(e => e.idx),
67
+ values: entries.map(e => e.val),
68
+ };
69
+ }
70
+ function encode(text) {
71
+ const tokens = tokenizeText(text);
72
+ return computeBM25(tokens, tokens.length);
73
+ }
74
+ function encodeBatch(texts) {
75
+ return texts.map(t => encode(t));
76
+ }
77
+ function encodeQuery(text) {
78
+ // Query encoding: no length normalization (treat avgdl as doc length)
79
+ if (!fitted)
80
+ throw new Error('BM25Encoder must be fit() before encodeQuery()');
81
+ const tokens = tokenizeText(text);
82
+ return computeBM25(tokens, avgdl);
83
+ }
84
+ function serialize() {
85
+ return JSON.stringify({
86
+ N,
87
+ avgdl,
88
+ totalTokens,
89
+ df: Object.fromEntries(df),
90
+ vocab: vocab.serialize(),
91
+ options: { k1, b, stem: stemOpt, stopwords: extraStopwords },
92
+ });
93
+ }
94
+ function getStats() {
95
+ return { N, avgdl, vocabSize: vocab.size, totalTokens };
96
+ }
97
+ return { fit, encode, encodeBatch, encodeQuery, serialize, getStats };
98
+ }
99
+ //# sourceMappingURL=bm25.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"bm25.js","sourceRoot":"","sources":["../src/bm25.ts"],"names":[],"mappings":";;AAIA,gCA0GC;AA9GD,2CAAsC;AACtC,mCAAoC;AAGpC,SAAgB,UAAU,CAAC,OAAqB;IAC9C,MAAM,EAAE,GAAG,OAAO,EAAE,EAAE,IAAI,GAAG,CAAA;IAC7B,MAAM,CAAC,GAAG,OAAO,EAAE,CAAC,IAAI,IAAI,CAAA;IAC5B,MAAM,OAAO,GAAG,OAAO,EAAE,IAAI,IAAI,IAAI,CAAA;IACrC,MAAM,WAAW,GAAG,OAAO,EAAE,SAAS,CAAA;IACtC,MAAM,cAAc,GAAG,OAAO,EAAE,SAAS,IAAI,EAAE,CAAA;IAE/C,MAAM,KAAK,GAAG,IAAI,kBAAU,EAAE,CAAA;IAC9B,wDAAwD;IACxD,MAAM,EAAE,GAAG,IAAI,GAAG,EAAkB,CAAA;IACpC,IAAI,CAAC,GAAG,CAAC,CAAA;IACT,IAAI,KAAK,GAAG,CAAC,CAAA;IACb,IAAI,WAAW,GAAG,CAAC,CAAA;IACnB,IAAI,MAAM,GAAG,KAAK,CAAA;IAElB,SAAS,YAAY,CAAC,IAAY;QAChC,OAAO,IAAA,oBAAQ,EAAC,IAAI,EAAE,EAAE,IAAI,EAAE,OAAO,EAAE,SAAS,EAAE,WAAW,EAAE,SAAS,EAAE,cAAc,EAAE,CAAC,CAAA;IAC7F,CAAC;IAED,SAAS,GAAG,CAAC,SAAmB;QAC9B,CAAC,GAAG,SAAS,CAAC,MAAM,CAAA;QACpB,IAAI,QAAQ,GAAG,CAAC,CAAA;QAChB,MAAM,aAAa,GAAe,EAAE,CAAA;QAEpC,KAAK,MAAM,GAAG,IAAI,SAAS,EAAE,CAAC;YAC5B,MAAM,MAAM,GAAG,YAAY,CAAC,GAAG,CAAC,CAAA;YAChC,aAAa,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;YAC1B,QAAQ,IAAI,MAAM,CAAC,MAAM,CAAA;YACzB,WAAW,IAAI,MAAM,CAAC,MAAM,CAAA;YAE5B,oCAAoC;YACpC,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAA;YAC9B,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;gBAC3B,MAAM,EAAE,GAAG,KAAK,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAA;gBAChC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,CAAC;oBAClB,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,CAAA;oBACZ,EAAE,CAAC,GAAG,CAAC,EAAE,EAAE,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAA;gBACnC,CAAC;YACH,CAAC;QACH,CAAC;QAED,KAAK,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAA;QAChC,MAAM,GAAG,IAAI,CAAA;IACf,CAAC;IAED,SAAS,WAAW,CAAC,MAAgB,EAAE,EAAU;QAC/C,IAAI,CAAC,MAAM;YAAE,MAAM,IAAI,KAAK,CAAC,2CAA2C,CAAC,CAAA;QAEzE,yBAAyB;QACzB,MAAM,EAAE,GAAG,IAAI,GAAG,EAAkB,CAAA;QACpC,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;YAC3B,MAAM,EAAE,GAAG,KAAK,CAAC,KAAK,CAAC,KAAK,CAAC,CAAA;YAC7B,IAAI,EAAE,KAAK,SAAS,EAAE,CAAC;gBACrB,EAAE,CAAC,GAAG,CAAC,EAAE,EAAE,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,C
AAC,CAAA;YACnC,CAAC;QACH,CAAC;QAED,MAAM,OAAO,GAAmC,EAAE,CAAA;QAClD,KAAK,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,IAAI,EAAE,EAAE,CAAC;YAClC,MAAM,MAAM,GAAG,EAAE,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,CAAA;YAClC,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,MAAM,GAAG,GAAG,CAAC,GAAG,CAAC,MAAM,GAAG,GAAG,CAAC,GAAG,CAAC,CAAC,CAAA;YAC7D,MAAM,KAAK,GAAG,GAAG,GAAG,CAAC,MAAM,GAAG,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,MAAM,GAAG,EAAE,GAAG,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,EAAE,GAAG,KAAK,CAAC,CAAC,CAAA;YAClF,IAAI,KAAK,GAAG,CAAC,EAAE,CAAC;gBACd,OAAO,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,MAAM,EAAE,GAAG,EAAE,KAAK,EAAE,CAAC,CAAA;YAC3C,CAAC;QACH,CAAC;QAED,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,GAAG,CAAC,CAAA;QACrC,OAAO;YACL,OAAO,EAAE,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC;YAChC,MAAM,EAAE,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC;SAChC,CAAA;IACH,CAAC;IAED,SAAS,MAAM,CAAC,IAAY;QAC1B,MAAM,MAAM,GAAG,YAAY,CAAC,IAAI,CAAC,CAAA;QACjC,OAAO,WAAW,CAAC,MAAM,EAAE,MAAM,CAAC,MAAM,CAAC,CAAA;IAC3C,CAAC;IAED,SAAS,WAAW,CAAC,KAAe;QAClC,OAAO,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAA;IAClC,CAAC;IAED,SAAS,WAAW,CAAC,IAAY;QAC/B,sEAAsE;QACtE,IAAI,CAAC,MAAM;YAAE,MAAM,IAAI,KAAK,CAAC,gDAAgD,CAAC,CAAA;QAC9E,MAAM,MAAM,GAAG,YAAY,CAAC,IAAI,CAAC,CAAA;QACjC,OAAO,WAAW,CAAC,MAAM,EAAE,KAAK,CAAC,CAAA;IACnC,CAAC;IAED,SAAS,SAAS;QAChB,OAAO,IAAI,CAAC,SAAS,CAAC;YACpB,CAAC;YACD,KAAK;YACL,WAAW;YACX,EAAE,EAAE,MAAM,CAAC,WAAW,CAAC,EAAE,CAAC;YAC1B,KAAK,EAAE,KAAK,CAAC,SAAS,EAAE;YACxB,OAAO,EAAE,EAAE,EAAE,EAAE,CAAC,EAAE,IAAI,EAAE,OAAO,EAAE,SAAS,EAAE,cAAc,EAAE;SAC7D,CAAC,CAAA;IACJ,CAAC;IAED,SAAS,QAAQ;QACf,OAAO,EAAE,CAAC,EAAE,KAAK,EAAE,SAAS,EAAE,KAAK,CAAC,IAAI,EAAE,WAAW,EAAE,CAAA;IACzD,CAAC;IAED,OAAO,EAAE,GAAG,EAAE,MAAM,EAAE,WAAW,EAAE,WAAW,EAAE,SAAS,EAAE,QAAQ,EAAE,CAAA;AACvE,CAAC"}
@@ -0,0 +1,4 @@
1
+ export { createBM25 } from './bm25';
2
+ export { createTFIDF } from './tfidf';
3
+ export type { SparseVector, FitStats, BM25Options, TFIDFOptions, TokenizerFn, BM25Encoder, TFIDFEncoder, } from './types';
4
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAA;AACnC,OAAO,EAAE,WAAW,EAAE,MAAM,SAAS,CAAA;AACrC,YAAY,EACV,YAAY,EACZ,QAAQ,EACR,WAAW,EACX,YAAY,EACZ,WAAW,EACX,WAAW,EACX,YAAY,GACb,MAAM,SAAS,CAAA"}
package/dist/index.js ADDED
@@ -0,0 +1,9 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.createTFIDF = exports.createBM25 = void 0;
4
+ // sparse-encode - Generate BM25 and TF-IDF sparse vectors in JavaScript
5
+ var bm25_1 = require("./bm25");
6
+ Object.defineProperty(exports, "createBM25", { enumerable: true, get: function () { return bm25_1.createBM25; } });
7
+ var tfidf_1 = require("./tfidf");
8
+ Object.defineProperty(exports, "createTFIDF", { enumerable: true, get: function () { return tfidf_1.createTFIDF; } });
9
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";;;AAAA,wEAAwE;AACxE,+BAAmC;AAA1B,kGAAA,UAAU,OAAA;AACnB,iCAAqC;AAA5B,oGAAA,WAAW,OAAA"}
@@ -0,0 +1,2 @@
1
/**
 * Reduces an English word to its stem using Porter-style suffix-stripping
 * steps (1a-5b). Words of one or two characters are not stemmed.
 */
+ export declare function stem(word: string): string;
2
+ //# sourceMappingURL=porter-stemmer.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"porter-stemmer.d.ts","sourceRoot":"","sources":["../src/porter-stemmer.ts"],"names":[],"mappings":"AA+CA,wBAAgB,IAAI,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CA+IzC"}
@@ -0,0 +1,195 @@
1
+ "use strict";
2
+ // Porter Stemmer — classic algorithm (Porter 1980)
3
+ Object.defineProperty(exports, "__esModule", { value: true });
4
+ exports.stem = stem;
5
/**
 * Tests whether `word` ends in consonant-vowel-consonant, where the final
 * consonant is none of w, x, y. Only a, e, i, o, u count as vowels here.
 */
function hasCVC(word) {
    const n = word.length;
    if (n < 3) {
        return false;
    }
    const isVowel = (ch) => ch === 'a' || ch === 'e' || ch === 'i' || ch === 'o' || ch === 'u';
    const [first, middle, final] = [word[n - 3], word[n - 2], word[n - 1]];
    if (final === 'w' || final === 'x' || final === 'y') {
        return false;
    }
    return !isVowel(final) && isVowel(middle) && !isVowel(first);
}
16
/**
 * Reports whether `word` contains a vowel: any of a, e, i, o, u, or a 'y'
 * that is not the first character (the same rule `measure` applies).
 *
 * The position of each character is tracked explicitly. Looking the
 * character up with `indexOf` would return the first 'y' in the word, so a
 * later 'y' in a word that also starts with 'y' would be missed.
 *
 * @param word Lower-case word or stem to inspect.
 * @returns `true` when at least one vowel is present.
 */
function containsVowel(word) {
    for (let i = 0; i < word.length; i++) {
        const ch = word[i];
        if ('aeiou'.includes(ch)) {
            return true;
        }
        if (ch === 'y' && i > 0) {
            return true;
        }
    }
    return false;
}
25
/**
 * Counts the vowel-to-consonant transitions in `word`, i.e. the number of
 * vowel runs that are followed by a consonant. A 'y' counts as a vowel
 * unless it is the first character.
 */
function measure(word) {
    let transitions = 0;
    let prevWasVowel = false;
    for (let pos = 0; pos < word.length; pos += 1) {
        const letter = word[pos];
        const vowelHere = 'aeiou'.includes(letter) || (pos > 0 && letter === 'y');
        // A consonant directly after a vowel closes one vowel-consonant sequence.
        if (!vowelHere && prevWasVowel) {
            transitions += 1;
        }
        prevWasVowel = vowelHere;
    }
    return transitions;
}
45
/**
 * Tests whether the last two characters of `word` are the same letter and
 * that letter is not one of a, e, i, o, u.
 */
function endsDoubleConsonant(word) {
    if (word.length < 2) {
        return false;
    }
    const tail = word.slice(-2);
    if (tail[0] !== tail[1]) {
        return false;
    }
    return !'aeiou'.includes(tail[1]);
}
52
// Step 2 rewrites: [suffix, replacement]. The first suffix that matches wins,
// so longer suffixes must stay ahead of the shorter ones they contain.
const STEP2_RULES = [
    ['ational', 'ate'],
    ['tional', 'tion'],
    ['enci', 'ence'],
    ['anci', 'ance'],
    ['izer', 'ize'],
    ['abli', 'able'],
    ['alli', 'al'],
    ['entli', 'ent'],
    ['eli', 'e'],
    ['ousli', 'ous'],
    ['ization', 'ize'],
    ['ation', 'ate'],
    ['ator', 'ate'],
    ['alism', 'al'],
    ['iveness', 'ive'],
    ['fulness', 'ful'],
    ['ousness', 'ous'],
    ['aliti', 'al'],
    ['iviti', 'ive'],
    ['biliti', 'ble'],
];
// Step 3 rewrites: [suffix, replacement], first match wins.
const STEP3_RULES = [
    ['icate', 'ic'],
    ['ative', ''],
    ['alize', 'al'],
    ['iciti', 'ic'],
    ['ical', 'ic'],
    ['ful', ''],
    ['ness', ''],
];
// Step 4 suffixes that are dropped outright, first match wins.
const STEP4_SUFFIXES = [
    'ement', 'ment', 'ance', 'ence', 'able', 'ible', 'ant', 'ent',
    'ion', 'ism', 'ate', 'iti', 'ous', 'ive', 'ize', 'al', 'er', 'ic',
];
/**
 * Stems an English word with the Porter-style steps 1a-5b implemented below.
 *
 * The word is lower-cased before anything else, so the result is always
 * lower case; words of one or two characters are returned without stemming.
 *
 * @param word Word to stem.
 * @returns The lower-cased stem.
 */
function stem(word) {
    // Lower-case first so that short words get the same casing as stemmed ones.
    let w = word.toLowerCase();
    if (w.length <= 2) {
        return w;
    }
    // Step 1a: plural endings.
    if (w.endsWith('sses')) {
        w = w.slice(0, -2);
    }
    else if (w.endsWith('ies')) {
        w = w.slice(0, -2); // ies → i
    }
    else if (w.endsWith('ss')) {
        // keep
    }
    else if (w.endsWith('s')) {
        w = w.slice(0, -1);
    }
    // Step 1b: -eed / -ed / -ing.
    let step1bTriggered = false;
    if (w.endsWith('eed')) {
        const stem1b = w.slice(0, -3);
        if (measure(stem1b) > 0) {
            w = w.slice(0, -1); // eed → ee
        }
    }
    else if (w.endsWith('ed')) {
        const stem1b = w.slice(0, -2);
        if (containsVowel(stem1b)) {
            w = stem1b;
            step1bTriggered = true;
        }
    }
    else if (w.endsWith('ing')) {
        const stem1b = w.slice(0, -3);
        if (containsVowel(stem1b)) {
            w = stem1b;
            step1bTriggered = true;
        }
    }
    // Clean-up after -ed / -ing was removed: restore a final 'e' or undo a
    // doubled consonant.
    if (step1bTriggered) {
        if (w.endsWith('at') || w.endsWith('bl') || w.endsWith('iz')) {
            w = w + 'e';
        }
        else if (endsDoubleConsonant(w) && !w.endsWith('l') && !w.endsWith('s') && !w.endsWith('z')) {
            w = w.slice(0, -1);
        }
        else if (measure(w) === 1 && hasCVC(w)) {
            w = w + 'e';
        }
    }
    // Step 1c: terminal y → i when the rest of the word contains a vowel.
    if (w.endsWith('y') && w.length > 2) {
        const before = w.slice(0, -1);
        if (containsVowel(before)) {
            w = before + 'i';
        }
    }
    // Step 2: only the first matching suffix is considered, even when its
    // measure condition fails.
    for (const [suffix, replacement] of STEP2_RULES) {
        if (w.endsWith(suffix)) {
            const base = w.slice(0, -suffix.length);
            if (measure(base) > 0) {
                w = base + replacement;
            }
            break;
        }
    }
    // Step 3
    for (const [suffix, replacement] of STEP3_RULES) {
        if (w.endsWith(suffix)) {
            const base = w.slice(0, -suffix.length);
            if (measure(base) > 0) {
                w = base + replacement;
            }
            break;
        }
    }
    // Step 4: drop the suffix when the remaining stem has measure > 1;
    // 'ion' additionally requires the stem to end in 's' or 't'.
    for (const suffix of STEP4_SUFFIXES) {
        if (w.endsWith(suffix)) {
            const base = w.slice(0, -suffix.length);
            if (suffix === 'ion') {
                if (measure(base) > 1 && (base.endsWith('s') || base.endsWith('t'))) {
                    w = base;
                }
            }
            else if (measure(base) > 1) {
                w = base;
            }
            break;
        }
    }
    // Step 5a: drop a final 'e'.
    if (w.endsWith('e')) {
        const base = w.slice(0, -1);
        if (measure(base) > 1) {
            w = base;
        }
        else if (measure(base) === 1 && !hasCVC(base)) {
            w = base;
        }
    }
    // Step 5b: final 'll' → 'l'.
    if (w.endsWith('ll') && measure(w.slice(0, -1)) > 1) {
        w = w.slice(0, -1);
    }
    return w;
}
195
+ //# sourceMappingURL=porter-stemmer.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"porter-stemmer.js","sourceRoot":"","sources":["../src/porter-stemmer.ts"],"names":[],"mappings":";AAAA,mDAAmD;;AA+CnD,oBA+IC;AA5LD,SAAS,MAAM,CAAC,IAAY;IAC1B,yEAAyE;IACzE,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC;QAAE,OAAO,KAAK,CAAA;IACjC,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAA;IAClC,IAAI,KAAK,CAAC,QAAQ,CAAC,IAAI,CAAC;QAAE,OAAO,KAAK,CAAA;IACtC,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,CAAA;IACjD,MAAM,CAAC,GAAG,IAAI,CAAC,MAAM,GAAG,CAAC,CAAA;IACzB,OAAO,CAAC,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAA;AACpF,CAAC;AAED,SAAS,aAAa,CAAC,IAAY;IACjC,KAAK,MAAM,EAAE,IAAI,IAAI,EAAE,CAAC;QACtB,IAAI,OAAO,CAAC,QAAQ,CAAC,EAAE,CAAC;YAAE,OAAO,IAAI,CAAA;QACrC,IAAI,EAAE,KAAK,GAAG,IAAI,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC,GAAG,CAAC;YAAE,OAAO,IAAI,CAAA;IACrD,CAAC;IACD,OAAO,KAAK,CAAA;AACd,CAAC;AAED,SAAS,OAAO,CAAC,IAAY;IAC3B,qBAAqB;IACrB,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,CAAA;IACjD,IAAI,CAAC,GAAG,CAAC,CAAA;IACT,IAAI,OAAO,GAAG,KAAK,CAAA;IACnB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACrC,MAAM,EAAE,GAAG,IAAI,CAAC,CAAC,CAAC,CAAA;QAClB,MAAM,OAAO,GAAG,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,EAAE,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAA;QACvD,IAAI,OAAO,EAAE,CAAC;YACZ,OAAO,GAAG,IAAI,CAAA;QAChB,CAAC;aAAM,CAAC;YACN,IAAI,OAAO,EAAE,CAAC;gBACZ,CAAC,EAAE,CAAA;gBACH,OAAO,GAAG,KAAK,CAAA;YACjB,CAAC;QACH,CAAC;IACH,CAAC;IACD,OAAO,CAAC,CAAA;AACV,CAAC;AAED,SAAS,mBAAmB,CAAC,IAAY;IACvC,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC;QAAE,OAAO,KAAK,CAAA;IACjC,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAA;IAClC,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAA;IAClC,OAAO,IAAI,KAAK,IAAI,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAA;AACjD,CAAC;AAED,SAAgB,IAAI,CAAC,IAAY;IAC/B,IAAI,IAAI,CAAC,MAAM,IAAI,CAAC;QAAE,OAAO,IAAI,
CAAA;IAEjC,IAAI,CAAC,GAAG,IAAI,CAAC,WAAW,EAAE,CAAA;IAE1B,UAAU;IACV,IAAI,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;QACvB,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAA;IACpB,CAAC;SAAM,IAAI,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;QAC7B,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAA,CAAC,UAAU;IAC/B,CAAC;SAAM,IAAI,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;QAC5B,OAAO;IACT,CAAC;SAAM,IAAI,CAAC,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;QAC3B,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAA;IACpB,CAAC;IAED,UAAU;IACV,IAAI,eAAe,GAAG,KAAK,CAAA;IAC3B,IAAI,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;QACtB,MAAM,MAAM,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAA;QAC7B,IAAI,OAAO,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC;YACxB,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAA,CAAC,WAAW;QAChC,CAAC;IACH,CAAC;SAAM,IAAI,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;QAC5B,MAAM,MAAM,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAA;QAC7B,IAAI,aAAa,CAAC,MAAM,CAAC,EAAE,CAAC;YAC1B,CAAC,GAAG,MAAM,CAAA;YACV,eAAe,GAAG,IAAI,CAAA;QACxB,CAAC;IACH,CAAC;SAAM,IAAI,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;QAC7B,MAAM,MAAM,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAA;QAC7B,IAAI,aAAa,CAAC,MAAM,CAAC,EAAE,CAAC;YAC1B,CAAC,GAAG,MAAM,CAAA;YACV,eAAe,GAAG,IAAI,CAAA;QACxB,CAAC;IACH,CAAC;IAED,IAAI,eAAe,EAAE,CAAC;QACpB,IAAI,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;YAC7D,CAAC,GAAG,CAAC,GAAG,GAAG,CAAA;QACb,CAAC;aAAM,IAAI,mBAAmB,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;YAC9F,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAA;QACpB,CAAC;aAAM,IAAI,OAAO,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC;YACzC,CAAC,GAAG,CAAC,GAAG,GAAG,CAAA;QACb,CAAC;IACH,CAAC;IAED,UAAU;IACV,IAAI,CAAC,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACpC,MAAM,MAAM,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,
EAAE,CAAC,CAAC,CAAC,CAAA;QAC7B,IAAI,aAAa,CAAC,MAAM,CAAC,EAAE,CAAC;YAC1B,CAAC,GAAG,MAAM,GAAG,GAAG,CAAA;QAClB,CAAC;IACH,CAAC;IAED,SAAS;IACT,MAAM,QAAQ,GAAuB;QACnC,CAAC,SAAS,EAAE,KAAK,CAAC;QAClB,CAAC,QAAQ,EAAE,MAAM,CAAC;QAClB,CAAC,MAAM,EAAE,MAAM,CAAC;QAChB,CAAC,MAAM,EAAE,MAAM,CAAC;QAChB,CAAC,MAAM,EAAE,KAAK,CAAC;QACf,CAAC,MAAM,EAAE,MAAM,CAAC;QAChB,CAAC,MAAM,EAAE,IAAI,CAAC;QACd,CAAC,OAAO,EAAE,KAAK,CAAC;QAChB,CAAC,KAAK,EAAE,GAAG,CAAC;QACZ,CAAC,OAAO,EAAE,KAAK,CAAC;QAChB,CAAC,SAAS,EAAE,KAAK,CAAC;QAClB,CAAC,OAAO,EAAE,KAAK,CAAC;QAChB,CAAC,MAAM,EAAE,KAAK,CAAC;QACf,CAAC,OAAO,EAAE,IAAI,CAAC;QACf,CAAC,SAAS,EAAE,KAAK,CAAC;QAClB,CAAC,SAAS,EAAE,KAAK,CAAC;QAClB,CAAC,SAAS,EAAE,KAAK,CAAC;QAClB,CAAC,OAAO,EAAE,IAAI,CAAC;QACf,CAAC,OAAO,EAAE,KAAK,CAAC;QAChB,CAAC,QAAQ,EAAE,KAAK,CAAC;KAClB,CAAA;IACD,KAAK,MAAM,CAAC,MAAM,EAAE,WAAW,CAAC,IAAI,QAAQ,EAAE,CAAC;QAC7C,IAAI,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;YACvB,MAAM,IAAI,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,MAAM,CAAC,MAAM,CAAC,CAAA;YACvC,IAAI,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;gBACtB,CAAC,GAAG,IAAI,GAAG,WAAW,CAAA;YACxB,CAAC;YACD,MAAK;QACP,CAAC;IACH,CAAC;IAED,SAAS;IACT,MAAM,QAAQ,GAAuB;QACnC,CAAC,OAAO,EAAE,IAAI,CAAC;QACf,CAAC,OAAO,EAAE,EAAE,CAAC;QACb,CAAC,OAAO,EAAE,IAAI,CAAC;QACf,CAAC,OAAO,EAAE,IAAI,CAAC;QACf,CAAC,MAAM,EAAE,IAAI,CAAC;QACd,CAAC,KAAK,EAAE,EAAE,CAAC;QACX,CAAC,MAAM,EAAE,EAAE,CAAC;KACb,CAAA;IACD,KAAK,MAAM,CAAC,MAAM,EAAE,WAAW,CAAC,IAAI,QAAQ,EAAE,CAAC;QAC7C,IAAI,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;YACvB,MAAM,IAAI,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,MAAM,CAAC,MAAM,CAAC,CAAA;YACvC,IAAI,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;gBACtB,CAAC,GAAG,IAAI,GAAG,WAAW,CAAA;YACxB,CAAC;YACD,MAAK;QACP,CAAC;IACH,CAAC;IAED,SAAS;IACT,MAAM,aAAa,GAAG;QACpB,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK;QAC7D,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI;KAClE,CAAA;IACD,KAAK,MAAM,MAAM,IAAI,aAAa,EAAE,CAAC;QACnC,IAAI,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;YACvB,MAAM,IA
AI,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,MAAM,CAAC,MAAM,CAAC,CAAA;YACvC,IAAI,MAAM,KAAK,KAAK,EAAE,CAAC;gBACrB,IAAI,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC;oBACpE,CAAC,GAAG,IAAI,CAAA;gBACV,CAAC;YACH,CAAC;iBAAM,IAAI,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;gBAC7B,CAAC,GAAG,IAAI,CAAA;YACV,CAAC;YACD,MAAK;QACP,CAAC;IACH,CAAC;IAED,UAAU;IACV,IAAI,CAAC,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;QACpB,MAAM,IAAI,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAA;QAC3B,IAAI,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;YACtB,CAAC,GAAG,IAAI,CAAA;QACV,CAAC;aAAM,IAAI,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC;YAChD,CAAC,GAAG,IAAI,CAAA;QACV,CAAC;IACH,CAAC;IAED,UAAU;IACV,IAAI,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,OAAO,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC;QACpD,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAA;IACpB,CAAC;IAED,OAAO,CAAC,CAAA;AACV,CAAC"}
@@ -0,0 +1,3 @@
1
+ import type { TFIDFOptions, TFIDFEncoder } from './types';
2
/**
 * Creates an unfitted TF-IDF encoder. `fit()` must be called before
 * `encode()`. When omitted, `stem` defaults to true, `stopwords` to an empty
 * list and `sublinearTf` to false.
 */
+ export declare function createTFIDF(options?: TFIDFOptions): TFIDFEncoder;
3
+ //# sourceMappingURL=tfidf.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"tfidf.d.ts","sourceRoot":"","sources":["../src/tfidf.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,YAAY,EAAE,YAAY,EAA0B,MAAM,SAAS,CAAA;AAEjF,wBAAgB,WAAW,CAAC,OAAO,CAAC,EAAE,YAAY,GAAG,YAAY,CAyGhE"}
package/dist/tfidf.js ADDED
@@ -0,0 +1,98 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.createTFIDF = createTFIDF;
4
+ const tokenizer_1 = require("./tokenizer");
5
+ const vocab_1 = require("./vocab");
6
+ function createTFIDF(options) {
7
+ const stemOpt = options?.stem ?? true;
8
+ const tokenizerFn = options?.tokenizer;
9
+ const extraStopwords = options?.stopwords ?? [];
10
+ const sublinearTf = options?.sublinearTf ?? false;
11
+ const vocab = new vocab_1.Vocabulary();
12
+ const df = new Map();
13
+ let N = 0;
14
+ let totalTokens = 0;
15
+ let avgdl = 0;
16
+ let fitted = false;
17
+ function tokenizeText(text) {
18
+ return (0, tokenizer_1.tokenize)(text, { stem: stemOpt, tokenizer: tokenizerFn, stopwords: extraStopwords });
19
+ }
20
+ function fit(documents) {
21
+ N = documents.length;
22
+ let totalLen = 0;
23
+ for (const doc of documents) {
24
+ const tokens = tokenizeText(doc);
25
+ totalLen += tokens.length;
26
+ totalTokens += tokens.length;
27
+ const seen = new Set();
28
+ for (const token of tokens) {
29
+ const id = vocab.getOrAdd(token);
30
+ if (!seen.has(id)) {
31
+ seen.add(id);
32
+ df.set(id, (df.get(id) ?? 0) + 1);
33
+ }
34
+ }
35
+ }
36
+ avgdl = N > 0 ? totalLen / N : 0;
37
+ fitted = true;
38
+ }
39
+ function encode(text) {
40
+ if (!fitted)
41
+ throw new Error('TFIDFEncoder must be fit() before encode()');
42
+ const tokens = tokenizeText(text);
43
+ const dl = tokens.length;
44
+ if (dl === 0)
45
+ return { indices: [], values: [] };
46
+ // Count raw term frequencies
47
+ const rawTf = new Map();
48
+ for (const token of tokens) {
49
+ const id = vocab.getId(token);
50
+ if (id !== undefined) {
51
+ rawTf.set(id, (rawTf.get(id) ?? 0) + 1);
52
+ }
53
+ }
54
+ const entries = [];
55
+ for (const [termId, count] of rawTf) {
56
+ const tf = sublinearTf ? 1 + Math.log(count) : count / dl;
57
+ const termDf = df.get(termId) ?? 0;
58
+ const idf = Math.log((N + 1) / (termDf + 1)) + 1;
59
+ const score = tf * idf;
60
+ if (score > 0) {
61
+ entries.push({ idx: termId, val: score });
62
+ }
63
+ }
64
+ // L2 normalize
65
+ const norm = Math.sqrt(entries.reduce((acc, e) => acc + e.val * e.val, 0));
66
+ if (norm > 0) {
67
+ for (const e of entries)
68
+ e.val /= norm;
69
+ }
70
+ entries.sort((a, c) => a.idx - c.idx);
71
+ return {
72
+ indices: entries.map(e => e.idx),
73
+ values: entries.map(e => e.val),
74
+ };
75
+ }
76
+ function encodeBatch(texts) {
77
+ return texts.map(t => encode(t));
78
+ }
79
+ function encodeQuery(text) {
80
+ // For queries, same as encode
81
+ return encode(text);
82
+ }
83
+ function serialize() {
84
+ return JSON.stringify({
85
+ N,
86
+ avgdl,
87
+ totalTokens,
88
+ df: Object.fromEntries(df),
89
+ vocab: vocab.serialize(),
90
+ options: { stem: stemOpt, sublinearTf, stopwords: extraStopwords },
91
+ });
92
+ }
93
+ function getStats() {
94
+ return { N, avgdl, vocabSize: vocab.size, totalTokens };
95
+ }
96
+ return { fit, encode, encodeBatch, encodeQuery, serialize, getStats };
97
+ }
98
+ //# sourceMappingURL=tfidf.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"tfidf.js","sourceRoot":"","sources":["../src/tfidf.ts"],"names":[],"mappings":";;AAIA,kCAyGC;AA7GD,2CAAsC;AACtC,mCAAoC;AAGpC,SAAgB,WAAW,CAAC,OAAsB;IAChD,MAAM,OAAO,GAAG,OAAO,EAAE,IAAI,IAAI,IAAI,CAAA;IACrC,MAAM,WAAW,GAAG,OAAO,EAAE,SAAS,CAAA;IACtC,MAAM,cAAc,GAAG,OAAO,EAAE,SAAS,IAAI,EAAE,CAAA;IAC/C,MAAM,WAAW,GAAG,OAAO,EAAE,WAAW,IAAI,KAAK,CAAA;IAEjD,MAAM,KAAK,GAAG,IAAI,kBAAU,EAAE,CAAA;IAC9B,MAAM,EAAE,GAAG,IAAI,GAAG,EAAkB,CAAA;IACpC,IAAI,CAAC,GAAG,CAAC,CAAA;IACT,IAAI,WAAW,GAAG,CAAC,CAAA;IACnB,IAAI,KAAK,GAAG,CAAC,CAAA;IACb,IAAI,MAAM,GAAG,KAAK,CAAA;IAElB,SAAS,YAAY,CAAC,IAAY;QAChC,OAAO,IAAA,oBAAQ,EAAC,IAAI,EAAE,EAAE,IAAI,EAAE,OAAO,EAAE,SAAS,EAAE,WAAW,EAAE,SAAS,EAAE,cAAc,EAAE,CAAC,CAAA;IAC7F,CAAC;IAED,SAAS,GAAG,CAAC,SAAmB;QAC9B,CAAC,GAAG,SAAS,CAAC,MAAM,CAAA;QACpB,IAAI,QAAQ,GAAG,CAAC,CAAA;QAEhB,KAAK,MAAM,GAAG,IAAI,SAAS,EAAE,CAAC;YAC5B,MAAM,MAAM,GAAG,YAAY,CAAC,GAAG,CAAC,CAAA;YAChC,QAAQ,IAAI,MAAM,CAAC,MAAM,CAAA;YACzB,WAAW,IAAI,MAAM,CAAC,MAAM,CAAA;YAE5B,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAA;YAC9B,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;gBAC3B,MAAM,EAAE,GAAG,KAAK,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAA;gBAChC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,CAAC;oBAClB,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,CAAA;oBACZ,EAAE,CAAC,GAAG,CAAC,EAAE,EAAE,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAA;gBACnC,CAAC;YACH,CAAC;QACH,CAAC;QAED,KAAK,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAA;QAChC,MAAM,GAAG,IAAI,CAAA;IACf,CAAC;IAED,SAAS,MAAM,CAAC,IAAY;QAC1B,IAAI,CAAC,MAAM;YAAE,MAAM,IAAI,KAAK,CAAC,4CAA4C,CAAC,CAAA;QAE1E,MAAM,MAAM,GAAG,YAAY,CAAC,IAAI,CAAC,CAAA;QACjC,MAAM,EAAE,GAAG,MAAM,CAAC,MAAM,CAAA;QACxB,IAAI,EAAE,KAAK,CAAC;YAAE,OAAO,EAAE,OAAO,EAAE,EAAE,EAAE,MAAM,EAAE,EAAE,EAAE,CAAA;QAEhD,6BAA6B;QAC7B,MAAM,KAAK,GAAG,IAAI,GAAG,EAAkB,CAAA;QACvC,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;YAC3B,MAAM,EAAE,GAAG,KAAK,CAAC,KAAK,CAAC,KAAK,CAAC,CAAA;YAC7B,IAAI,EAAE,KAAK,SAAS,EAAE,CAAC;gBACrB,KAAK,CAAC,GAAG,CAAC,EAAE,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,C
AAC,GAAG,CAAC,CAAC,CAAA;YACzC,CAAC;QACH,CAAC;QAED,MAAM,OAAO,GAAmC,EAAE,CAAA;QAClD,KAAK,MAAM,CAAC,MAAM,EAAE,KAAK,CAAC,IAAI,KAAK,EAAE,CAAC;YACpC,MAAM,EAAE,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,GAAG,EAAE,CAAA;YACzD,MAAM,MAAM,GAAG,EAAE,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,CAAA;YAClC,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAA;YAChD,MAAM,KAAK,GAAG,EAAE,GAAG,GAAG,CAAA;YACtB,IAAI,KAAK,GAAG,CAAC,EAAE,CAAC;gBACd,OAAO,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,MAAM,EAAE,GAAG,EAAE,KAAK,EAAE,CAAC,CAAA;YAC3C,CAAC;QACH,CAAC;QAED,eAAe;QACf,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC,CAAA;QAC1E,IAAI,IAAI,GAAG,CAAC,EAAE,CAAC;YACb,KAAK,MAAM,CAAC,IAAI,OAAO;gBAAE,CAAC,CAAC,GAAG,IAAI,IAAI,CAAA;QACxC,CAAC;QAED,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,GAAG,CAAC,CAAA;QACrC,OAAO;YACL,OAAO,EAAE,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC;YAChC,MAAM,EAAE,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC;SAChC,CAAA;IACH,CAAC;IAED,SAAS,WAAW,CAAC,KAAe;QAClC,OAAO,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAA;IAClC,CAAC;IAED,SAAS,WAAW,CAAC,IAAY;QAC/B,8BAA8B;QAC9B,OAAO,MAAM,CAAC,IAAI,CAAC,CAAA;IACrB,CAAC;IAED,SAAS,SAAS;QAChB,OAAO,IAAI,CAAC,SAAS,CAAC;YACpB,CAAC;YACD,KAAK;YACL,WAAW;YACX,EAAE,EAAE,MAAM,CAAC,WAAW,CAAC,EAAE,CAAC;YAC1B,KAAK,EAAE,KAAK,CAAC,SAAS,EAAE;YACxB,OAAO,EAAE,EAAE,IAAI,EAAE,OAAO,EAAE,WAAW,EAAE,SAAS,EAAE,cAAc,EAAE;SACnE,CAAC,CAAA;IACJ,CAAC;IAED,SAAS,QAAQ;QACf,OAAO,EAAE,CAAC,EAAE,KAAK,EAAE,SAAS,EAAE,KAAK,CAAC,IAAI,EAAE,WAAW,EAAE,CAAA;IACzD,CAAC;IAED,OAAO,EAAE,GAAG,EAAE,MAAM,EAAE,WAAW,EAAE,WAAW,EAAE,SAAS,EAAE,QAAQ,EAAE,CAAA;AACvE,CAAC"}
@@ -0,0 +1,8 @@
1
+ import type { TokenizerFn } from './types';
2
/**
 * Built-in tokenizer: lower-cases `text`, splits it into word tokens, drops
 * empty tokens, all-digit tokens and the built-in stop words, then stems
 * every remaining token.
 */
+ export declare function defaultTokenizer(text: string): string[];
3
/**
 * Tokenizes `text`. A custom `tokenizer`, when given, replaces the built-in
 * pipeline entirely; otherwise the built-in pipeline runs and stems tokens
 * unless `stem` is false. Tokens listed in `stopwords` (compared in lower
 * case) are removed from the result.
 */
+ export declare function tokenize(text: string, options?: {
4
+ stopwords?: string[];
5
+ stem?: boolean;
6
+ tokenizer?: TokenizerFn;
7
+ }): string[];
8
+ //# sourceMappingURL=tokenizer.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"tokenizer.d.ts","sourceRoot":"","sources":["../src/tokenizer.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,SAAS,CAAA;AAS1C,wBAAgB,gBAAgB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,EAAE,CAOvD;AAED,wBAAgB,QAAQ,CACtB,IAAI,EAAE,MAAM,EACZ,OAAO,CAAC,EAAE;IACR,SAAS,CAAC,EAAE,MAAM,EAAE,CAAA;IACpB,IAAI,CAAC,EAAE,OAAO,CAAA;IACd,SAAS,CAAC,EAAE,WAAW,CAAA;CACxB,GACA,MAAM,EAAE,CAyBV"}
@@ -0,0 +1,43 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.defaultTokenizer = defaultTokenizer;
4
+ exports.tokenize = tokenize;
5
+ const porter_stemmer_1 = require("./porter-stemmer");
6
+ const DEFAULT_STOPWORDS = new Set([
7
+ 'a', 'an', 'the', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for',
8
+ 'of', 'with', 'by', 'is', 'are', 'was', 'were', 'be', 'been', 'being',
9
+ 'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'could',
10
+ 'should', 'may', 'might', 'shall', 'can',
11
+ ]);
12
+ function defaultTokenizer(text) {
13
+ return text
14
+ .toLowerCase()
15
+ .split(/[^\w]+/)
16
+ .filter(t => t.length > 0 && !/^\d+$/.test(t))
17
+ .filter(t => !DEFAULT_STOPWORDS.has(t))
18
+ .map(t => (0, porter_stemmer_1.stem)(t));
19
+ }
20
+ function tokenize(text, options) {
21
+ const { tokenizer, stopwords, stem: doStem = true } = options ?? {};
22
+ let tokens;
23
+ if (tokenizer) {
24
+ tokens = tokenizer(text);
25
+ }
26
+ else {
27
+ // Manual pipeline so we can honour stem=false
28
+ tokens = text
29
+ .toLowerCase()
30
+ .split(/[^\w]+/)
31
+ .filter(t => t.length > 0 && !/^\d+$/.test(t))
32
+ .filter(t => !DEFAULT_STOPWORDS.has(t));
33
+ if (doStem) {
34
+ tokens = tokens.map(t => (0, porter_stemmer_1.stem)(t));
35
+ }
36
+ }
37
+ if (stopwords && stopwords.length > 0) {
38
+ const extra = new Set(stopwords.map(s => s.toLowerCase()));
39
+ tokens = tokens.filter(t => !extra.has(t));
40
+ }
41
+ return tokens;
42
+ }
43
+ //# sourceMappingURL=tokenizer.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"tokenizer.js","sourceRoot":"","sources":["../src/tokenizer.ts"],"names":[],"mappings":";;AAUA,4CAOC;AAED,4BAgCC;AAnDD,qDAAuC;AAGvC,MAAM,iBAAiB,GAAG,IAAI,GAAG,CAAC;IAChC,GAAG,EAAE,IAAI,EAAE,KAAK,EAAE,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK;IACnE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK,EAAE,KAAK,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO;IACrE,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,IAAI,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,OAAO,EAAE,OAAO;IACnE,QAAQ,EAAE,KAAK,EAAE,OAAO,EAAE,OAAO,EAAE,KAAK;CACzC,CAAC,CAAA;AAEF,SAAgB,gBAAgB,CAAC,IAAY;IAC3C,OAAO,IAAI;SACR,WAAW,EAAE;SACb,KAAK,CAAC,QAAQ,CAAC;SACf,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;SAC7C,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,iBAAiB,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;SACtC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,IAAA,qBAAI,EAAC,CAAC,CAAC,CAAC,CAAA;AACtB,CAAC;AAED,SAAgB,QAAQ,CACtB,IAAY,EACZ,OAIC;IAED,MAAM,EAAE,SAAS,EAAE,SAAS,EAAE,IAAI,EAAE,MAAM,GAAG,IAAI,EAAE,GAAG,OAAO,IAAI,EAAE,CAAA;IAEnE,IAAI,MAAgB,CAAA;IACpB,IAAI,SAAS,EAAE,CAAC;QACd,MAAM,GAAG,SAAS,CAAC,IAAI,CAAC,CAAA;IAC1B,CAAC;SAAM,CAAC;QACN,8CAA8C;QAC9C,MAAM,GAAG,IAAI;aACV,WAAW,EAAE;aACb,KAAK,CAAC,QAAQ,CAAC;aACf,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;aAC7C,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,iBAAiB,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAA;QAEzC,IAAI,MAAM,EAAE,CAAC;YACX,MAAM,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,IAAA,qBAAI,EAAC,CAAC,CAAC,CAAC,CAAA;QACnC,CAAC;IACH,CAAC;IAED,IAAI,SAAS,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACtC,MAAM,KAAK,GAAG,IAAI,GAAG,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC,CAAA;QAC1D,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAA;IAC5C,CAAC;IAED,OAAO,MAAM,CAAA;AACf,CAAC"}
@@ -0,0 +1,41 @@
1
/**
 * Sparse vector stored as two parallel arrays: `indices[i]` is a vocabulary
 * term id and `values[i]` is the weight of that term. The encoders in this
 * package emit the entries sorted by ascending term id.
 */
+ export interface SparseVector {
2
+ indices: number[];
3
+ values: number[];
4
+ }
5
/**
 * Corpus statistics reported by `getStats()`: `N` is the number of documents
 * passed to `fit()`, `avgdl` the mean number of tokens per document (0 for an
 * empty corpus), `vocabSize` the number of distinct terms in the vocabulary
 * and `totalTokens` the token count recorded while fitting.
 */
+ export interface FitStats {
6
+ N: number;
7
+ avgdl: number;
8
+ vocabSize: number;
9
+ totalTokens: number;
10
+ }
11
/**
 * Custom tokenizer: maps raw text to its tokens. When supplied to
 * `tokenize()`, it replaces the built-in lower-casing, splitting and stemming.
 */
+ export type TokenizerFn = (text: string) => string[];
12
/**
 * Options for the BM25 encoder. `k1` and `b` are the constants of the BM25
 * term-weight formula: `k1` scales term-frequency saturation and `b` weights
 * the document-length ratio dl / avgdl.
 * NOTE(review): `tokenizer`, `stopwords` and `stem` presumably have the same
 * meaning as the options of `tokenize()`; the defaults are set in
 * `createBM25` — confirm there.
 */
+ export interface BM25Options {
13
+ k1?: number;
14
+ b?: number;
15
+ tokenizer?: TokenizerFn;
16
+ stopwords?: string[];
17
+ stem?: boolean;
18
+ }
19
/**
 * Options for the TF-IDF encoder. `tokenizer`, `stopwords` (default: none)
 * and `stem` (default: true) are forwarded to `tokenize()`. With
 * `sublinearTf` (default: false) the term frequency is 1 + ln(count) instead
 * of count divided by the number of tokens in the text.
 */
+ export interface TFIDFOptions {
20
+ tokenizer?: TokenizerFn;
21
+ stopwords?: string[];
22
+ stem?: boolean;
23
+ sublinearTf?: boolean;
24
+ }
25
/**
 * BM25 encoder. `fit()` learns the corpus statistics; `encode()` scores a
 * text using its own token count as the document length, `encodeBatch()`
 * applies `encode()` to each text, and `encodeQuery()` scores a text using
 * the average document length instead. `encode()` and `encodeQuery()` throw
 * an Error until `fit()` has been called. `serialize()` returns a JSON string.
 */
+ export interface BM25Encoder {
26
+ fit(documents: string[]): void;
27
+ encode(text: string): SparseVector;
28
+ encodeBatch(texts: string[]): SparseVector[];
29
+ encodeQuery(text: string): SparseVector;
30
+ serialize(): string;
31
+ getStats(): FitStats;
32
+ }
33
/**
 * TF-IDF encoder. `fit()` learns vocabulary and document frequencies;
 * `encode()` returns an L2-normalised TF-IDF vector and throws an Error until
 * `fit()` has been called; `encodeBatch()` applies `encode()` to each text;
 * `encodeQuery()` is the same as `encode()`. `serialize()` returns a JSON
 * string.
 */
+ export interface TFIDFEncoder {
34
+ fit(documents: string[]): void;
35
+ encode(text: string): SparseVector;
36
+ encodeBatch(texts: string[]): SparseVector[];
37
+ encodeQuery(text: string): SparseVector;
38
+ serialize(): string;
39
+ getStats(): FitStats;
40
+ }
41
+ //# sourceMappingURL=types.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA,MAAM,WAAW,YAAY;IAC3B,OAAO,EAAE,MAAM,EAAE,CAAA;IACjB,MAAM,EAAE,MAAM,EAAE,CAAA;CACjB;AAED,MAAM,WAAW,QAAQ;IACvB,CAAC,EAAE,MAAM,CAAA;IACT,KAAK,EAAE,MAAM,CAAA;IACb,SAAS,EAAE,MAAM,CAAA;IACjB,WAAW,EAAE,MAAM,CAAA;CACpB;AAED,MAAM,MAAM,WAAW,GAAG,CAAC,IAAI,EAAE,MAAM,KAAK,MAAM,EAAE,CAAA;AAEpD,MAAM,WAAW,WAAW;IAC1B,EAAE,CAAC,EAAE,MAAM,CAAA;IACX,CAAC,CAAC,EAAE,MAAM,CAAA;IACV,SAAS,CAAC,EAAE,WAAW,CAAA;IACvB,SAAS,CAAC,EAAE,MAAM,EAAE,CAAA;IACpB,IAAI,CAAC,EAAE,OAAO,CAAA;CACf;AAED,MAAM,WAAW,YAAY;IAC3B,SAAS,CAAC,EAAE,WAAW,CAAA;IACvB,SAAS,CAAC,EAAE,MAAM,EAAE,CAAA;IACpB,IAAI,CAAC,EAAE,OAAO,CAAA;IACd,WAAW,CAAC,EAAE,OAAO,CAAA;CACtB;AAED,MAAM,WAAW,WAAW;IAC1B,GAAG,CAAC,SAAS,EAAE,MAAM,EAAE,GAAG,IAAI,CAAA;IAC9B,MAAM,CAAC,IAAI,EAAE,MAAM,GAAG,YAAY,CAAA;IAClC,WAAW,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,YAAY,EAAE,CAAA;IAC5C,WAAW,CAAC,IAAI,EAAE,MAAM,GAAG,YAAY,CAAA;IACvC,SAAS,IAAI,MAAM,CAAA;IACnB,QAAQ,IAAI,QAAQ,CAAA;CACrB;AAED,MAAM,WAAW,YAAY;IAC3B,GAAG,CAAC,SAAS,EAAE,MAAM,EAAE,GAAG,IAAI,CAAA;IAC9B,MAAM,CAAC,IAAI,EAAE,MAAM,GAAG,YAAY,CAAA;IAClC,WAAW,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,YAAY,EAAE,CAAA;IAC5C,WAAW,CAAC,IAAI,EAAE,MAAM,GAAG,YAAY,CAAA;IACvC,SAAS,IAAI,MAAM,CAAA;IACnB,QAAQ,IAAI,QAAQ,CAAA;CACrB"}
package/dist/types.js ADDED
@@ -0,0 +1,3 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ //# sourceMappingURL=types.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.js","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":""}
@@ -0,0 +1,11 @@
1
/**
 * Bidirectional mapping between terms and dense integer ids.
 *
 * Ids are assigned sequentially starting at 0, in the order terms are first
 * added; a term keeps its id for the lifetime of the instance.
 *
 *  - termToId / idToTerm: private lookup tables (term -> id, and id -> term by
 *    array position).
 *  - getOrAdd(term): returns the existing id of `term`, or registers it under
 *    the next free id and returns that.
 *  - getId(term): read-only lookup; yields `undefined` for an unknown term and
 *    never adds it.
 *  - size: number of distinct terms registered so far.
 *  - terms(): a new array of all terms, ordered by id.
 *  - serialize(): returns an object of the form `{ terms: string[] }` with the
 *    terms in id order.
 *  - deserialize(data): builds a new Vocabulary by adding every entry of
 *    `data.terms` in order, so ids match positions in that array.
 */
+ export declare class Vocabulary {
2
+ private termToId;
3
+ private idToTerm;
4
+ getOrAdd(term: string): number;
5
+ getId(term: string): number | undefined;
6
+ get size(): number;
7
+ terms(): string[];
8
+ serialize(): object;
9
+ static deserialize(data: object): Vocabulary;
10
+ }
11
+ //# sourceMappingURL=vocab.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"vocab.d.ts","sourceRoot":"","sources":["../src/vocab.ts"],"names":[],"mappings":"AAAA,qBAAa,UAAU;IACrB,OAAO,CAAC,QAAQ,CAA4B;IAC5C,OAAO,CAAC,QAAQ,CAAe;IAE/B,QAAQ,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM;IAU9B,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,GAAG,SAAS;IAIvC,IAAI,IAAI,IAAI,MAAM,CAEjB;IAED,KAAK,IAAI,MAAM,EAAE;IAIjB,SAAS,IAAI,MAAM;IAInB,MAAM,CAAC,WAAW,CAAC,IAAI,EAAE,MAAM,GAAG,UAAU;CAQ7C"}
package/dist/vocab.js ADDED
@@ -0,0 +1,38 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.Vocabulary = void 0;
4
/**
 * Bidirectional mapping between terms and dense integer ids.
 *
 * Ids are handed out sequentially from 0 in first-seen order, so the id of a
 * term is also its index in the id -> term table.
 */
class Vocabulary {
    /** term -> id lookup. */
    termToId = new Map();
    /** id -> term lookup; the array index is the id. */
    idToTerm = [];

    /**
     * Return the id of `term`, registering it under the next free id first
     * when it has not been seen before.
     *
     * @param {string} term
     * @returns {number} The (possibly newly assigned) id.
     */
    getOrAdd(term) {
        let id = this.termToId.get(term);
        if (id === undefined) {
            // The next free id is always the current table length.
            id = this.idToTerm.length;
            this.termToId.set(term, id);
            this.idToTerm.push(term);
        }
        return id;
    }

    /**
     * Look up a term without registering it.
     *
     * @param {string} term
     * @returns {number | undefined} The id, or `undefined` for unknown terms.
     */
    getId(term) {
        return this.termToId.get(term);
    }

    /** Number of distinct terms registered so far. */
    get size() {
        return this.idToTerm.length;
    }

    /**
     * @returns {string[]} A fresh array of all terms in id order.
     */
    terms() {
        return [...this.idToTerm];
    }

    /**
     * Snapshot the vocabulary as a plain object.
     *
     * The returned array is a copy: handing out the internal table would let
     * a caller mutate it and desynchronise `termToId`, `idToTerm` and `size`.
     *
     * @returns {{ terms: string[] }} Terms in id order.
     */
    serialize() {
        return { terms: [...this.idToTerm] };
    }

    /**
     * Rebuild a vocabulary from the output of {@link Vocabulary#serialize}.
     *
     * Terms are re-added in order, so ids equal positions in `data.terms`
     * (repeated entries collapse onto the id of their first occurrence).
     *
     * @param {{ terms: string[] }} data
     * @returns {Vocabulary}
     * @throws {TypeError} If `data.terms` is not an array of strings.
     */
    static deserialize(data) {
        const terms = data?.terms;
        // Reject non-arrays explicitly: a string here would otherwise be
        // iterated character by character and yield a bogus vocabulary.
        if (!Array.isArray(terms)) {
            throw new TypeError("Vocabulary.deserialize: expected data.terms to be an array of strings");
        }
        const v = new Vocabulary();
        for (const term of terms) {
            if (typeof term !== "string") {
                throw new TypeError("Vocabulary.deserialize: expected every term to be a string");
            }
            v.getOrAdd(term);
        }
        return v;
    }
}
37
+ exports.Vocabulary = Vocabulary;
38
+ //# sourceMappingURL=vocab.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"vocab.js","sourceRoot":"","sources":["../src/vocab.ts"],"names":[],"mappings":";;;AAAA,MAAa,UAAU;IACb,QAAQ,GAAG,IAAI,GAAG,EAAkB,CAAA;IACpC,QAAQ,GAAa,EAAE,CAAA;IAE/B,QAAQ,CAAC,IAAY;QACnB,IAAI,EAAE,GAAG,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,CAAA;QAChC,IAAI,EAAE,KAAK,SAAS,EAAE,CAAC;YACrB,EAAE,GAAG,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAA;YACzB,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,EAAE,EAAE,CAAC,CAAA;YAC3B,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;QAC1B,CAAC;QACD,OAAO,EAAE,CAAA;IACX,CAAC;IAED,KAAK,CAAC,IAAY;QAChB,OAAO,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,CAAA;IAChC,CAAC;IAED,IAAI,IAAI;QACN,OAAO,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAA;IAC7B,CAAC;IAED,KAAK;QACH,OAAO,CAAC,GAAG,IAAI,CAAC,QAAQ,CAAC,CAAA;IAC3B,CAAC;IAED,SAAS;QACP,OAAO,EAAE,KAAK,EAAE,IAAI,CAAC,QAAQ,EAAE,CAAA;IACjC,CAAC;IAED,MAAM,CAAC,WAAW,CAAC,IAAY;QAC7B,MAAM,CAAC,GAAG,IAAI,UAAU,EAAE,CAAA;QAC1B,MAAM,CAAC,GAAG,IAA2B,CAAA;QACrC,KAAK,MAAM,IAAI,IAAI,CAAC,CAAC,KAAK,EAAE,CAAC;YAC3B,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAA;QAClB,CAAC;QACD,OAAO,CAAC,CAAA;IACV,CAAC;CACF;AAtCD,gCAsCC"}
package/package.json ADDED
@@ -0,0 +1,33 @@
1
+ {
2
+ "name": "sparse-encode",
3
+ "version": "0.1.0",
4
+ "description": "Generate BM25 and TF-IDF sparse vectors in JavaScript",
5
+ "main": "dist/index.js",
6
+ "types": "dist/index.d.ts",
7
+ "files": [
8
+ "dist"
9
+ ],
10
+ "scripts": {
11
+ "build": "tsc",
12
+ "test": "vitest run",
13
+ "lint": "eslint src/",
14
+ "prepublishOnly": "npm run build"
15
+ },
16
+ "keywords": [],
17
+ "author": "",
18
+ "license": "MIT",
19
+ "engines": {
20
+ "node": ">=18"
21
+ },
22
+ "publishConfig": {
23
+ "access": "public"
24
+ },
25
+ "devDependencies": {
26
+ "@types/node": "^25.5.0",
27
+ "@typescript-eslint/eslint-plugin": "^8.57.1",
28
+ "@typescript-eslint/parser": "^8.57.1",
29
+ "eslint": "^10.1.0",
30
+ "typescript": "^5.9.3",
31
+ "vitest": "^4.1.0"
32
+ }
33
+ }