@stll/text-search 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,25 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 stella labs, s.r.o., a limited liability company
4
+ established under the laws of the Czech Republic, with its registered
5
+ office at Nad Porubkou 2355, Poruba, 708 00 Ostrava, Czech Republic,
6
+ Company ID (IČO): 24632872, registered in the Commercial Register
7
+ maintained by the Regional Court in Ostrava, Section C, File No. 103233.
8
+
9
+ Permission is hereby granted, free of charge, to any person obtaining a copy
10
+ of this software and associated documentation files (the "Software"), to deal
11
+ in the Software without restriction, including without limitation the rights
12
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13
+ copies of the Software, and to permit persons to whom the Software is
14
+ furnished to do so, subject to the following conditions:
15
+
16
+ The above copyright notice and this permission notice shall be included in all
17
+ copies or substantial portions of the Software.
18
+
19
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
25
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,183 @@
1
+ <p align="center">
2
+ <img src=".github/assets/banner.png" alt="Stella" width="100%" />
3
+ </p>
4
+
5
+ # @stll/text-search
6
+
7
+ Multi-engine text search orchestrator for
8
+ Node.js and Bun. Routes patterns to the optimal
9
+ engine automatically: Aho-Corasick for literals,
10
+ RegexSet for regex, FuzzySearch for approximate
11
+ matching, with auto-optimization for large
12
+ alternations.
13
+
14
+ Part of the
15
+ [@stll text search ecosystem](https://github.com/stella):
16
+ [@stll/regex-set](https://github.com/stella/regex-set),
17
+ [@stll/aho-corasick](https://github.com/stella/aho-corasick),
18
+ [@stll/fuzzy-search](https://github.com/stella/fuzzy-search).
19
+
20
+ ## Install
21
+
22
+ ```bash
23
+ npm install @stll/text-search
24
+ # or
25
+ bun add @stll/text-search
26
+ ```
27
+
28
+ Requires `@stll/regex-set`, `@stll/aho-corasick`,
29
+ and `@stll/fuzzy-search` as regular dependencies
30
+ (installed automatically).
31
+
32
+ ## Usage
33
+
34
+ ```typescript
35
+ import { TextSearch } from "@stll/text-search";
36
+
37
+ const ts = new TextSearch([
38
+ // Regex patterns → RegexSet (DFA)
39
+ /\b\d{2}\.\d{2}\.\d{4}\b/,
40
+ /\b[\w.+-]+@[\w-]+\.[\w]+\b/,
41
+
42
+ // Pure literals → Aho-Corasick (SIMD)
43
+ "Confidential",
44
+ "Attorney-Client Privilege",
45
+
46
+ // Fuzzy patterns → FuzzySearch (Levenshtein)
47
+ { pattern: "Novák", distance: 1, name: "person" },
48
+
49
+ // Large alternation → auto-isolated RegexSet
50
+ `(?:${titles.join("|")})\\s+[A-Z][a-z]+`,
51
+
52
+ // Named patterns
53
+ { pattern: /\+?\d{9,12}/, name: "phone" },
54
+ ]);
55
+
56
+ ts.findIter("Ing. Jan Novak, born 15.03.1990");
57
+ // [
58
+ // { pattern: 5, text: "Ing. Jan", ... },
59
+ // { pattern: 4, text: "Novak", distance: 1, ... },
60
+ // { pattern: 0, text: "15.03.1990", ... },
61
+ // ]
62
+ ```
63
+
64
+ ## Engine routing
65
+
66
+ Patterns are classified and routed to the optimal
67
+ engine at construction time:
68
+
69
+ | Engine | Condition | Performance |
70
+ | --- | --- | --- |
71
+ | Aho-Corasick | Pure literal strings | SIMD-accelerated |
72
+ | RegexSet (shared) | Normal regex patterns | Single-pass DFA |
73
+ | RegexSet (isolated) | >50 alternation branches | Prevents DFA explosion |
74
+ | FuzzySearch | `distance` field present | Levenshtein/Damerau |
75
+
76
+ Large alternation patterns (e.g., 80+ title
77
+ prefixes) are automatically isolated into their
78
+ own RegexSet instance, preventing DFA state
79
+ explosion when combined with other patterns.
80
+
81
+ ```typescript
82
+ // Without text-search: 73ms (DFA state explosion)
83
+ new RegexSet([hugePattern, simplePattern]);
84
+
85
+ // With text-search: 0.4ms (auto-split)
86
+ new TextSearch([hugePattern, simplePattern]);
87
+ ```
88
+
89
+ ## Options
90
+
91
+ ```typescript
92
+ new TextSearch(patterns, {
93
+ // Unicode word boundaries (default: true)
94
+ unicodeBoundaries: true,
95
+
96
+ // Only match whole words (default: false)
97
+ wholeWords: false,
98
+
99
+ // Max alternation branches before auto-split
100
+ // (default: 50)
101
+ maxAlternations: 50,
102
+
103
+ // Fuzzy matching options
104
+ fuzzyMetric: "levenshtein", // or "damerau-levenshtein"
105
+ normalizeDiacritics: false,
106
+ caseInsensitive: false,
107
+ });
108
+ ```
109
+
110
+ ## API
111
+
112
+ | Method | Returns | Description |
113
+ | --- | --- | --- |
114
+ | `findIter(text)` | `Match[]` | All non-overlapping matches |
115
+ | `isMatch(text)` | `boolean` | Any pattern matches? |
116
+ | `whichMatch(text)` | `number[]` | Which pattern indices matched |
117
+ | `replaceAll(text, replacements)` | `string` | Replace matches |
118
+ | `length` | `number` | Number of patterns |
119
+
120
+ ## Pattern entry types
121
+
122
+ ```typescript
123
+ // Simple string (literal → AC, regex → RegexSet)
124
+ "foo"
125
+
126
+ // RegExp object → RegexSet
127
+ /\btest\b/i
128
+
129
+ // Named pattern
130
+ { pattern: "\\d+", name: "number" }
131
+
132
+ // Fuzzy pattern → FuzzySearch
133
+ { pattern: "Novák", distance: 1 }
134
+ { pattern: "Smith", distance: "auto", name: "person" }
135
+ ```
136
+
137
+ ## Match type
138
+
139
+ ```typescript
140
+ type Match = {
141
+ pattern: number; // original pattern index
142
+ start: number; // UTF-16 offset
143
+ end: number; // exclusive
144
+ text: string; // matched substring
145
+ name?: string; // pattern name (if provided)
146
+ };
147
+ ```
148
+
149
+ Same `Match` shape as `@stll/regex-set`,
150
+ `@stll/aho-corasick`, and `@stll/fuzzy-search` (fuzzy matches also carry a `distance` field, as in the usage example above).
151
+
152
+ ## How it works
153
+
154
+ 1. **Classify**: detect literals, count alternation
155
+ branches, identify fuzzy patterns
156
+ 2. **Route**: literals → AC, fuzzy → FuzzySearch,
157
+ large alternations → isolated RegexSet,
158
+ normal regex → shared RegexSet
159
+ 3. **Search**: each engine scans the text
160
+ 4. **Merge**: combine results, sort by position,
161
+ select non-overlapping (longest match at ties)
162
+
163
+ ## Development
164
+
165
+ ```bash
166
+ bun install
167
+ bun test
168
+ bun run lint
169
+ bun run format
170
+ bun run build
171
+ ```
172
+
173
+ ## Built on
174
+ - [@stll/regex-set](https://github.com/stella/regex-set) —
175
+ NAPI-RS bindings to Rust regex-automata
176
+ - [@stll/aho-corasick](https://github.com/stella/aho-corasick) —
177
+ NAPI-RS bindings to Rust aho-corasick
178
+ - [@stll/fuzzy-search](https://github.com/stella/fuzzy-search) —
179
+ NAPI-RS Levenshtein/Damerau-Levenshtein matcher
180
+
181
+ ## License
182
+
183
+ [MIT](./LICENSE)
package/package.json ADDED
@@ -0,0 +1,51 @@
1
+ {
2
+ "name": "@stll/text-search",
3
+ "version": "0.1.0",
4
+ "description": "Multi-engine text search orchestrator. Routes patterns to optimal engines: Aho-Corasick, RegexSet, or FuzzySearch.",
5
+ "keywords": [
6
+ "text-search",
7
+ "multi-pattern",
8
+ "aho-corasick",
9
+ "regex",
10
+ "fuzzy",
11
+ "orchestrator"
12
+ ],
13
+ "homepage": "https://github.com/stella/text-search#readme",
14
+ "bugs": {
15
+ "url": "https://github.com/stella/text-search/issues"
16
+ },
17
+ "license": "MIT",
18
+ "repository": {
19
+ "type": "git",
20
+ "url": "https://github.com/stella/text-search"
21
+ },
22
+ "type": "module",
23
+ "main": "src/index.ts",
24
+ "module": "src/index.ts",
25
+ "exports": {
26
+ ".": "./src/index.ts"
27
+ },
28
+ "files": [
29
+ "dist", "src"
30
+ ],
31
+ "scripts": {
32
+ "build": "bun build src/index.ts --outdir dist --target node",
33
+ "test": "bun test",
34
+ "lint": "oxlint .",
35
+ "format": "oxfmt ."
36
+ },
37
+ "dependencies": {
38
+ "@stll/aho-corasick": "^0.2.0",
39
+ "@stll/fuzzy-search": "^0.1.0",
40
+ "@stll/regex-set": "^0.4.0"
41
+ },
42
+ "devDependencies": {
43
+ "@types/node": "^22.0.0",
44
+ "bun-types": "^1.3.10",
45
+ "oxfmt": "^0.40.0",
46
+ "oxlint": "^1.55.0"
47
+ },
48
+ "engines": {
49
+ "node": ">= 18"
50
+ }
51
+ }
package/src/index.ts ADDED
@@ -0,0 +1,6 @@
1
+ export { TextSearch } from "./text-search";
2
+ export type {
3
+ Match,
4
+ PatternEntry,
5
+ TextSearchOptions,
6
+ } from "./types";