seqex 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +305 -0
- package/dist/ast.d.ts +24 -0
- package/dist/engine.d.ts +14 -0
- package/dist/index.cjs +517 -0
- package/dist/index.cjs.map +15 -0
- package/dist/index.d.ts +4 -0
- package/dist/index.js +485 -0
- package/dist/index.js.map +15 -0
- package/dist/matcher.d.ts +13 -0
- package/dist/nfa.d.ts +15 -0
- package/dist/pattern.d.ts +24 -0
- package/dist/scanner.d.ts +19 -0
- package/package.json +54 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Dawson Booth
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,305 @@
|
|
|
1
|
+
# seqex
|
|
2
|
+
|
|
3
|
+
[CI status](https://github.com/dawsonbooth/seqex/actions/workflows/ci.yml)
|
|
4
|
+
[npm package](https://www.npmjs.com/package/seqex)
|
|
5
|
+
[Code coverage](https://codecov.io/gh/dawsonbooth/seqex)
|
|
6
|
+
[MIT License](LICENSE)
|
|
7
|
+
|
|
8
|
+
Regex-like pattern matching for arbitrary sequences. Instead of matching characters against character classes, match elements of any type against predicate functions.
|
|
9
|
+
|
|
10
|
+
Built on an NFA engine (Thompson's construction) — O(n \* m) guaranteed, no exponential backtracking.
|
|
11
|
+
|
|
12
|
+
## Install
|
|
13
|
+
|
|
14
|
+
```bash
|
|
15
|
+
bun add seqex
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
## Quick start
|
|
19
|
+
|
|
20
|
+
```typescript
|
|
21
|
+
import { Pattern } from 'seqex'
|
|
22
|
+
|
|
23
|
+
const isEven = (n: number) => n % 2 === 0
|
|
24
|
+
const isOdd = (n: number) => n % 2 !== 0
|
|
25
|
+
|
|
26
|
+
// Build a pattern and compile it
|
|
27
|
+
const matcher = Pattern.where<number>(isEven).followedBy(isOdd).followedBy(isEven).compile()
|
|
28
|
+
|
|
29
|
+
// Find all non-overlapping matches
|
|
30
|
+
matcher.findAll([2, 3, 4, 6, 7, 8, 9, 10])
|
|
31
|
+
// → [{ start: 0, end: 2, data: [2, 3, 4] },
|
|
32
|
+
// { start: 3, end: 5, data: [6, 7, 8] }]
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
## API
|
|
36
|
+
|
|
37
|
+
### Building patterns
|
|
38
|
+
|
|
39
|
+
Start a pattern with `Pattern.where()` or `Pattern.any()`, then chain methods to describe the shape you're looking for.
|
|
40
|
+
|
|
41
|
+
#### `Pattern.where<T>(fn)` — start with a predicate
|
|
42
|
+
|
|
43
|
+
```typescript
|
|
44
|
+
Pattern.where<number>(n => n > 0)
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
#### `Pattern.any<T>()` — start with a wildcard (matches any element)
|
|
48
|
+
|
|
49
|
+
```typescript
|
|
50
|
+
Pattern.any<string>()
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
#### `.followedBy(fn | pattern)` — append a predicate or sub-pattern
|
|
54
|
+
|
|
55
|
+
```typescript
|
|
56
|
+
Pattern.where<number>(isEven).followedBy(isOdd).followedBy(isEven)
|
|
57
|
+
|
|
58
|
+
// Sub-patterns work too
|
|
59
|
+
const prefix = Pattern.where<number>(isEven).followedBy(isOdd)
|
|
60
|
+
Pattern.where<number>(isPositive).followedBy(prefix)
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
#### `.followedByAny()` — append a wildcard
|
|
64
|
+
|
|
65
|
+
```typescript
|
|
66
|
+
Pattern.where<number>(isEven).followedByAny().followedBy(isOdd)
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
### Quantifiers
|
|
70
|
+
|
|
71
|
+
Quantifiers modify the **last element** in the pattern. All accept an optional `greedy` parameter (default `true`).
|
|
72
|
+
|
|
73
|
+
| Method | Regex equivalent | Description |
|
|
74
|
+
| -------------------- | ---------------- | ------------------- |
|
|
75
|
+
| `.oneOrMore()` | `+` | One or more |
|
|
76
|
+
| `.zeroOrMore()` | `*` | Zero or more |
|
|
77
|
+
| `.optional()` | `?` | Zero or one |
|
|
78
|
+
| `.times(n)` | `{n}` | Exactly n |
|
|
79
|
+
| `.between(min, max)` | `{min,max}` | Between min and max |
|
|
80
|
+
|
|
81
|
+
```typescript
|
|
82
|
+
// One or more even numbers followed by an odd
|
|
83
|
+
Pattern.where<number>(isEven).oneOrMore().followedBy(isOdd)
|
|
84
|
+
|
|
85
|
+
// Exactly 3 positive numbers
|
|
86
|
+
Pattern.where<number>(n => n > 0).times(3)
|
|
87
|
+
|
|
88
|
+
// Between 2 and 5 elements
|
|
89
|
+
Pattern.where<number>(isEven).between(2, 5)
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
#### Greedy vs lazy
|
|
93
|
+
|
|
94
|
+
By default, quantifiers are greedy (match as many elements as possible). Pass `false` for lazy matching (match as few as possible).
|
|
95
|
+
|
|
96
|
+
```typescript
|
|
97
|
+
// Greedy: consumes as many positives as possible
|
|
98
|
+
Pattern.where<number>(isPositive).oneOrMore(true).followedBy(isPositive)
|
|
99
|
+
// On [1, 2, 3] → matches [1, 2, 3]
|
|
100
|
+
|
|
101
|
+
// Lazy: consumes as few positives as possible
|
|
102
|
+
Pattern.where<number>(isPositive).oneOrMore(false).followedBy(isPositive)
|
|
103
|
+
// On [1, 2, 3] → matches [1, 2]
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
### Alternation
|
|
107
|
+
|
|
108
|
+
#### `.or(fn | pattern)` — match this pattern or another
|
|
109
|
+
|
|
110
|
+
```typescript
|
|
111
|
+
// Match a positive or negative number
|
|
112
|
+
Pattern.where<number>(n => n > 0).or(n => n < 0)
|
|
113
|
+
|
|
114
|
+
// Alternation with complex sub-patterns
|
|
115
|
+
Pattern.where<number>(isEven).followedBy(isOdd).or(Pattern.where<number>(isOdd).followedBy(isEven))
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
#### `Pattern.oneOf<T>(...alternatives)` — multi-way alternation
|
|
119
|
+
|
|
120
|
+
Cleaner syntax for 3+ branches. Accepts any mix of predicates and patterns.
|
|
121
|
+
|
|
122
|
+
```typescript
|
|
123
|
+
Pattern.oneOf<number>(isEven, isOdd, isZero)
|
|
124
|
+
|
|
125
|
+
// With sub-patterns
|
|
126
|
+
Pattern.oneOf<number>(
|
|
127
|
+
Pattern.where<number>(isEven).followedBy(isOdd),
|
|
128
|
+
Pattern.where<number>(isOdd).followedBy(isEven),
|
|
129
|
+
Pattern.where<number>(isZero).followedBy(isZero),
|
|
130
|
+
)
|
|
131
|
+
|
|
132
|
+
// Composable — chain quantifiers, followedBy, etc.
|
|
133
|
+
Pattern.oneOf<number>(isEven, isOdd).oneOrMore().followedBy(isZero)
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
### Anchors
|
|
137
|
+
|
|
138
|
+
#### `.atStart()` — anchor to the beginning of the sequence
|
|
139
|
+
|
|
140
|
+
```typescript
|
|
141
|
+
Pattern.where<number>(isEven)
|
|
142
|
+
.atStart()
|
|
143
|
+
.compile()
|
|
144
|
+
.findAll([2, 3, 4]) // → [{ start: 0, end: 0, data: [2] }]
|
|
145
|
+
// The same anchored matcher called as .findAll([1, 2, 4]) → [] (the sequence does not start with an even number)
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
#### `.atEnd()` — anchor to the end of the sequence
|
|
149
|
+
|
|
150
|
+
```typescript
|
|
151
|
+
Pattern.where<number>(isEven).atEnd().compile().findAll([1, 3, 4]) // → [{ start: 2, end: 2, data: [4] }]
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
### Compiling and matching
|
|
155
|
+
|
|
156
|
+
#### `.compile()` — compile the pattern into a Matcher
|
|
157
|
+
|
|
158
|
+
Patterns are immutable descriptions. Call `.compile()` to get a `Matcher` that can be used repeatedly against different sequences.
|
|
159
|
+
|
|
160
|
+
```typescript
|
|
161
|
+
const matcher = Pattern.where<number>(isEven).compile()
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
#### `matcher.findAll(sequence)` — find all non-overlapping matches
|
|
165
|
+
|
|
166
|
+
Returns an array of `MatchResult<T>` objects with `start`, `end`, and `data` properties. Accepts any `Iterable<T>` — arrays, generators, Sets, Maps, or custom iterables.
|
|
167
|
+
|
|
168
|
+
```typescript
|
|
169
|
+
matcher.findAll([1, 2, 3, 4, 5, 6])
|
|
170
|
+
// → [{ start: 1, end: 1, data: [2] },
|
|
171
|
+
// { start: 3, end: 3, data: [4] },
|
|
172
|
+
// { start: 5, end: 5, data: [6] }]
|
|
173
|
+
|
|
174
|
+
// Works with any iterable
|
|
175
|
+
function* naturals(n: number) {
|
|
176
|
+
for (let i = 1; i <= n; i++) yield i
|
|
177
|
+
}
|
|
178
|
+
matcher.findAll(naturals(6))
|
|
179
|
+
// → same result
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
#### `matcher.find(sequence)` — find the first match
|
|
183
|
+
|
|
184
|
+
Returns a single `MatchResult<T>` or `null`. For iterables, stops consuming elements as soon as a match is found.
|
|
185
|
+
|
|
186
|
+
```typescript
|
|
187
|
+
matcher.find([1, 2, 3, 4]) // → { start: 1, end: 1, data: [2] }
|
|
188
|
+
matcher.find([1, 3, 5]) // → null
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
#### `matcher.test(sequence)` — check if any match exists
|
|
192
|
+
|
|
193
|
+
Returns a boolean.
|
|
194
|
+
|
|
195
|
+
```typescript
|
|
196
|
+
matcher.test([1, 2, 3]) // → true
|
|
197
|
+
matcher.test([1, 3, 5]) // → false
|
|
198
|
+
```
|
|
199
|
+
|
|
200
|
+
### Streaming
|
|
201
|
+
|
|
202
|
+
For data that arrives incrementally (event streams, network packets, sensor readings), use the push-based scanner API.
|
|
203
|
+
|
|
204
|
+
#### `matcher.scanner()` — create a streaming scanner
|
|
205
|
+
|
|
206
|
+
```typescript
|
|
207
|
+
const scanner = matcher.scanner()
|
|
208
|
+
|
|
209
|
+
for await (const event of eventSource) {
|
|
210
|
+
for (const match of scanner.push(event)) {
|
|
211
|
+
handleMatch(match) // matches emitted as soon as they become definitive
|
|
212
|
+
}
|
|
213
|
+
}
|
|
214
|
+
|
|
215
|
+
// Signal end-of-stream to flush pending matches (greedy, atEnd anchors)
|
|
216
|
+
for (const match of scanner.end()) {
|
|
217
|
+
handleMatch(match)
|
|
218
|
+
}
|
|
219
|
+
```
|
|
220
|
+
|
|
221
|
+
`push(element)` advances the NFA simulation by one element and returns any matches that have become definitive. `end()` signals that no more elements will arrive, resolving pending greedy matches and `atEnd` anchors.
|
|
222
|
+
|
|
223
|
+
For greedy patterns, matches are held until the greedy quantifier can no longer extend (i.e., the simulation dies). For lazy patterns, matches emit from `push()` as early as possible.
|
|
224
|
+
|
|
225
|
+
## Works with any type
|
|
226
|
+
|
|
227
|
+
The library is generic over `<T>` — match numbers, strings, objects, or anything else.
|
|
228
|
+
|
|
229
|
+
```typescript
|
|
230
|
+
// Strings
|
|
231
|
+
const matcher = Pattern.where<string>(s => s.startsWith('a'))
|
|
232
|
+
.followedBy(s => s.length > 3)
|
|
233
|
+
.compile()
|
|
234
|
+
|
|
235
|
+
matcher.findAll(['apple', 'banana', 'ant', 'elephant'])
|
|
236
|
+
// → [{ start: 0, end: 1, data: ['apple', 'banana'] },
|
|
237
|
+
// { start: 2, end: 3, data: ['ant', 'elephant'] }]
|
|
238
|
+
|
|
239
|
+
// Objects
|
|
240
|
+
interface Event {
|
|
241
|
+
type: string
|
|
242
|
+
level: number
|
|
243
|
+
}
|
|
244
|
+
|
|
245
|
+
const matcher = Pattern.where<Event>(e => e.type === 'error')
|
|
246
|
+
.oneOrMore()
|
|
247
|
+
.followedBy(e => e.type === 'recovery')
|
|
248
|
+
.compile()
|
|
249
|
+
```
|
|
250
|
+
|
|
251
|
+
## Advanced patterns
|
|
252
|
+
|
|
253
|
+
Each predicate sees a single element in isolation. For patterns that depend on relationships between elements, there are two approaches.
|
|
254
|
+
|
|
255
|
+
### Pre-processing
|
|
256
|
+
|
|
257
|
+
Transform the sequence so each element carries the context it needs. This is pure and composable.
|
|
258
|
+
|
|
259
|
+
```typescript
|
|
260
|
+
// Detect runs of 3+ strictly increasing numbers
|
|
261
|
+
const nums = [1, 3, 5, 2, 4, 8, 12, 7]
|
|
262
|
+
const pairs = nums.slice(0, -1).map((n, i) => ({ value: n, next: nums[i + 1] }))
|
|
263
|
+
|
|
264
|
+
const m = Pattern.where<(typeof pairs)[number]>(p => p.next > p.value)
|
|
265
|
+
.between(2, Infinity)
|
|
266
|
+
.compile()
|
|
267
|
+
|
|
268
|
+
m.findAll(pairs) // two matches: pairs 0–1 (the run 1, 3, 5) and pairs 3–5 (the run 2, 4, 8, 12)
|
|
269
|
+
```
|
|
270
|
+
|
|
271
|
+
### Closure variables
|
|
272
|
+
|
|
273
|
+
When a later predicate needs to reference what an earlier one matched, use a shared variable. The NFA evaluates predicates left-to-right during simulation, so the ordering is reliable.
|
|
274
|
+
|
|
275
|
+
```typescript
|
|
276
|
+
// Match open tag, content, then the matching close tag
|
|
277
|
+
let openName = ''
|
|
278
|
+
|
|
279
|
+
const m = Pattern.where<Tag>(t => {
|
|
280
|
+
if (t.kind === 'open') {
|
|
281
|
+
openName = t.name
|
|
282
|
+
return true
|
|
283
|
+
}
|
|
284
|
+
return false
|
|
285
|
+
})
|
|
286
|
+
.followedBy(Pattern.where<Tag>(t => t.kind === 'text').oneOrMore())
|
|
287
|
+
.followedBy(t => t.kind === 'close' && t.name === openName)
|
|
288
|
+
.compile()
|
|
289
|
+
```
|
|
290
|
+
|
|
291
|
+
## Tests
|
|
292
|
+
|
|
293
|
+
```bash
|
|
294
|
+
bun test
|
|
295
|
+
```
|
|
296
|
+
|
|
297
|
+
## How it works
|
|
298
|
+
|
|
299
|
+
1. The fluent `Pattern` builder constructs an AST (abstract syntax tree) of pattern nodes
|
|
300
|
+
2. `.compile()` converts the AST into an NFA (nondeterministic finite automaton) using Thompson's construction
|
|
301
|
+
3. The matching engine simulates the NFA using the standard Thompson algorithm — tracking all active states simultaneously
|
|
302
|
+
4. Each element is tested against predicate transitions on active states to advance the simulation
|
|
303
|
+
5. This gives O(n \* m) time complexity where n is the sequence length and m is the pattern size — no pathological backtracking cases
|
|
304
|
+
|
|
305
|
+
For arrays, the engine runs the full simulation in a tight loop. For other iterables and the streaming scanner, the same NFA simulation is broken into per-element steps with buffer-and-replay for `findAll` semantics.
|
package/dist/ast.d.ts
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
export type Predicate<T> = (element: T) => boolean;
|
|
2
|
+
export declare function isGreedy<T>(node: PatternNode<T>): boolean;
|
|
3
|
+
export type PatternNode<T> = {
|
|
4
|
+
type: 'predicate';
|
|
5
|
+
fn: Predicate<T>;
|
|
6
|
+
} | {
|
|
7
|
+
type: 'sequence';
|
|
8
|
+
children: PatternNode<T>[];
|
|
9
|
+
} | {
|
|
10
|
+
type: 'quantifier';
|
|
11
|
+
child: PatternNode<T>;
|
|
12
|
+
min: number;
|
|
13
|
+
max: number;
|
|
14
|
+
greedy: boolean;
|
|
15
|
+
} | {
|
|
16
|
+
type: 'alternation';
|
|
17
|
+
left: PatternNode<T>;
|
|
18
|
+
right: PatternNode<T>;
|
|
19
|
+
} | {
|
|
20
|
+
type: 'anchor';
|
|
21
|
+
position: 'start' | 'end';
|
|
22
|
+
} | {
|
|
23
|
+
type: 'any';
|
|
24
|
+
};
|
package/dist/engine.d.ts
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import type { NFA, NFAState } from './nfa';
|
|
2
|
+
export interface MatchResult<T> {
|
|
3
|
+
start: number;
|
|
4
|
+
end: number;
|
|
5
|
+
data: T[];
|
|
6
|
+
}
|
|
7
|
+
export declare function epsilonClosure<T>(states: Set<NFAState<T>>, position: number, seqLength: number): Set<NFAState<T>>;
|
|
8
|
+
export declare function simulate<T>(nfa: NFA<T>, sequence: T[], offset: number, greedy?: boolean): {
|
|
9
|
+
matched: boolean;
|
|
10
|
+
length: number;
|
|
11
|
+
};
|
|
12
|
+
export declare function findAll<T>(nfa: NFA<T>, sequence: T[], greedy?: boolean): MatchResult<T>[];
|
|
13
|
+
export declare function findFirst<T>(nfa: NFA<T>, sequence: T[], greedy?: boolean): MatchResult<T> | null;
|
|
14
|
+
export declare function test<T>(nfa: NFA<T>, sequence: T[], greedy?: boolean): boolean;
|