@shd101wyy/yo 0.0.28 → 0.0.29
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/out/cjs/index.cjs +57 -57
- package/out/cjs/yo-cli.cjs +72 -72
- package/out/esm/index.mjs +62 -62
- package/out/types/tsconfig.tsbuildinfo +1 -1
- package/package.json +1 -1
- package/std/regex/compiler.yo +355 -0
- package/std/regex/flags.yo +104 -0
- package/std/regex/match.yo +83 -0
- package/std/regex/node.yo +283 -0
- package/std/regex/parser.yo +847 -0
- package/std/regex/regex.yo +714 -0
- package/std/regex/unicode.yo +365 -0
- package/std/regex/vm.yo +737 -0
- package/std/time/sleep.yo +18 -0
- package/std/time.yo +0 -13
|
@@ -0,0 +1,283 @@
|
|
|
1
|
+
// std/regex/node.yo - Regex AST node types
|
|
2
|
+
//
|
|
3
|
+
// The parser produces a tree of RegexNode objects representing the
|
|
4
|
+
// structure of a regex pattern. Nodes are reference-counted objects
|
|
5
|
+
// since they form a recursive tree.
|
|
6
|
+
|
|
7
|
+
open import "std/collections/array_list";
|
|
8
|
+
open import "std/string";
|
|
9
|
+
|
|
10
|
+
// A range of characters for character classes, e.g. 'a'-'z'; RegexNode keeps a list of these in its `ranges` field
|
|
11
|
+
CharRange :: struct(
|
|
12
|
+
low : u32, // lower end of the range (u32, the same type RegexNode uses for `codepoint`)
|
|
13
|
+
high : u32 // upper end of the range; NOTE(review): whether the bounds are inclusive is not shown in this file - confirm against the matcher
|
|
14
|
+
);
|
|
15
|
+
|
|
16
|
+
// Anchor types for ^ $ \b \B (the four variants below are listed in that same order)
|
|
17
|
+
AnchorKind :: enum(
|
|
18
|
+
Start, // ^ ; also the filler value every non-Anchor constructor stores in RegexNode.anchor
|
|
19
|
+
End, // $
|
|
20
|
+
WordBoundary, // \b
|
|
21
|
+
NonWordBoundary // \B
|
|
22
|
+
);
|
|
23
|
+
|
|
24
|
+
// Node kind tag: stored in RegexNode.kind and set by the matching RegexNode constructor
|
|
25
|
+
NodeKind :: enum(
|
|
26
|
+
Literal, // one codepoint (RegexNode.codepoint); no children
|
|
27
|
+
Dot, // "any char" node; no children
|
|
28
|
+
CharClass, // uses RegexNode.ranges and RegexNode.negated; no children
|
|
29
|
+
Anchor, // uses RegexNode.anchor; no children
|
|
30
|
+
Sequence, // concatenation of the nodes in RegexNode.children
|
|
31
|
+
Alternation, // two children: left, then right
|
|
32
|
+
Quantifier, // one child; uses q_min, q_max, q_greedy
|
|
33
|
+
Group, // capturing group: one child; uses group_index
|
|
34
|
+
NonCapturingGroup, // one child; group_index left at 0
|
|
35
|
+
Backreference, // uses group_index; no children
|
|
36
|
+
Lookahead, // one child; RegexNode.negated is true for the negative form
|
|
37
|
+
Lookbehind // one child; RegexNode.negated is true for the negative form
|
|
38
|
+
);
|
|
39
|
+
|
|
40
|
+
// Entry mapping a named group to its index (exported for other modules; RegexNode itself does not use it in this file)
|
|
41
|
+
GroupNameEntry :: struct(
|
|
42
|
+
name : String, // the group's name
|
|
43
|
+
index : usize // the group's index; same type as RegexNode.group_index
|
|
44
|
+
);
|
|
45
|
+
|
|
46
|
+
// The regex AST node type: one flat record shared by every NodeKind; `kind` tells which of the other fields are meaningful.
|
|
47
|
+
// Uses `object(...)` for reference counting since nodes are recursive.
|
|
48
|
+
// Self-reference via `Self` (like LinkedList's Node).
|
|
49
|
+
RegexNode :: object(
|
|
50
|
+
kind : NodeKind, // tag set by the constructor that built the node
|
|
51
|
+
children : ArrayList(Self), // sub-nodes: empty for leaf kinds, one for wrapper kinds, two for Alternation, caller-supplied list for Sequence
|
|
52
|
+
// For Literal nodes: the codepoint to match (the other constructors store 0)
|
|
53
|
+
codepoint : u32,
|
|
54
|
+
// For CharClass nodes: ranges and negation flag; `negated` is also set by the lookahead/lookbehind constructors (true = negative form)
|
|
55
|
+
ranges : ArrayList(CharRange),
|
|
56
|
+
negated : bool,
|
|
57
|
+
// For Quantifier nodes: min, max, greedy; a q_max of 0 means unbounded, and the other constructors store 0, 0, true
|
|
58
|
+
q_min : usize,
|
|
59
|
+
q_max : usize,
|
|
60
|
+
q_greedy : bool,
|
|
61
|
+
// For Group and Backreference nodes: the capture group index (the other constructors store 0)
|
|
62
|
+
group_index : usize,
|
|
63
|
+
// For Anchor nodes: which anchor; the other constructors store the filler value .Start
|
|
64
|
+
anchor : AnchorKind
|
|
65
|
+
);
|
|
66
|
+
|
|
67
|
+
impl(RegexNode,
  // Constructors for every NodeKind. RegexNode is a single flat record, so each
  // constructor fills the fields its kind uses and gives the rest neutral
  // filler values: empty lists, 0, negated=false, q_greedy=true, anchor=.Start.

  // Leaf node matching exactly one codepoint, `cp`.
  literal : (fn(cp : u32) -> Self)({
    no_children := ArrayList(RegexNode).new();
    no_ranges := ArrayList(CharRange).new();
    Self(
      kind: .Literal,
      children: no_children,
      codepoint: cp,
      ranges: no_ranges,
      negated: false,
      q_min: usize(0),
      q_max: usize(0),
      q_greedy: true,
      group_index: usize(0),
      anchor: .Start
    )
  }),

  // Leaf node for `.` (any char).
  dot : (fn() -> Self)({
    no_children := ArrayList(RegexNode).new();
    no_ranges := ArrayList(CharRange).new();
    Self(
      kind: .Dot,
      children: no_children,
      codepoint: u32(0),
      ranges: no_ranges,
      negated: false,
      q_min: usize(0),
      q_max: usize(0),
      q_greedy: true,
      group_index: usize(0),
      anchor: .Start
    )
  }),

  // Leaf node for a character class. `ranges` is stored as given and
  // `negated` is stored as the class's negation flag.
  char_class : (fn(ranges : ArrayList(CharRange), negated : bool) -> Self)({
    no_children := ArrayList(RegexNode).new();
    Self(
      kind: .CharClass,
      children: no_children,
      codepoint: u32(0),
      ranges: ranges,
      negated: negated,
      q_min: usize(0),
      q_max: usize(0),
      q_greedy: true,
      group_index: usize(0),
      anchor: .Start
    )
  }),

  // Leaf node for an anchor (^, $, \b, \B); `kind` selects which one and is
  // stored in the `anchor` field (the node's own `kind` is always .Anchor).
  anchor_node : (fn(kind : AnchorKind) -> Self)({
    no_children := ArrayList(RegexNode).new();
    no_ranges := ArrayList(CharRange).new();
    Self(
      kind: .Anchor,
      children: no_children,
      codepoint: u32(0),
      ranges: no_ranges,
      negated: false,
      q_min: usize(0),
      q_max: usize(0),
      q_greedy: true,
      group_index: usize(0),
      anchor: kind
    )
  }),

  // Concatenation node; `nodes` becomes the children list as-is.
  sequence : (fn(nodes : ArrayList(RegexNode)) -> Self)({
    no_ranges := ArrayList(CharRange).new();
    Self(
      kind: .Sequence,
      children: nodes,
      codepoint: u32(0),
      ranges: no_ranges,
      negated: false,
      q_min: usize(0),
      q_max: usize(0),
      q_greedy: true,
      group_index: usize(0),
      anchor: .Start
    )
  }),

  // Alternation node (left | right); children are [left, right] in that order.
  alternation : (fn(left : RegexNode, right : RegexNode) -> Self)({
    branches := ArrayList(RegexNode).new();
    branches.push(left);
    branches.push(right);
    no_ranges := ArrayList(CharRange).new();
    Self(
      kind: .Alternation,
      children: branches,
      codepoint: u32(0),
      ranges: no_ranges,
      negated: false,
      q_min: usize(0),
      q_max: usize(0),
      q_greedy: true,
      group_index: usize(0),
      anchor: .Start
    )
  }),

  // Quantifier node wrapping `child` as its only child.
  // min_val / max_val / greedy are stored in q_min / q_max / q_greedy.
  // max_val of 0 means unbounded (infinity).
  quantifier : (fn(child : RegexNode, min_val : usize, max_val : usize, greedy : bool) -> Self)({
    repeated := ArrayList(RegexNode).new();
    repeated.push(child);
    no_ranges := ArrayList(CharRange).new();
    Self(
      kind: .Quantifier,
      children: repeated,
      codepoint: u32(0),
      ranges: no_ranges,
      negated: false,
      q_min: min_val,
      q_max: max_val,
      q_greedy: greedy,
      group_index: usize(0),
      anchor: .Start
    )
  }),

  // Capturing group wrapping `child`; `index` is stored in group_index.
  group : (fn(child : RegexNode, index : usize) -> Self)({
    captured := ArrayList(RegexNode).new();
    captured.push(child);
    no_ranges := ArrayList(CharRange).new();
    Self(
      kind: .Group,
      children: captured,
      codepoint: u32(0),
      ranges: no_ranges,
      negated: false,
      q_min: usize(0),
      q_max: usize(0),
      q_greedy: true,
      group_index: index,
      anchor: .Start
    )
  }),

  // Non-capturing group wrapping `child`; group_index stays 0.
  non_capturing_group : (fn(child : RegexNode) -> Self)({
    grouped := ArrayList(RegexNode).new();
    grouped.push(child);
    no_ranges := ArrayList(CharRange).new();
    Self(
      kind: .NonCapturingGroup,
      children: grouped,
      codepoint: u32(0),
      ranges: no_ranges,
      negated: false,
      q_min: usize(0),
      q_max: usize(0),
      q_greedy: true,
      group_index: usize(0),
      anchor: .Start
    )
  }),

  // Backreference node (\1, \k<name>); `group_idx` is stored in group_index.
  backreference : (fn(group_idx : usize) -> Self)({
    no_children := ArrayList(RegexNode).new();
    no_ranges := ArrayList(CharRange).new();
    Self(
      kind: .Backreference,
      children: no_children,
      codepoint: u32(0),
      ranges: no_ranges,
      negated: false,
      q_min: usize(0),
      q_max: usize(0),
      q_greedy: true,
      group_index: group_idx,
      anchor: .Start
    )
  }),

  // Lookahead node, (?=...) or (?!...), wrapping `child`.
  // The flag is stored inverted: positive=true gives negated=false,
  // positive=false gives negated=true.
  lookahead : (fn(child : RegexNode, positive : bool) -> Self)({
    asserted := ArrayList(RegexNode).new();
    asserted.push(child);
    no_ranges := ArrayList(CharRange).new();
    is_negative := (!(positive));
    Self(
      kind: .Lookahead,
      children: asserted,
      codepoint: u32(0),
      ranges: no_ranges,
      negated: is_negative,
      q_min: usize(0),
      q_max: usize(0),
      q_greedy: true,
      group_index: usize(0),
      anchor: .Start
    )
  }),

  // Lookbehind node, (?<=...) or (?<!...), wrapping `child`.
  // The flag is stored inverted: positive=true gives negated=false,
  // positive=false gives negated=true.
  lookbehind : (fn(child : RegexNode, positive : bool) -> Self)({
    asserted := ArrayList(RegexNode).new();
    asserted.push(child);
    no_ranges := ArrayList(CharRange).new();
    is_negative := (!(positive));
    Self(
      kind: .Lookbehind,
      children: asserted,
      codepoint: u32(0),
      ranges: no_ranges,
      negated: is_negative,
      q_min: usize(0),
      q_max: usize(0),
      q_greedy: true,
      group_index: usize(0),
      anchor: .Start
    )
  })
);
|
|
276
|
+
|
|
277
|
+
export // public names of this module: the node type plus the supporting types it is built from
|
|
278
|
+
RegexNode,
|
|
279
|
+
NodeKind,
|
|
280
|
+
CharRange,
|
|
281
|
+
AnchorKind,
|
|
282
|
+
GroupNameEntry
|
|
283
|
+
;
|