@shd101wyy/yo 0.0.28 → 0.0.29

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,283 @@
1
+ // std/regex/node.yo - Regex AST node types
2
+ //
3
+ // The parser produces a tree of RegexNode objects representing the
4
+ // structure of a regex pattern. Nodes are reference-counted objects
5
+ // since they form a recursive tree.
6
+
7
+ open import "std/collections/array_list";
8
+ open import "std/string";
9
+
10
+ // A range of characters for character classes, e.g. 'a'-'z'; low and high are u32 bounds (presumably inclusive code points - confirm against the matcher)
11
+ CharRange :: struct(
12
+ low : u32,
13
+ high : u32
14
+ );
15
+
16
+ // Anchor types: Start (^), End ($), WordBoundary (\b), NonWordBoundary (\B)
17
+ AnchorKind :: enum(
18
+ Start,
19
+ End,
20
+ WordBoundary,
21
+ NonWordBoundary
22
+ );
23
+
24
+ // Node kind tag: stored in RegexNode.kind and determines which of the node's payload fields are meaningful
25
+ NodeKind :: enum(
26
+ Literal,
27
+ Dot,
28
+ CharClass,
29
+ Anchor,
30
+ Sequence,
31
+ Alternation,
32
+ Quantifier,
33
+ Group,
34
+ NonCapturingGroup,
35
+ Backreference,
36
+ Lookahead,
37
+ Lookbehind
38
+ );
39
+
40
+ // Entry mapping a named group (name : String) to its index (index : usize); presumably the same numbering as RegexNode.group_index - confirm in the parser
41
+ GroupNameEntry :: struct(
42
+ name : String,
43
+ index : usize
44
+ );
45
+
46
+ // The regex AST node type: one flat record shared by every NodeKind; fields a kind does not use hold placeholder defaults.
47
+ // Uses `object(...)` for reference counting since nodes are recursive.
48
+ // Self-reference via `Self` (like LinkedList's Node).
49
+ RegexNode :: object(
50
+ kind : NodeKind,
51
+ children : ArrayList(Self),
52
+ // For Literal nodes: the codepoint
53
+ codepoint : u32,
54
+ // For CharClass nodes: ranges and negation flag (negated is also set on Lookahead/Lookbehind nodes to mark the negative form)
55
+ ranges : ArrayList(CharRange),
56
+ negated : bool,
57
+ // For Quantifier nodes: min, max, greedy (q_max of 0 means unbounded)
58
+ q_min : usize,
59
+ q_max : usize,
60
+ q_greedy : bool,
61
+ // For Group and Backreference nodes: group index
62
+ group_index : usize,
63
+ // For Anchor nodes: which anchor; other kinds leave this at .Start
64
+ anchor : AnchorKind
65
+ );
66
+
67
+ impl(RegexNode,
68
+ // Create a literal node matching a single codepoint; all other fields get placeholder defaults (empty lists, 0, false, greedy=true, .Start)
69
+ literal : (fn(cp : u32) -> Self)(
70
+ Self(
71
+ kind: .Literal,
72
+ children: ArrayList(RegexNode).new(),
73
+ codepoint: cp,
74
+ ranges: ArrayList(CharRange).new(),
75
+ negated: false,
76
+ q_min: usize(0),
77
+ q_max: usize(0),
78
+ q_greedy: true,
79
+ group_index: usize(0),
80
+ anchor: .Start
81
+ )
82
+ ),
83
+
84
+ // Create a dot (any char) node; it carries no payload, so every field besides kind is a placeholder default
85
+ dot : (fn() -> Self)(
86
+ Self(
87
+ kind: .Dot,
88
+ children: ArrayList(RegexNode).new(),
89
+ codepoint: u32(0),
90
+ ranges: ArrayList(CharRange).new(),
91
+ negated: false,
92
+ q_min: usize(0),
93
+ q_max: usize(0),
94
+ q_greedy: true,
95
+ group_index: usize(0),
96
+ anchor: .Start
97
+ )
98
+ ),
99
+
100
+ // Create a character class node; the given ranges list and negated flag are stored on the node unchanged
101
+ char_class : (fn(ranges : ArrayList(CharRange), negated : bool) -> Self)(
102
+ Self(
103
+ kind: .CharClass,
104
+ children: ArrayList(RegexNode).new(),
105
+ codepoint: u32(0),
106
+ ranges: ranges,
107
+ negated: negated,
108
+ q_min: usize(0),
109
+ q_max: usize(0),
110
+ q_greedy: true,
111
+ group_index: usize(0),
112
+ anchor: .Start
113
+ )
114
+ ),
115
+
116
+ // Create an anchor node (^, $, \b, \B); kind here is the AnchorKind stored in the anchor field, the node's own kind is always .Anchor
117
+ anchor_node : (fn(kind : AnchorKind) -> Self)(
118
+ Self(
119
+ kind: .Anchor,
120
+ children: ArrayList(RegexNode).new(),
121
+ codepoint: u32(0),
122
+ ranges: ArrayList(CharRange).new(),
123
+ negated: false,
124
+ q_min: usize(0),
125
+ q_max: usize(0),
126
+ q_greedy: true,
127
+ group_index: usize(0),
128
+ anchor: kind
129
+ )
130
+ ),
131
+
132
+ // Create a sequence node (concatenation of children); the given nodes list becomes the children field as-is
133
+ sequence : (fn(nodes : ArrayList(RegexNode)) -> Self)(
134
+ Self(
135
+ kind: .Sequence,
136
+ children: nodes,
137
+ codepoint: u32(0),
138
+ ranges: ArrayList(CharRange).new(),
139
+ negated: false,
140
+ q_min: usize(0),
141
+ q_max: usize(0),
142
+ q_greedy: true,
143
+ group_index: usize(0),
144
+ anchor: .Start
145
+ )
146
+ ),
147
+
148
+ // Create an alternation node (left | right); left is pushed into children first, then right
149
+ alternation : (fn(left : RegexNode, right : RegexNode) -> Self)({
150
+ kids := ArrayList(RegexNode).new();
151
+ kids.push(left);
152
+ kids.push(right);
153
+ Self(
154
+ kind: .Alternation,
155
+ children: kids,
156
+ codepoint: u32(0),
157
+ ranges: ArrayList(CharRange).new(),
158
+ negated: false,
159
+ q_min: usize(0),
160
+ q_max: usize(0),
161
+ q_greedy: true,
162
+ group_index: usize(0),
163
+ anchor: .Start
164
+ )
165
+ }),
166
+
167
+ // Create a quantifier node wrapping child as its only entry in children; min_val/max_val/greedy are stored in q_min/q_max/q_greedy
168
+ // max_val of 0 means unbounded (infinity). NOTE(review): an explicit upper bound of 0 (e.g. {0}) is indistinguishable from unbounded here - confirm the parser handles that case
169
+ quantifier : (fn(child : RegexNode, min_val : usize, max_val : usize, greedy : bool) -> Self)({
170
+ kids := ArrayList(RegexNode).new();
171
+ kids.push(child);
172
+ Self(
173
+ kind: .Quantifier,
174
+ children: kids,
175
+ codepoint: u32(0),
176
+ ranges: ArrayList(CharRange).new(),
177
+ negated: false,
178
+ q_min: min_val,
179
+ q_max: max_val,
180
+ q_greedy: greedy,
181
+ group_index: usize(0),
182
+ anchor: .Start
183
+ )
184
+ }),
185
+
186
+ // Create a capturing group node; child is the only entry in children and index is stored in group_index
187
+ group : (fn(child : RegexNode, index : usize) -> Self)({
188
+ kids := ArrayList(RegexNode).new();
189
+ kids.push(child);
190
+ Self(
191
+ kind: .Group,
192
+ children: kids,
193
+ codepoint: u32(0),
194
+ ranges: ArrayList(CharRange).new(),
195
+ negated: false,
196
+ q_min: usize(0),
197
+ q_max: usize(0),
198
+ q_greedy: true,
199
+ group_index: index,
200
+ anchor: .Start
201
+ )
202
+ }),
203
+
204
+ // Create a non-capturing group node; child is the only entry in children and group_index is set to 0
205
+ non_capturing_group : (fn(child : RegexNode) -> Self)({
206
+ kids := ArrayList(RegexNode).new();
207
+ kids.push(child);
208
+ Self(
209
+ kind: .NonCapturingGroup,
210
+ children: kids,
211
+ codepoint: u32(0),
212
+ ranges: ArrayList(CharRange).new(),
213
+ negated: false,
214
+ q_min: usize(0),
215
+ q_max: usize(0),
216
+ q_greedy: true,
217
+ group_index: usize(0),
218
+ anchor: .Start
219
+ )
220
+ }),
221
+
222
+ // Create a backreference node (\1, \k<name>); group_idx is stored in group_index and the node has no children
223
+ backreference : (fn(group_idx : usize) -> Self)(
224
+ Self(
225
+ kind: .Backreference,
226
+ children: ArrayList(RegexNode).new(),
227
+ codepoint: u32(0),
228
+ ranges: ArrayList(CharRange).new(),
229
+ negated: false,
230
+ q_min: usize(0),
231
+ q_max: usize(0),
232
+ q_greedy: true,
233
+ group_index: group_idx,
234
+ anchor: .Start
235
+ )
236
+ ),
237
+
238
+ // Create a lookahead node (?=...) or (?!...); child is the only entry in children
239
+ // The positive argument is stored inverted: positive=true gives negated=false (positive lookahead), positive=false gives negated=true (negative lookahead)
240
+ lookahead : (fn(child : RegexNode, positive : bool) -> Self)({
241
+ kids := ArrayList(RegexNode).new();
242
+ kids.push(child);
243
+ Self(
244
+ kind: .Lookahead,
245
+ children: kids,
246
+ codepoint: u32(0),
247
+ ranges: ArrayList(CharRange).new(),
248
+ negated: (!(positive)),
249
+ q_min: usize(0),
250
+ q_max: usize(0),
251
+ q_greedy: true,
252
+ group_index: usize(0),
253
+ anchor: .Start
254
+ )
255
+ }),
256
+
257
+ // Create a lookbehind node (?<=...) or (?<!...); child is the only entry in children
258
+ // The positive argument is stored inverted: positive=true gives negated=false (positive lookbehind), positive=false gives negated=true (negative lookbehind)
259
+ lookbehind : (fn(child : RegexNode, positive : bool) -> Self)({
260
+ kids := ArrayList(RegexNode).new();
261
+ kids.push(child);
262
+ Self(
263
+ kind: .Lookbehind,
264
+ children: kids,
265
+ codepoint: u32(0),
266
+ ranges: ArrayList(CharRange).new(),
267
+ negated: (!(positive)),
268
+ q_min: usize(0),
269
+ q_max: usize(0),
270
+ q_greedy: true,
271
+ group_index: usize(0),
272
+ anchor: .Start
273
+ )
274
+ })
275
+ );
276
+
277
+ export
278
+ RegexNode,
279
+ NodeKind,
280
+ CharRange,
281
+ AnchorKind,
282
+ GroupNameEntry
283
+ ;