@shd101wyy/yo 0.1.5 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -6
- package/out/cjs/index.cjs +508 -503
- package/out/cjs/yo-cli.cjs +619 -612
- package/out/esm/index.mjs +397 -392
- package/out/types/src/codegen/codegen-c.d.ts +2 -0
- package/out/types/src/codegen/functions/context.d.ts +1 -0
- package/out/types/src/codegen/functions/generation.d.ts +10 -0
- package/out/types/src/codegen/utils/index.d.ts +1 -0
- package/out/types/src/env.d.ts +1 -0
- package/out/types/src/evaluator/builtins/build.d.ts +1 -0
- package/out/types/src/evaluator/context.d.ts +1 -0
- package/out/types/src/expr.d.ts +2 -0
- package/out/types/src/target.d.ts +1 -0
- package/out/types/src/value.d.ts +2 -1
- package/out/types/tsconfig.tsbuildinfo +1 -1
- package/package.json +1 -1
- package/std/build.yo +2 -1
- package/std/collections/array_list.yo +133 -1
- package/std/encoding/html.yo +283 -0
- package/std/encoding/html_char_utils.yo +36 -0
- package/std/encoding/html_entities.yo +2262 -0
- package/std/encoding/punycode.yo +366 -0
- package/std/fmt/to_string.yo +5 -4
- package/std/glob/index.yo +2 -2
- package/std/libc/wctype.yo +55 -0
- package/std/path.yo +6 -6
- package/std/prelude.yo +8 -0
- package/std/regex/parser.yo +69 -4
- package/std/regex/vm.yo +18 -31
- package/std/string/string.yo +1388 -1337
- package/std/string/unicode.yo +242 -0
|
@@ -0,0 +1,366 @@
|
|
|
1
|
+
// Punycode codec (RFC 3492)
|
|
2
|
+
//
|
|
3
|
+
// Provides punycode encoding/decoding and IDN hostname conversion.
|
|
4
|
+
//
|
|
5
|
+
// Example:
|
|
6
|
+
// { punycode_decode, punycode_encode, to_unicode, to_ascii } :: import "std/encoding/punycode";
|
|
7
|
+
//
|
|
8
|
+
// encoded := punycode_encode(`München`);
|
|
9
|
+
// decoded := punycode_decode(encoded);
|
|
10
|
+
// ascii_domain := to_ascii(`münchen.de`); // "xn--mnchen-3ya.de"
|
|
11
|
+
// unicode_domain := to_unicode(ascii_domain); // "münchen.de"
|
|
12
|
+
|
|
13
|
+
open import "../string";
|
|
14
|
+
{ ArrayList } :: import "../collections/array_list";
|
|
15
|
+
|
|
16
|
+
// Punycode parameters (RFC 3492 section 5).

// Radix of the variable-length integers; also the step by which k advances.
_BASE :: i32(36);
// Lower clamp for the per-digit threshold t.
_TMIN :: i32(1);
// Upper clamp for the per-digit threshold t.
_TMAX :: i32(26);
// Skew term used in the final division of _adapt.
_SKEW :: i32(38);
// Divisor applied to the very first delta in _adapt.
_DAMP :: i32(700);
// Bias in effect before any delta has been processed.
_INITIAL_BIAS :: i32(72);
// First non-basic code point (0x80); starting value of n.
_INITIAL_N :: i32(0x80);
|
|
24
|
+
|
|
25
|
+
// Map one punycode digit character (given as its code point) to its value.
// Letters of either case map to 0..25, ASCII digits '0'..'9' map to 26..35,
// and every other input yields -1.
_decode_digit :: (fn(cp: i32) -> i32)({
  (is_lower : bool) = ((cp >= i32(0x61)) && (cp <= i32(0x7A)));
  (is_upper : bool) = ((cp >= i32(0x41)) && (cp <= i32(0x5A)));
  (is_numeral : bool) = ((cp >= i32(0x30)) && (cp <= i32(0x39)));
  cond(
    is_lower => (cp - i32(0x61)),
    is_upper => (cp - i32(0x41)),
    // '0' (0x30) corresponds to value 26.
    is_numeral => ((cp - i32(0x30)) + i32(26)),
    true => i32(-1)
  )
});
|
|
34
|
+
|
|
35
|
+
// Turn a digit value into its punycode character: values below 26 become the
// lowercase letters starting at 'a', the remaining values become the ASCII
// digits starting at '0'.
_encode_digit :: (fn(d: i32) -> u8)({
  (letter_count : i32) = i32(26);
  if((d >= letter_count), {
    return u8((i32(0x30) + (d - letter_count)));
  });
  u8((i32(0x61) + d))
});
|
|
42
|
+
|
|
43
|
+
// Bias adaptation (RFC 3492 section 3.4 / 6.1).
//
// delta_val  - the delta that was just encoded or decoded
// num_points - number of code points handled so far, including the current one
// first_time - true for the first delta, which is damped by _DAMP instead of 2
//
// Returns the bias to use for the next variable-length integer.
_adapt :: (fn(delta_val: i32, num_points: i32, first_time: bool) -> i32)({
  (span : i32) = (_BASE - _TMIN);
  (cutoff : i32) = ((span * _TMAX) / i32(2));
  (divisor : i32) = cond(
    first_time => _DAMP,
    true => i32(2)
  );

  (scaled : i32) = (delta_val / divisor);
  scaled = (scaled + (scaled / num_points));

  // Repeatedly divide until the value drops to the cutoff, counting _BASE per step.
  (offset : i32) = i32(0);
  while (scaled > cutoff), {
    scaled = (scaled / span);
    offset = (offset + _BASE);
  };
  (offset + (((span + i32(1)) * scaled) / (scaled + _SKEW)))
});
|
|
57
|
+
|
|
58
|
+
// Append the UTF-8 encoding of code point `cp` to the byte list behind `out`.
// The sequence length is chosen from the magnitude of `cp`:
// below 0x80 one byte, below 0x800 two, below 0x10000 three, otherwise four.
_encode_codepoint :: (fn(cp: i32, out: *(ArrayList(u8))) -> unit)({
  // Continuation bytes (10xxxxxx) for bits 0-5, 6-11 and 12-17 of cp.
  (tail_low : u8) = u8((i32(0x80) | (cp & i32(0x3F))));
  (tail_mid : u8) = u8((i32(0x80) | ((cp >> i32(6)) & i32(0x3F))));
  (tail_high : u8) = u8((i32(0x80) | ((cp >> i32(12)) & i32(0x3F))));
  cond(
    (cp < i32(0x80)) => {
      out.*.push(u8(cp));
    },
    (cp < i32(0x800)) => {
      out.*.push(u8((i32(0xC0) | (cp >> i32(6)))));
      out.*.push(tail_low);
    },
    (cp < i32(0x10000)) => {
      out.*.push(u8((i32(0xE0) | (cp >> i32(12)))));
      out.*.push(tail_mid);
      out.*.push(tail_low);
    },
    true => {
      out.*.push(u8((i32(0xF0) | (cp >> i32(18)))));
      out.*.push(tail_high);
      out.*.push(tail_mid);
      out.*.push(tail_low);
    }
  )
});
|
|
81
|
+
|
|
82
|
+
// Decode the UTF-8 bytes of `s` into a list of code points.
//
// The lead byte decides how many bytes a sequence spans. A byte that is not a
// recognised lead byte (0x80..0xBF or 0xF8 and above) is consumed on its own
// and reported as U+FFFD. Continuation bytes are not validated, and a sequence
// cut short by the end of the input contributes only the bytes that exist.
_string_to_codepoints :: (fn(s: String) -> ArrayList(i32))({
  (raw : ArrayList(u8)) = s.as_bytes();
  (total : usize) = raw.len();
  (points : ArrayList(i32)) = ArrayList(i32).new();
  (pos : usize) = usize(0);
  while (pos < total), {
    (lead : i32) = i32(raw.get(pos).unwrap());

    // Sequence length implied by the lead byte.
    (width : usize) = cond(
      ((lead >= i32(0xC0)) && (lead < i32(0xE0))) => usize(2),
      ((lead >= i32(0xE0)) && (lead < i32(0xF0))) => usize(3),
      ((lead >= i32(0xF0)) && (lead < i32(0xF8))) => usize(4),
      true => usize(1)
    );

    // Payload bits carried by the lead byte itself.
    (value : i32) = cond(
      (lead < i32(0x80)) => lead,
      (width == usize(2)) => (lead & i32(0x1F)),
      (width == usize(3)) => (lead & i32(0x0F)),
      (width == usize(4)) => (lead & i32(0x07)),
      true => i32(0xFFFD)
    );

    // Fold in six bits from each following byte that is actually present.
    (offset : usize) = usize(1);
    while (offset < width), (offset = (offset + usize(1))), {
      if(((pos + offset) < total), {
        value = ((value << i32(6)) | (i32(raw.get((pos + offset)).unwrap()) & i32(0x3F)));
      });
    };

    points.push(value);
    pos = (pos + width);
  };
  points
});
|
|
124
|
+
|
|
125
|
+
// Decode a punycode-encoded string (without the xn-- prefix).
//
// The bytes before the last '-' are copied through as basic (ASCII) code
// points. The bytes after it are read as generalized variable-length integers;
// each one selects the next code point and the position to insert it at
// (RFC 3492 section 6.2). The resulting code points are returned as UTF-8.
//
// Returns .Some(decoded) on success, and .None when
//   - a byte in the basic part is not ASCII,
//   - a byte in the encoded part is not a punycode digit,
//   - the input ends in the middle of an integer,
//   - the arithmetic would overflow i32 (RFC 3492 requires failing on overflow), or
//   - a decoded value is not a Unicode scalar value (a surrogate or above 0x10FFFF),
//     since such a value cannot be written as valid UTF-8.
punycode_decode :: (fn(input: String) -> Option(String))({
  (max_int : i32) = i32(0x7FFFFFFF);
  (bytes : ArrayList(u8)) = input.as_bytes();
  (input_len : i32) = i32(bytes.len());

  // Find the last '-' separator; basic_end stays -1 when there is none.
  (basic_end : i32) = i32(-1);
  (j : i32) = (input_len - i32(1));
  while ((j >= i32(0)) && (basic_end < i32(0))), {
    if((i32(bytes.get(usize(j)).unwrap()) == i32(0x2D)), {
      basic_end = j;
    });
    j = (j - i32(1));
  };

  (output : ArrayList(i32)) = ArrayList(i32).new();
  (basic_length : i32) = cond(
    (basic_end >= i32(0)) => basic_end,
    true => i32(0)
  );

  // Copy the basic code points; anything non-ASCII here is malformed input.
  (bi : i32) = i32(0);
  while (bi < basic_length), {
    (cp : i32) = i32(bytes.get(usize(bi)).unwrap());
    if((cp >= i32(0x80)), {
      return .None;
    });
    output.push(cp);
    bi = (bi + i32(1));
  };

  // Start reading digits just past the separator (or at 0 when there is none).
  (idx : i32) = cond(
    (basic_end >= i32(0)) => (basic_end + i32(1)),
    true => i32(0)
  );
  (n : i32) = _INITIAL_N;
  (bias : i32) = _INITIAL_BIAS;
  (i_val : i32) = i32(0);

  while (idx < input_len), {
    (old_i : i32) = i_val;
    (w : i32) = i32(1);
    (k : i32) = _BASE;
    (decode_done : bool) = false;

    // Decode one generalized variable-length integer into i_val.
    while (!(decode_done)), {
      if((idx >= input_len), {
        return .None;
      });
      (digit : i32) = _decode_digit(i32(bytes.get(usize(idx)).unwrap()));
      idx = (idx + i32(1));
      if((digit < i32(0)), {
        return .None;
      });

      // Fail instead of wrapping when i_val + digit * w does not fit in i32.
      if((digit > ((max_int - i_val) / w)), {
        return .None;
      });
      i_val = (i_val + (digit * w));

      (t : i32) = cond(
        (k <= bias) => _TMIN,
        (k >= (bias + _TMAX)) => _TMAX,
        true => (k - bias)
      );

      if((digit < t), {
        decode_done = true;
      }, {
        // Fail instead of wrapping when w * (base - t) does not fit in i32.
        if((w > (max_int / (_BASE - t))), {
          return .None;
        });
        w = (w * (_BASE - t));
        k = (k + _BASE);
      });
    };

    (out_len : i32) = (i32(output.len()) + i32(1));
    bias = _adapt((i_val - old_i), out_len, (old_i == i32(0)));

    // Fail instead of wrapping when n + i_val / out_len does not fit in i32.
    (n_step : i32) = (i_val / out_len);
    if((n_step > (max_int - n)), {
      return .None;
    });
    n = (n + n_step);

    // Only Unicode scalar values can be encoded as UTF-8.
    if((n > i32(0x10FFFF)), {
      return .None;
    });
    if(((n >= i32(0xD800)) && (n <= i32(0xDFFF))), {
      return .None;
    });
    i_val = (i_val % out_len);

    // Insert code point n at position i_val: grow by one slot, then shift the tail right.
    output.push(i32(0));
    (shift_idx : i32) = (i32(output.len()) - i32(1));
    while (shift_idx > i_val), {
      _ := output.set(usize(shift_idx), output.get(usize((shift_idx - i32(1)))).unwrap());
      shift_idx = (shift_idx - i32(1));
    };
    _ := output.set(usize(i_val), n);
    i_val = (i_val + i32(1));
  };

  // Convert code points to UTF-8
  (result_bytes : ArrayList(u8)) = ArrayList(u8).new();
  (ri : usize) = usize(0);
  while (ri < output.len()), {
    _encode_codepoint(output.get(ri).unwrap(), (&result_bytes));
    ri = (ri + usize(1));
  };
  .Some(String.from_bytes(result_bytes))
});
|
|
222
|
+
|
|
223
|
+
// Encode a Unicode string to punycode (without the xn-- prefix).
//
// ASCII code points are copied to the output first, followed by a '-' when
// there was at least one. The remaining code points are then emitted in
// increasing code point order as generalized variable-length integers, each
// encoding the distance (delta) from the previous insertion state
// (RFC 3492 section 6.3).
punycode_encode :: (fn(input: String) -> String)({
  (points : ArrayList(i32)) = _string_to_codepoints(input);
  (total : i32) = i32(points.len());
  (encoded : ArrayList(u8)) = ArrayList(u8).new();

  // Pass 1: basic (ASCII) code points go straight to the output.
  (ascii_total : i32) = i32(0);
  (scan : i32) = i32(0);
  while (scan < total), (scan = (scan + i32(1))), {
    (point : i32) = points.get(usize(scan)).unwrap();
    if((point < i32(0x80)), {
      encoded.push(u8(point));
      ascii_total = (ascii_total + i32(1));
    });
  };

  // Delimiter between the basic part and the encoded part.
  if((ascii_total > i32(0)), {
    encoded.push(u8(0x2D));
  });

  (done : i32) = ascii_total;
  (threshold : i32) = _INITIAL_N;
  (pending : i32) = i32(0);
  (cur_bias : i32) = _INITIAL_BIAS;

  while (done < total), {
    // Smallest code point that has not been handled yet (>= threshold).
    (next_min : i32) = i32(0x7FFFFFFF);
    (probe : i32) = i32(0);
    while (probe < total), (probe = (probe + i32(1))), {
      (candidate : i32) = points.get(usize(probe)).unwrap();
      if(((candidate >= threshold) && (candidate < next_min)), {
        next_min = candidate;
      });
    };

    pending = (pending + ((next_min - threshold) * (done + i32(1))));
    threshold = next_min;

    (walk : i32) = i32(0);
    while (walk < total), (walk = (walk + i32(1))), {
      (point : i32) = points.get(usize(walk)).unwrap();
      if((point < threshold), {
        pending = (pending + i32(1));
      });
      if((point == threshold), {
        // Emit `pending` as a generalized variable-length integer.
        (rest : i32) = pending;
        (step : i32) = _BASE;
        (last_emitted : bool) = false;
        while (!(last_emitted)), {
          (limit : i32) = cond(
            (step <= cur_bias) => _TMIN,
            (step >= (cur_bias + _TMAX)) => _TMAX,
            true => (step - cur_bias)
          );
          if((rest < limit), {
            encoded.push(_encode_digit(rest));
            last_emitted = true;
          }, {
            (radix : i32) = (_BASE - limit);
            encoded.push(_encode_digit((limit + ((rest - limit) % radix))));
            rest = ((rest - limit) / radix);
            step = (step + _BASE);
          });
        };
        cur_bias = _adapt(pending, (done + i32(1)), (done == ascii_total));
        pending = i32(0);
        done = (done + i32(1));
      });
    };

    pending = (pending + i32(1));
    threshold = (threshold + i32(1));
  };

  String.from_bytes(encoded)
});
|
|
302
|
+
|
|
303
|
+
// Convert an IDN hostname to its Unicode display form.
// The hostname is split on '.', every label whose lowercased form starts with
// `xn--` is punycode-decoded, and the labels are joined with '.' again.
// A label that fails to decode is kept unchanged, prefix included.
to_unicode :: (fn(hostname: String) -> String)({
  (labels : ArrayList(String)) = hostname.split(`.`);
  (joined : String) = ``;
  (index : usize) = usize(0);
  while (index < labels.len()), (index = (index + usize(1))), {
    (label : String) = labels.get(index).unwrap();
    if((index > usize(0)), {
      joined = `${joined}.`;
    });
    (lowered : String) = label.to_lowercase();
    if(lowered.starts_with(`xn--`), {
      // Strip the four-character prefix from the original (not lowercased) label.
      (payload : String) = label.substring(usize(4), label.len());
      match(punycode_decode(payload),
        .Some(text) => {
          joined = `${joined}${text}`;
        },
        .None => {
          joined = `${joined}${label}`;
        }
      );
    }, {
      joined = `${joined}${label}`;
    });
  };
  joined
});
|
|
333
|
+
|
|
334
|
+
// Convert a Unicode hostname to its ASCII (punycode) form.
// The hostname is split on '.'; a label containing any byte >= 0x80 is
// replaced by `xn--` followed by its punycode encoding, every other label is
// copied through as is, and the labels are joined with '.' again.
to_ascii :: (fn(hostname: String) -> String)({
  (labels : ArrayList(String)) = hostname.split(`.`);
  (joined : String) = ``;
  (index : usize) = usize(0);
  while (index < labels.len()), (index = (index + usize(1))), {
    (label : String) = labels.get(index).unwrap();
    if((index > usize(0)), {
      joined = `${joined}.`;
    });

    // Look for the first non-ASCII byte; one is enough to require encoding.
    (raw : ArrayList(u8)) = label.as_bytes();
    (needs_encoding : bool) = false;
    (at : usize) = usize(0);
    while ((at < raw.len()) && (!(needs_encoding))), {
      needs_encoding = (i32(raw.get(at).unwrap()) >= i32(0x80));
      at = (at + usize(1));
    };

    if(needs_encoding, {
      (encoded : String) = punycode_encode(label);
      joined = `${joined}xn--${encoded}`;
    }, {
      joined = `${joined}${label}`;
    });
  };
  joined
});
|
|
365
|
+
|
|
366
|
+
export punycode_decode, punycode_encode, to_unicode, to_ascii;
|
package/std/fmt/to_string.yo
CHANGED
|
@@ -203,24 +203,25 @@ impl(rune, ToString(
|
|
|
203
203
|
// 0x80-0x7FF: 2 bytes
|
|
204
204
|
// 0x800-0xFFFF: 3 bytes (excluding surrogates 0xD800-0xDFFF)
|
|
205
205
|
// 0x10000-0x10FFFF: 4 bytes
|
|
206
|
+
// Use 5-byte buffer to always have space for null terminator
|
|
206
207
|
|
|
207
208
|
buffer := cond(
|
|
208
209
|
(code <= 0x7F) => {
|
|
209
210
|
// 1-byte encoding: 0xxxxxxx
|
|
210
|
-
arr := Array(u8, usize(
|
|
211
|
+
arr := Array(u8, usize(5)).fill(0);
|
|
211
212
|
arr(0) = u8(code);
|
|
212
213
|
arr
|
|
213
214
|
},
|
|
214
215
|
(code <= 0x7FF) => {
|
|
215
216
|
// 2-byte encoding: 110xxxxx 10xxxxxx
|
|
216
|
-
arr := Array(u8, usize(
|
|
217
|
+
arr := Array(u8, usize(5)).fill(0);
|
|
217
218
|
arr(0) = u8(u32(0xC0) | ((code >> 6) & 0x1F));
|
|
218
219
|
arr(1) = u8(u32(0x80) | (code & 0x3F));
|
|
219
220
|
arr
|
|
220
221
|
},
|
|
221
222
|
(code <= 0xFFFF) => {
|
|
222
223
|
// 3-byte encoding: 1110xxxx 10xxxxxx 10xxxxxx
|
|
223
|
-
arr := Array(u8, usize(
|
|
224
|
+
arr := Array(u8, usize(5)).fill(0);
|
|
224
225
|
arr(0) = u8(u32(0xE0) | ((code >> 12) & 0x0F));
|
|
225
226
|
arr(1) = u8(u32(0x80) | ((code >> 6) & 0x3F));
|
|
226
227
|
arr(2) = u8(u32(0x80) | (code & 0x3F));
|
|
@@ -228,7 +229,7 @@ impl(rune, ToString(
|
|
|
228
229
|
},
|
|
229
230
|
true => {
|
|
230
231
|
// 4-byte encoding: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
|
|
231
|
-
arr := Array(u8, usize(
|
|
232
|
+
arr := Array(u8, usize(5)).fill(0);
|
|
232
233
|
arr(0) = u8(u32(0xF0) | ((code >> 18) & 0x07));
|
|
233
234
|
arr(1) = u8(u32(0x80) | ((code >> 12) & 0x3F));
|
|
234
235
|
arr(2) = u8(u32(0x80) | ((code >> 6) & 0x3F));
|
package/std/glob/index.yo
CHANGED
|
@@ -187,7 +187,7 @@ _glob_match_impl :: (fn(pb: ArrayList(u8), pi: usize, tb: ArrayList(u8), ti: usi
|
|
|
187
187
|
});
|
|
188
188
|
|
|
189
189
|
// Match `text` against the glob `pattern`, comparing the two strings byte by
// byte from their beginnings via _glob_match_impl.
glob_match :: (fn(pattern: String, text: String) -> bool)({
  pattern_bytes := pattern.as_bytes();
  text_bytes := text.as_bytes();
  _glob_match_impl(pattern_bytes, usize(0), text_bytes, usize(0))
});
|
|
192
192
|
|
|
193
193
|
GlobPattern :: object(
|
|
@@ -199,7 +199,7 @@ impl(GlobPattern,
|
|
|
199
199
|
Self(_pattern: pattern)
|
|
200
200
|
),
|
|
201
201
|
matches : (fn(self: Self, text: String) -> bool)(
|
|
202
|
-
_glob_match_impl(self._pattern.
|
|
202
|
+
_glob_match_impl(self._pattern.as_bytes(), usize(0), text.as_bytes(), usize(0))
|
|
203
203
|
)
|
|
204
204
|
);
|
|
205
205
|
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
// C11 wctype.h - Wide character classification and conversion functions
|
|
2
|
+
// Provides functions to test and convert wide character types
|
|
3
|
+
|
|
4
|
+
{ wint_t } :: import "./stdint";
|
|
5
|
+
|
|
6
|
+
c_include "<wctype.h>",
  // Wide character classification functions.
  // Each takes a wint_t and returns a C int (per the C standard: nonzero when
  // the character belongs to the class, zero otherwise).
  iswalnum : fn(wc : wint_t) -> int,   // alphanumeric
  iswalpha : fn(wc : wint_t) -> int,   // alphabetic
  iswblank : fn(wc : wint_t) -> int,   // blank
  iswcntrl : fn(wc : wint_t) -> int,   // control
  iswdigit : fn(wc : wint_t) -> int,   // decimal digit
  iswgraph : fn(wc : wint_t) -> int,   // graphic
  iswlower : fn(wc : wint_t) -> int,   // lowercase
  iswprint : fn(wc : wint_t) -> int,   // printable
  iswpunct : fn(wc : wint_t) -> int,   // punctuation
  iswspace : fn(wc : wint_t) -> int,   // whitespace
  iswupper : fn(wc : wint_t) -> int,   // uppercase
  iswxdigit : fn(wc : wint_t) -> int,  // hexadecimal digit

  // Wide character conversion functions
  towlower : fn(wc : wint_t) -> wint_t,
  towupper : fn(wc : wint_t) -> wint_t
;
|
|
39
|
+
|
|
40
|
+
export
|
|
41
|
+
iswalnum,
|
|
42
|
+
iswalpha,
|
|
43
|
+
iswblank,
|
|
44
|
+
iswcntrl,
|
|
45
|
+
iswdigit,
|
|
46
|
+
iswgraph,
|
|
47
|
+
iswlower,
|
|
48
|
+
iswprint,
|
|
49
|
+
iswpunct,
|
|
50
|
+
iswspace,
|
|
51
|
+
iswupper,
|
|
52
|
+
iswxdigit,
|
|
53
|
+
towlower,
|
|
54
|
+
towupper
|
|
55
|
+
;
|
package/std/path.yo
CHANGED
|
@@ -67,7 +67,7 @@ impl(Path,
|
|
|
67
67
|
// Check if path is absolute
|
|
68
68
|
// Unix: starts with '/'
|
|
69
69
|
// Windows: starts with drive letter like 'C:' or UNC path '\\'
|
|
70
|
-
bytes := normalized.
|
|
70
|
+
bytes := normalized.as_bytes();
|
|
71
71
|
cond(
|
|
72
72
|
(bytes.len() > usize(0)) => {
|
|
73
73
|
first_byte := bytes.get(usize(0));
|
|
@@ -128,7 +128,7 @@ impl(Path,
|
|
|
128
128
|
true => {
|
|
129
129
|
// Check if it's "." (current directory)
|
|
130
130
|
is_dot := ((part.len() == usize(1)) && {
|
|
131
|
-
byte_opt := part.
|
|
131
|
+
byte_opt := part.as_bytes().get(usize(0));
|
|
132
132
|
match(byte_opt,
|
|
133
133
|
.Some(b) => (b == u8(46)),
|
|
134
134
|
.None => false
|
|
@@ -137,8 +137,8 @@ impl(Path,
|
|
|
137
137
|
|
|
138
138
|
// Check if it's ".." (parent directory)
|
|
139
139
|
is_dotdot := ((part.len() == usize(2)) && {
|
|
140
|
-
b0_opt := part.
|
|
141
|
-
b1_opt := part.
|
|
140
|
+
b0_opt := part.as_bytes().get(usize(0));
|
|
141
|
+
b1_opt := part.as_bytes().get(usize(1));
|
|
142
142
|
match(b0_opt,
|
|
143
143
|
.Some(b0) => match(b1_opt,
|
|
144
144
|
.Some(b1) => ((b0 == u8(46)) && (b1 == u8(46))),
|
|
@@ -605,8 +605,8 @@ impl(Path, ToString(
|
|
|
605
605
|
// Check if it's a drive letter like "C:"
|
|
606
606
|
cond(
|
|
607
607
|
(first_seg.len() == usize(2)) => {
|
|
608
|
-
b0_opt := first_seg.
|
|
609
|
-
b1_opt := first_seg.
|
|
608
|
+
b0_opt := first_seg.as_bytes().get(usize(0));
|
|
609
|
+
b1_opt := first_seg.as_bytes().get(usize(1));
|
|
610
610
|
match(b0_opt,
|
|
611
611
|
.Some(b0) => match(b1_opt,
|
|
612
612
|
.Some(b1) => {
|
package/std/prelude.yo
CHANGED
|
@@ -96,6 +96,8 @@ extern "Yo",
|
|
|
96
96
|
fn(forall(T: Type), slice: Slice(T)) -> usize,
|
|
97
97
|
__yo_slice_new :
|
|
98
98
|
fn(forall(T: Type), ptr: *(T), length: usize) -> Slice(T),
|
|
99
|
+
__yo_slice_ptr :
|
|
100
|
+
fn(forall(T: Type), slice: Slice(T)) -> *(T),
|
|
99
101
|
|
|
100
102
|
// C macro related
|
|
101
103
|
__yo_c_macro_defined : (fn(comptime(name) : comptime_string) -> comptime(bool)),
|
|
@@ -3196,6 +3198,9 @@ impl(forall(T : Type), Slice(T),
|
|
|
3196
3198
|
),
|
|
3197
3199
|
len : (fn(self : Self) -> usize)(
|
|
3198
3200
|
__yo_slice_len(self)
|
|
3201
|
+
),
|
|
3202
|
+
ptr : (fn(self : Self) -> *(T))(
|
|
3203
|
+
__yo_slice_ptr(self)
|
|
3199
3204
|
)
|
|
3200
3205
|
);
|
|
3201
3206
|
|
|
@@ -3209,6 +3214,9 @@ impl(str,
|
|
|
3209
3214
|
),
|
|
3210
3215
|
len : (fn(self : Self) -> usize)(
|
|
3211
3216
|
__yo_slice_len(self.bytes)
|
|
3217
|
+
),
|
|
3218
|
+
ptr : (fn(self : Self) -> *(u8))(
|
|
3219
|
+
__yo_slice_ptr(self.bytes)
|
|
3212
3220
|
)
|
|
3213
3221
|
);
|
|
3214
3222
|
|
package/std/regex/parser.yo
CHANGED
|
@@ -189,6 +189,27 @@ impl(RegexParser,
|
|
|
189
189
|
r
|
|
190
190
|
}),
|
|
191
191
|
|
|
192
|
+
// Parse \xHH hex escape — reads exactly 2 hex digits and returns the codepoint.
|
|
193
|
+
_parse_hex_byte : (fn(self : Self) -> Option(u32))({
|
|
194
|
+
if(((self._pos + usize(2)) > self._bytes.len()), { return .None; });
|
|
195
|
+
(h1 : u8) = self._bytes.get(self._pos).unwrap();
|
|
196
|
+
(h2 : u8) = self._bytes.get((self._pos + usize(1))).unwrap();
|
|
197
|
+
(v1 : i32) = cond(
|
|
198
|
+
((h1 >= u8(48)) && (h1 <= u8(57))) => (i32(h1) - i32(48)),
|
|
199
|
+
((h1 >= u8(65)) && (h1 <= u8(70))) => ((i32(h1) - i32(65)) + i32(10)),
|
|
200
|
+
((h1 >= u8(97)) && (h1 <= u8(102))) => ((i32(h1) - i32(97)) + i32(10)),
|
|
201
|
+
true => { return .None; }
|
|
202
|
+
);
|
|
203
|
+
(v2 : i32) = cond(
|
|
204
|
+
((h2 >= u8(48)) && (h2 <= u8(57))) => (i32(h2) - i32(48)),
|
|
205
|
+
((h2 >= u8(65)) && (h2 <= u8(70))) => ((i32(h2) - i32(65)) + i32(10)),
|
|
206
|
+
((h2 >= u8(97)) && (h2 <= u8(102))) => ((i32(h2) - i32(97)) + i32(10)),
|
|
207
|
+
true => { return .None; }
|
|
208
|
+
);
|
|
209
|
+
self._pos = (self._pos + usize(2));
|
|
210
|
+
.Some(u32(((v1 << i32(4)) | v2)))
|
|
211
|
+
}),
|
|
212
|
+
|
|
192
213
|
_parse_class_escape : (fn(self : Self) -> Result(ArrayList(CharRange), String))({
|
|
193
214
|
b := self._advance();
|
|
194
215
|
match(b,
|
|
@@ -219,6 +240,14 @@ impl(RegexParser,
|
|
|
219
240
|
r.push(CharRange(low: u32(33), high: u32(0x10FFFF)));
|
|
220
241
|
.Ok(r)
|
|
221
242
|
},
|
|
243
|
+
(ch == u8(120)) => {
|
|
244
|
+
r := ArrayList(CharRange).new();
|
|
245
|
+
match(self._parse_hex_byte(),
|
|
246
|
+
.Some(v) => r.push(CharRange(low: v, high: v)),
|
|
247
|
+
.None => r.push(CharRange(low: u32(ch), high: u32(ch)))
|
|
248
|
+
);
|
|
249
|
+
.Ok(r)
|
|
250
|
+
},
|
|
222
251
|
true => {
|
|
223
252
|
r := ArrayList(CharRange).new();
|
|
224
253
|
codepoint := self._escape_char_codepoint(ch);
|
|
@@ -246,6 +275,31 @@ impl(RegexParser,
|
|
|
246
275
|
(end_first == u8(93)) => {
|
|
247
276
|
ranges.push(CharRange(low: low, high: low));
|
|
248
277
|
},
|
|
278
|
+
(end_first == u8(92)) => {
|
|
279
|
+
// High end is an escape sequence (e.g. \x20, \0, \n)
|
|
280
|
+
self._pos = (self._pos + usize(1));
|
|
281
|
+
self._pos = (self._pos + usize(1));
|
|
282
|
+
esc := self._parse_class_escape();
|
|
283
|
+
match(esc,
|
|
284
|
+
.Ok(esc_ranges) => {
|
|
285
|
+
if(((esc_ranges.len() == usize(1)) && (esc_ranges.get(usize(0)).unwrap().low == esc_ranges.get(usize(0)).unwrap().high)), {
|
|
286
|
+
(high : u32) = esc_ranges.get(usize(0)).unwrap().low;
|
|
287
|
+
ranges.push(CharRange(low: low, high: high));
|
|
288
|
+
}, {
|
|
289
|
+
// Multi-range escape like \d can't be range endpoint; treat dash as literal
|
|
290
|
+
ranges.push(CharRange(low: low, high: low));
|
|
291
|
+
ranges.push(CharRange(low: u32(45), high: u32(45)));
|
|
292
|
+
j := usize(0);
|
|
293
|
+
while (j < esc_ranges.len()), (j = (j + usize(1))), {
|
|
294
|
+
ranges.push(esc_ranges.get(j).unwrap());
|
|
295
|
+
};
|
|
296
|
+
});
|
|
297
|
+
},
|
|
298
|
+
.Err(_e) => {
|
|
299
|
+
ranges.push(CharRange(low: low, high: low));
|
|
300
|
+
}
|
|
301
|
+
);
|
|
302
|
+
},
|
|
249
303
|
true => {
|
|
250
304
|
// Consume dash
|
|
251
305
|
self._pos = (self._pos + usize(1));
|
|
@@ -284,10 +338,16 @@ impl(RegexParser,
|
|
|
284
338
|
esc := self._parse_class_escape();
|
|
285
339
|
match(esc,
|
|
286
340
|
.Ok(esc_ranges) => {
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
341
|
+
// If escape produced a single codepoint, check for range (e.g. \0-\x20)
|
|
342
|
+
if(((esc_ranges.len() == usize(1)) && (esc_ranges.get(usize(0)).unwrap().low == esc_ranges.get(usize(0)).unwrap().high)), {
|
|
343
|
+
(low : u32) = esc_ranges.get(usize(0)).unwrap().low;
|
|
344
|
+
self._try_parse_char_range(ranges, low);
|
|
345
|
+
}, {
|
|
346
|
+
j := usize(0);
|
|
347
|
+
while (j < esc_ranges.len()), (j = (j + usize(1))), {
|
|
348
|
+
ranges.push(esc_ranges.get(j).unwrap());
|
|
349
|
+
};
|
|
350
|
+
});
|
|
291
351
|
},
|
|
292
352
|
.Err(e) => { return .Err(e); }
|
|
293
353
|
);
|
|
@@ -452,6 +512,11 @@ impl(RegexParser,
|
|
|
452
512
|
(ch == u8(112)) => self._parse_unicode_property(false),
|
|
453
513
|
// Negated unicode property \P{Name}
|
|
454
514
|
(ch == u8(80)) => self._parse_unicode_property(true),
|
|
515
|
+
// Hex escape \xHH
|
|
516
|
+
(ch == u8(120)) => match(self._parse_hex_byte(),
|
|
517
|
+
.Some(v) => .Ok(RegexNode.literal(v)),
|
|
518
|
+
.None => .Ok(RegexNode.literal(u32(ch)))
|
|
519
|
+
),
|
|
455
520
|
true => .Ok(RegexNode.literal(self._escape_char_codepoint(ch)))
|
|
456
521
|
),
|
|
457
522
|
.None => .Err(`Unexpected end of pattern after backslash`)
|