npm - @shd101wyy/yo - Versions diffs - 0.1.5 → 0.1.6 - Mend

@shd101wyy/yo 0.1.5 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (31) hide show

package/README.md +7 -6
package/out/cjs/index.cjs +508 -503
package/out/cjs/yo-cli.cjs +619 -612
package/out/esm/index.mjs +397 -392
package/out/types/src/codegen/codegen-c.d.ts +2 -0
package/out/types/src/codegen/functions/context.d.ts +1 -0
package/out/types/src/codegen/functions/generation.d.ts +10 -0
package/out/types/src/codegen/utils/index.d.ts +1 -0
package/out/types/src/env.d.ts +1 -0
package/out/types/src/evaluator/builtins/build.d.ts +1 -0
package/out/types/src/evaluator/context.d.ts +1 -0
package/out/types/src/expr.d.ts +2 -0
package/out/types/src/target.d.ts +1 -0
package/out/types/src/value.d.ts +2 -1
package/out/types/tsconfig.tsbuildinfo +1 -1
package/package.json +1 -1
package/std/build.yo +2 -1
package/std/collections/array_list.yo +133 -1
package/std/encoding/html.yo +283 -0
package/std/encoding/html_char_utils.yo +36 -0
package/std/encoding/html_entities.yo +2262 -0
package/std/encoding/punycode.yo +366 -0
package/std/fmt/to_string.yo +5 -4
package/std/glob/index.yo +2 -2
package/std/libc/wctype.yo +55 -0
package/std/path.yo +6 -6
package/std/prelude.yo +8 -0
package/std/regex/parser.yo +69 -4
package/std/regex/vm.yo +18 -31
package/std/string/string.yo +1388 -1337
package/std/string/unicode.yo +242 -0

package/std/encoding/punycode.yo ADDED Viewed

@@ -0,0 +1,366 @@
+// Punycode codec (RFC 3492)
+//
+// Provides punycode encoding/decoding and IDN hostname conversion.
+//
+// Example:
+//   { punycode_decode, punycode_encode, to_unicode, to_ascii } :: import "std/encoding/punycode";
+//
+//   encoded := punycode_encode(`München`);  // "Mnchen-3ya"
+//   decoded := punycode_decode(encoded);  // Option(String): .Some(`München`); .None on malformed input
+//   ascii_domain := to_ascii(`münchen.de`);  // "xn--mnchen-3ya.de"
+//   unicode_domain := to_unicode(ascii_domain);  // "münchen.de"
+open import "../string";
+{ ArrayList } :: import "../collections/array_list";
+// Punycode constants (RFC 3492 section 5)
+_BASE :: i32(36);
+_TMIN :: i32(1);
+_TMAX :: i32(26);
+_SKEW :: i32(38);
+_DAMP :: i32(700);
+_INITIAL_BIAS :: i32(72);
+_INITIAL_N :: i32(128);
+// Map one punycode digit character (given as its code point) to its numeric value.
+// Letters of either case give 0..25 ('a'/'A' = 0), '0'..'9' give 26..35.
+// Any other code point yields -1.
+_decode_digit :: (fn(cp: i32) -> i32)({
+  (is_lower : bool) = ((cp >= i32(0x61)) && (cp <= i32(0x7A)));
+  (is_upper : bool) = ((cp >= i32(0x41)) && (cp <= i32(0x5A)));
+  (is_digit : bool) = ((cp >= i32(0x30)) && (cp <= i32(0x39)));
+  cond(
+    is_lower => (cp - i32(0x61)),
+    is_upper => (cp - i32(0x41)),
+    // '0' (0x30) is digit value 26, so rebase to '0' and add 26.
+    is_digit => ((cp - i32(0x30)) + i32(26)),
+    true => i32(-1)
+  )
+});
+// Produce the ASCII byte for a punycode digit value.
+// Values below 26 become 'a' + d; every other value becomes '0' + (d - 26).
+_encode_digit :: (fn(d: i32) -> u8)({
+  (ch : i32) = cond(
+    (d >= i32(26)) => ((d - i32(26)) + i32(0x30)),
+    true => (d + i32(0x61))
+  );
+  u8(ch)
+});
+// Bias adaptation function (RFC 3492 section 3.4).
+// `delta_val` is first divided by _DAMP when `first_time` is true, by 2 otherwise,
+// then increased by itself divided by `num_points`. It is then repeatedly divided by
+// (_BASE - _TMIN) while it exceeds ((_BASE - _TMIN) * _TMAX) / 2, adding _BASE to the
+// result for each division. `num_points` is used as a divisor and must not be zero.
+_adapt :: (fn(delta_val: i32, num_points: i32, first_time: bool) -> i32)({
+  (digit_span : i32) = (_BASE - _TMIN);
+  (threshold : i32) = ((digit_span * _TMAX) / i32(2));
+  (divisor : i32) = cond(
+    first_time => _DAMP,
+    true => i32(2)
+  );
+  (scaled : i32) = (delta_val / divisor);
+  scaled = (scaled + (scaled / num_points));
+  (offset : i32) = i32(0);
+  while (scaled > threshold), {
+    scaled = (scaled / digit_span);
+    offset = (offset + _BASE);
+  };
+  (offset + (((digit_span + i32(1)) * scaled) / (scaled + _SKEW)))
+});
+// Append the UTF-8 encoding of `cp` to the byte list behind `out`.
+// Emits one byte when cp < 0x80, two when cp < 0x800, three when cp < 0x10000,
+// and four otherwise. No range validation is performed on `cp`.
+_encode_codepoint :: (fn(cp: i32, out: *(ArrayList(u8))) -> unit)({
+  // Continuation bytes (10xxxxxx) carrying bits 0-5, 6-11 and 12-17 of cp.
+  (tail0 : i32) = (i32(0x80) | (cp & i32(0x3F)));
+  (tail1 : i32) = (i32(0x80) | ((cp >> i32(6)) & i32(0x3F)));
+  (tail2 : i32) = (i32(0x80) | ((cp >> i32(12)) & i32(0x3F)));
+  cond(
+    (cp < i32(0x80)) => {
+      out.*.push(u8(cp));
+    },
+    (cp < i32(0x800)) => {
+      out.*.push(u8((i32(0xC0) | (cp >> i32(6)))));
+      out.*.push(u8(tail0));
+    },
+    (cp < i32(0x10000)) => {
+      out.*.push(u8((i32(0xE0) | (cp >> i32(12)))));
+      out.*.push(u8(tail1));
+      out.*.push(u8(tail0));
+    },
+    true => {
+      out.*.push(u8((i32(0xF0) | (cp >> i32(18)))));
+      out.*.push(u8(tail2));
+      out.*.push(u8(tail1));
+      out.*.push(u8(tail0));
+    }
+  );
+});
+// Decode the UTF-8 bytes of `s` into a list of code points.
+// The lead byte selects a sequence width of 1 to 4 bytes. A lead byte that is neither
+// ASCII nor a recognised multi-byte lead (0x80..0xBF, 0xF8 and above) is consumed alone
+// and yields U+FFFD. Continuation bytes are not validated; ones that would lie past the
+// end of the input are simply not merged in.
+_string_to_codepoints :: (fn(s: String) -> ArrayList(i32))({
+  (src : ArrayList(u8)) = s.as_bytes();
+  (total : usize) = src.len();
+  (result : ArrayList(i32)) = ArrayList(i32).new();
+  (pos : usize) = usize(0);
+  while (pos < total), {
+    (lead : i32) = i32(src.get(pos).unwrap());
+    // Sequence width implied by the lead byte; everything else is one byte wide.
+    (width : usize) = cond(
+      ((lead >= i32(0xC0)) && (lead < i32(0xE0))) => usize(2),
+      ((lead >= i32(0xE0)) && (lead < i32(0xF0))) => usize(3),
+      ((lead >= i32(0xF0)) && (lead < i32(0xF8))) => usize(4),
+      true => usize(1)
+    );
+    // Payload bits contributed by the lead byte itself.
+    (acc : i32) = cond(
+      (lead < i32(0x80)) => lead,
+      (width == usize(2)) => (lead & i32(0x1F)),
+      (width == usize(3)) => (lead & i32(0x0F)),
+      (width == usize(4)) => (lead & i32(0x07)),
+      true => i32(0xFFFD)
+    );
+    // Fold in the low six bits of each following byte that actually exists.
+    (off : usize) = usize(1);
+    while (off < width), (off = (off + usize(1))), {
+      if(((pos + off) < total), {
+        acc = ((acc << i32(6)) | (i32(src.get((pos + off)).unwrap()) & i32(0x3F)));
+      });
+    };
+    result.push(acc);
+    pos = (pos + width);
+  };
+  result
+});
+// Decode a punycode-encoded string (without the xn-- prefix), following the
+// decoding procedure of RFC 3492 section 6.2.
+// Returns .Some(decoded) on success. Returns .None when:
+//   - a code point before the last '-' is not basic (>= 0x80),
+//   - the input ends in the middle of a variable-length integer,
+//   - a character of the encoded part is not a punycode digit
+//     (this includes a '-' at index 0, which is not a consumable delimiter),
+//   - an intermediate value would overflow i32, or
+//   - a decoded code point is a surrogate or lies above U+10FFFF and therefore
+//     cannot be written as UTF-8.
+punycode_decode :: (fn(input: String) -> Option(String))({
+  (max_int : i32) = i32(0x7FFFFFFF);
+  (bytes : ArrayList(u8)) = input.as_bytes();
+  (input_len : i32) = i32(bytes.len());
+  // Find the last '-' separator
+  (basic_end : i32) = i32(-1);
+  (j : i32) = (input_len - i32(1));
+  while ((j >= i32(0)) && (basic_end < i32(0))), {
+    if((i32(bytes.get(usize(j)).unwrap()) == i32(0x2D)), {
+      basic_end = j;
+    });
+    j = (j - i32(1));
+  };
+  // Everything before the last delimiter is copied verbatim and must be basic.
+  (output : ArrayList(i32)) = ArrayList(i32).new();
+  (basic_length : i32) = cond(
+    (basic_end >= i32(0)) => basic_end,
+    true => i32(0)
+  );
+  (bi : i32) = i32(0);
+  while (bi < basic_length), {
+    (cp : i32) = i32(bytes.get(usize(bi)).unwrap());
+    if((cp >= i32(0x80)), {
+      return .None;
+    });
+    output.push(cp);
+    bi = (bi + i32(1));
+  };
+  // The delimiter is consumed only when at least one basic code point precedes it;
+  // a '-' at index 0 stays in the encoded part and is rejected as a non-digit.
+  (idx : i32) = cond(
+    (basic_end > i32(0)) => (basic_end + i32(1)),
+    true => i32(0)
+  );
+  (n : i32) = _INITIAL_N;
+  (bias : i32) = _INITIAL_BIAS;
+  (i_val : i32) = i32(0);
+  while (idx < input_len), {
+    // Decode one generalized variable-length integer into i_val.
+    (old_i : i32) = i_val;
+    (w : i32) = i32(1);
+    (k : i32) = _BASE;
+    (decode_done : bool) = false;
+    while (!(decode_done)), {
+      if((idx >= input_len), {
+        return .None;
+      });
+      (digit : i32) = _decode_digit(i32(bytes.get(usize(idx)).unwrap()));
+      idx = (idx + i32(1));
+      if((digit < i32(0)), {
+        return .None;
+      });
+      // Refuse inputs for which i_val + digit * w would exceed i32.
+      if((digit > ((max_int - i_val) / w)), {
+        return .None;
+      });
+      i_val = (i_val + (digit * w));
+      (t : i32) = cond(
+        (k <= bias) => _TMIN,
+        (k >= (bias + _TMAX)) => _TMAX,
+        true => (k - bias)
+      );
+      if((digit < t), {
+        decode_done = true;
+      }, {
+        // Refuse inputs for which w * (_BASE - t) would exceed i32.
+        if((w > (max_int / (_BASE - t))), {
+          return .None;
+        });
+        w = (w * (_BASE - t));
+        k = (k + _BASE);
+      });
+    };
+    (out_len : i32) = (i32(output.len()) + i32(1));
+    bias = _adapt((i_val - old_i), out_len, (old_i == i32(0)));
+    // i_val / out_len is how far n advances; i_val % out_len is the insert position.
+    (n_step : i32) = (i_val / out_len);
+    if((n_step > (max_int - n)), {
+      return .None;
+    });
+    n = (n + n_step);
+    // Only Unicode scalar values can be encoded as UTF-8.
+    if((n > i32(0x10FFFF)), {
+      return .None;
+    });
+    if(((n >= i32(0xD800)) && (n <= i32(0xDFFF))), {
+      return .None;
+    });
+    i_val = (i_val % out_len);
+    // Insert code point at position i_val: grow by one, shift the tail right, store n.
+    output.push(i32(0));
+    (shift_idx : i32) = (i32(output.len()) - i32(1));
+    while (shift_idx > i_val), {
+      _ := output.set(usize(shift_idx), output.get(usize((shift_idx - i32(1)))).unwrap());
+      shift_idx = (shift_idx - i32(1));
+    };
+    _ := output.set(usize(i_val), n);
+    i_val = (i_val + i32(1));
+  };
+  // Convert code points to UTF-8
+  (result_bytes : ArrayList(u8)) = ArrayList(u8).new();
+  (ri : usize) = usize(0);
+  while (ri < output.len()), {
+    _encode_codepoint(output.get(ri).unwrap(), (&result_bytes));
+    ri = (ri + usize(1));
+  };
+  .Some(String.from_bytes(result_bytes))
+});
+// Encode a Unicode string to punycode (without the xn-- prefix).
+// The input is first decoded into code points with _string_to_codepoints. All code
+// points below 0x80 are copied to the output in their original order, followed by a
+// '-' delimiter when at least one was copied. The remaining code points are then
+// emitted in increasing code point order as variable-length integers built from
+// _encode_digit, with the bias re-computed by _adapt after each one.
+// NOTE(review): the arithmetic on `delta` is not checked for i32 overflow; RFC 3492
+// section 6.4 asks encoders to handle overflow — confirm whether very long inputs
+// need a guard (the String return type has no way to report failure).
+punycode_encode :: (fn(input: String) -> String)({
+  (cps : ArrayList(i32)) = _string_to_codepoints(input);
+  (cp_count : i32) = i32(cps.len());
+  // Separate basic and non-basic code points: basic ones go straight to the output
+  (out : ArrayList(u8)) = ArrayList(u8).new();
+  (basic_count : i32) = i32(0);
+  (ci : i32) = i32(0);
+  while (ci < cp_count), {
+    (cp : i32) = cps.get(usize(ci)).unwrap();
+    if((cp < i32(0x80)), {
+      out.push(u8(cp));
+      basic_count = (basic_count + i32(1));
+    });
+    ci = (ci + i32(1));
+  };
+  // 0x2D is '-', written only when there is a basic prefix to delimit
+  if((basic_count > i32(0)), {
+    out.push(u8(0x2D));
+  });
+  // `handled` counts code points already represented in the output
+  (handled : i32) = basic_count;
+  (n : i32) = _INITIAL_N;
+  (delta : i32) = i32(0);
+  (bias : i32) = _INITIAL_BIAS;
+  while (handled < cp_count), {
+    // Find minimum code point >= n
+    (m : i32) = i32(0x7FFFFFFF);
+    (mi : i32) = i32(0);
+    while (mi < cp_count), {
+      (cp : i32) = cps.get(usize(mi)).unwrap();
+      if(((cp >= n) && (cp < m)), {
+        m = cp;
+      });
+      mi = (mi + i32(1));
+    };
+    // Jump n forward to m, scaling the skipped range by the number of insert positions
+    delta = (delta + ((m - n) * (handled + i32(1))));
+    n = m;
+    (ei : i32) = i32(0);
+    while (ei < cp_count), {
+      (cp : i32) = cps.get(usize(ei)).unwrap();
+      // Each code point below n that is passed over adds one to delta
+      if((cp < n), {
+        delta = (delta + i32(1));
+      });
+      if((cp == n), {
+        // Emit delta as a variable-length integer, least significant digit first
+        (q : i32) = delta;
+        (k : i32) = _BASE;
+        (encode_done : bool) = false;
+        while (!(encode_done)), {
+          // Threshold for this digit position, clamped to [_TMIN, _TMAX]
+          (t : i32) = cond(
+            (k <= bias) => _TMIN,
+            (k >= (bias + _TMAX)) => _TMAX,
+            true => (k - bias)
+          );
+          if((q < t), {
+            // Final digit of this integer
+            out.push(_encode_digit(q));
+            encode_done = true;
+          }, {
+            out.push(_encode_digit((t + ((q - t) % (_BASE - t)))));
+            q = ((q - t) / (_BASE - t));
+            k = (k + _BASE);
+          });
+        };
+        // The third argument is true only for the first non-basic code point emitted
+        bias = _adapt(delta, (handled + i32(1)), (handled == basic_count));
+        delta = i32(0);
+        handled = (handled + i32(1));
+      });
+      ei = (ei + i32(1));
+    };
+    // Advance to the next candidate code point value
+    delta = (delta + i32(1));
+    n = (n + i32(1));
+  };
+  String.from_bytes(out)
+});
+// Convert an IDN hostname to its Unicode display form.
+// The hostname is split on '.', and each label whose lowercased form starts with
+// `xn--` has the text after the first four characters passed to punycode_decode.
+// A label that fails to decode, or has no such prefix, is emitted unchanged.
+// Labels are re-joined with '.'.
+to_unicode :: (fn(hostname: String) -> String)({
+  (labels : ArrayList(String)) = hostname.split(`.`);
+  (joined : String) = ``;
+  (idx : usize) = usize(0);
+  while (idx < labels.len()), (idx = (idx + usize(1))), {
+    (label : String) = labels.get(idx).unwrap();
+    if((idx > usize(0)), {
+      joined = `${joined}.`;
+    });
+    // The prefix test is case-insensitive; the payload keeps its original case.
+    (is_ace : bool) = label.to_lowercase().starts_with(`xn--`);
+    if(is_ace, {
+      match(punycode_decode(label.substring(usize(4), label.len())),
+        .Some(decoded) => {
+          joined = `${joined}${decoded}`;
+        },
+        .None => {
+          // Decoding failed: fall back to the untouched label, prefix included
+          joined = `${joined}${label}`;
+        }
+      );
+    }, {
+      joined = `${joined}${label}`;
+    });
+  };
+  joined
+});
+// Convert a Unicode hostname to its ASCII punycode form.
+// The hostname is split on '.'; a label containing any byte >= 0x80 is replaced by
+// `xn--` followed by punycode_encode(label), all other labels are copied unchanged.
+// Labels are re-joined with '.'.
+to_ascii :: (fn(hostname: String) -> String)({
+  (labels : ArrayList(String)) = hostname.split(`.`);
+  (joined : String) = ``;
+  (idx : usize) = usize(0);
+  while (idx < labels.len()), (idx = (idx + usize(1))), {
+    (label : String) = labels.get(idx).unwrap();
+    if((idx > usize(0)), {
+      joined = `${joined}.`;
+    });
+    // Scan the label's bytes; one byte with the high bit set is enough to decide.
+    (raw : ArrayList(u8)) = label.as_bytes();
+    (needs_encoding : bool) = false;
+    (k : usize) = usize(0);
+    while ((k < raw.len()) && (!(needs_encoding))), {
+      needs_encoding = (i32(raw.get(k).unwrap()) >= i32(0x80));
+      k = (k + usize(1));
+    };
+    if(needs_encoding, {
+      (ace : String) = punycode_encode(label);
+      joined = `${joined}xn--${ace}`;
+    }, {
+      joined = `${joined}${label}`;
+    });
+  };
+  joined
+});
+export punycode_decode, punycode_encode, to_unicode, to_ascii;

package/std/fmt/to_string.yo CHANGED Viewed

@@ -203,24 +203,25 @@ impl(rune, ToString(
     // 0x80-0x7FF: 2 bytes
     // 0x800-0xFFFF: 3 bytes (excluding surrogates 0xD800-0xDFFF)
     // 0x10000-0x10FFFF: 4 bytes
+    // Use 5-byte buffer to always have space for null terminator
     buffer := cond(
       (code <= 0x7F) => {
         // 1-byte encoding: 0xxxxxxx
-        arr := Array(u8, usize(4)).fill(0);
+        arr := Array(u8, usize(5)).fill(0);
         arr(0) = u8(code);
         arr
       },
       (code <= 0x7FF) => {
         // 2-byte encoding: 110xxxxx 10xxxxxx
-        arr := Array(u8, usize(4)).fill(0);
+        arr := Array(u8, usize(5)).fill(0);
         arr(0) = u8(u32(0xC0) | ((code >> 6) & 0x1F));
         arr(1) = u8(u32(0x80) | (code & 0x3F));
         arr
       },
       (code <= 0xFFFF) => {
         // 3-byte encoding: 1110xxxx 10xxxxxx 10xxxxxx
-        arr := Array(u8, usize(4)).fill(0);
+        arr := Array(u8, usize(5)).fill(0);
         arr(0) = u8(u32(0xE0) | ((code >> 12) & 0x0F));
         arr(1) = u8(u32(0x80) | ((code >> 6) & 0x3F));
         arr(2) = u8(u32(0x80) | (code & 0x3F));
@@ -228,7 +229,7 @@ impl(rune, ToString(
       },
       true => {
         // 4-byte encoding: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
-        arr := Array(u8, usize(4)).fill(0);
+        arr := Array(u8, usize(5)).fill(0);
         arr(0) = u8(u32(0xF0) | ((code >> 18) & 0x07));
         arr(1) = u8(u32(0x80) | ((code >> 12) & 0x3F));
         arr(2) = u8(u32(0x80) | ((code >> 6) & 0x3F));

package/std/glob/index.yo CHANGED Viewed

@@ -187,7 +187,7 @@ _glob_match_impl :: (fn(pb: ArrayList(u8), pi: usize, tb: ArrayList(u8), ti: usi
 });
 glob_match :: (fn(pattern: String, text: String) -> bool)(
-  _glob_match_impl(pattern._bytes, usize(0), text._bytes, usize(0))
+  _glob_match_impl(pattern.as_bytes(), usize(0), text.as_bytes(), usize(0))
 );
 GlobPattern :: object(
@@ -199,7 +199,7 @@ impl(GlobPattern,
     Self(_pattern: pattern)
   ),
   matches : (fn(self: Self, text: String) -> bool)(
-    _glob_match_impl(self._pattern._bytes, usize(0), text._bytes, usize(0))
+    _glob_match_impl(self._pattern.as_bytes(), usize(0), text.as_bytes(), usize(0))
   )
 );

package/std/libc/wctype.yo ADDED Viewed

@@ -0,0 +1,55 @@
+// C11 wctype.h - Wide character classification and conversion functions
+// Provides functions to test and convert wide character types
+{ wint_t } :: import "./stdint";
+// Bindings to the C standard library header <wctype.h>. Every function takes a
+// single wint_t. The classification functions return a C int which, per the C
+// standard, is nonzero when the character belongs to the class and zero otherwise.
+// Only the fixed-class functions are bound here; the generic wctype()/iswctype()
+// and wctrans()/towctrans() entry points are not declared in this list.
+c_include "<wctype.h>",
+  // Wide character classification functions
+  // iswalnum: alphabetic or decimal digit
+  iswalnum :
+    fn(wc : wint_t) -> int,
+  // iswalpha: alphabetic
+  iswalpha :
+    fn(wc : wint_t) -> int,
+  // iswblank: blank (word separator within a line, e.g. space or tab)
+  iswblank :
+    fn(wc : wint_t) -> int,
+  // iswcntrl: control character
+  iswcntrl :
+    fn(wc : wint_t) -> int,
+  // iswdigit: decimal digit
+  iswdigit :
+    fn(wc : wint_t) -> int,
+  // iswgraph: printable and not whitespace
+  iswgraph :
+    fn(wc : wint_t) -> int,
+  // iswlower: lowercase letter
+  iswlower :
+    fn(wc : wint_t) -> int,
+  // iswprint: printable
+  iswprint :
+    fn(wc : wint_t) -> int,
+  // iswpunct: punctuation
+  iswpunct :
+    fn(wc : wint_t) -> int,
+  // iswspace: whitespace
+  iswspace :
+    fn(wc : wint_t) -> int,
+  // iswupper: uppercase letter
+  iswupper :
+    fn(wc : wint_t) -> int,
+  // iswxdigit: hexadecimal digit
+  iswxdigit :
+    fn(wc : wint_t) -> int,
+  // Wide character conversion functions
+  // towlower: lowercase counterpart of wc, or wc itself when there is none
+  towlower :
+    fn(wc : wint_t) -> wint_t,
+  // towupper: uppercase counterpart of wc, or wc itself when there is none
+  towupper :
+    fn(wc : wint_t) -> wint_t
+;
+export
+  iswalnum,
+  iswalpha,
+  iswblank,
+  iswcntrl,
+  iswdigit,
+  iswgraph,
+  iswlower,
+  iswprint,
+  iswpunct,
+  iswspace,
+  iswupper,
+  iswxdigit,
+  towlower,
+  towupper
+;

package/std/path.yo CHANGED Viewed

@@ -67,7 +67,7 @@ impl(Path,
     // Check if path is absolute
     // Unix: starts with '/'
     // Windows: starts with drive letter like 'C:' or UNC path '\\'
-    bytes := normalized._bytes;
+    bytes := normalized.as_bytes();
     cond(
       (bytes.len() > usize(0)) => {
         first_byte := bytes.get(usize(0));
@@ -128,7 +128,7 @@ impl(Path,
             true => {
               // Check if it's "." (current directory)
               is_dot := ((part.len() == usize(1)) && {
-                byte_opt := part._bytes.get(usize(0));
+                byte_opt := part.as_bytes().get(usize(0));
                 match(byte_opt,
                   .Some(b) => (b == u8(46)),
                   .None => false
@@ -137,8 +137,8 @@ impl(Path,
               // Check if it's ".." (parent directory)
               is_dotdot := ((part.len() == usize(2)) && {
-                b0_opt := part._bytes.get(usize(0));
-                b1_opt := part._bytes.get(usize(1));
+                b0_opt := part.as_bytes().get(usize(0));
+                b1_opt := part.as_bytes().get(usize(1));
                 match(b0_opt,
                   .Some(b0) => match(b1_opt,
                     .Some(b1) => ((b0 == u8(46)) && (b1 == u8(46))),
@@ -605,8 +605,8 @@ impl(Path, ToString(
             // Check if it's a drive letter like "C:"
             cond(
               (first_seg.len() == usize(2)) => {
-                b0_opt := first_seg._bytes.get(usize(0));
-                b1_opt := first_seg._bytes.get(usize(1));
+                b0_opt := first_seg.as_bytes().get(usize(0));
+                b1_opt := first_seg.as_bytes().get(usize(1));
                 match(b0_opt,
                   .Some(b0) => match(b1_opt,
                     .Some(b1) => {

package/std/prelude.yo CHANGED Viewed

@@ -96,6 +96,8 @@ extern "Yo",
     fn(forall(T: Type), slice: Slice(T)) -> usize,
   __yo_slice_new :
     fn(forall(T: Type), ptr: *(T), length: usize) -> Slice(T),
+  __yo_slice_ptr :
+    fn(forall(T: Type), slice: Slice(T)) -> *(T),
   // C macro related
   __yo_c_macro_defined : (fn(comptime(name) : comptime_string) -> comptime(bool)),
@@ -3196,6 +3198,9 @@ impl(forall(T : Type), Slice(T),
   ),
   len : (fn(self : Self) -> usize)(
     __yo_slice_len(self)
+  ),
+  // Returns the *(T) that the __yo_slice_ptr builtin yields for this slice.
+  // NOTE(review): presumably the address of the first element — confirm against the builtin.
+  ptr : (fn(self : Self) -> *(T))(
+    __yo_slice_ptr(self)
+  )
 );
@@ -3209,6 +3214,9 @@ impl(str,
   ),
   len : (fn(self : Self) -> usize)(
     __yo_slice_len(self.bytes)
+  ),
+  // Returns the *(u8) that the __yo_slice_ptr builtin yields for this str's `bytes` slice.
+  // NOTE(review): presumably the address of the first byte — confirm against the builtin.
+  ptr : (fn(self : Self) -> *(u8))(
+    __yo_slice_ptr(self.bytes)
+  )
 );

package/std/regex/parser.yo CHANGED Viewed

@@ -189,6 +189,27 @@ impl(RegexParser,
     r
   }),
+  // Parse the two hex digits of a \xHH escape, starting at the current position.
+  // On success returns .Some(value) and moves the position past both digits.
+  // Returns .None, leaving the position untouched, when fewer than two bytes remain
+  // or either byte is not one of 0-9, A-F, a-f.
+  _parse_hex_byte : (fn(self : Self) -> Option(u32))({
+    (start : usize) = self._pos;
+    if(((start + usize(2)) > self._bytes.len()), { return .None; });
+    (hi_ch : u8) = self._bytes.get(start).unwrap();
+    (lo_ch : u8) = self._bytes.get((start + usize(1))).unwrap();
+    // '0'..'9' rebase to 0; 'A' (0x41) and 'a' (0x61) rebase to 10.
+    (hi : i32) = cond(
+      ((hi_ch >= u8(0x30)) && (hi_ch <= u8(0x39))) => (i32(hi_ch) - i32(0x30)),
+      ((hi_ch >= u8(0x41)) && (hi_ch <= u8(0x46))) => (i32(hi_ch) - i32(0x37)),
+      ((hi_ch >= u8(0x61)) && (hi_ch <= u8(0x66))) => (i32(hi_ch) - i32(0x57)),
+      true => { return .None; }
+    );
+    (lo : i32) = cond(
+      ((lo_ch >= u8(0x30)) && (lo_ch <= u8(0x39))) => (i32(lo_ch) - i32(0x30)),
+      ((lo_ch >= u8(0x41)) && (lo_ch <= u8(0x46))) => (i32(lo_ch) - i32(0x37)),
+      ((lo_ch >= u8(0x61)) && (lo_ch <= u8(0x66))) => (i32(lo_ch) - i32(0x57)),
+      true => { return .None; }
+    );
+    self._pos = (start + usize(2));
+    .Some(u32(((hi * i32(16)) + lo)))
+  }),
   _parse_class_escape : (fn(self : Self) -> Result(ArrayList(CharRange), String))({
     b := self._advance();
     match(b,
@@ -219,6 +240,14 @@ impl(RegexParser,
             r.push(CharRange(low: u32(33), high: u32(0x10FFFF)));
             .Ok(r)
           },
+          (ch == u8(120)) => {
+            r := ArrayList(CharRange).new();
+            match(self._parse_hex_byte(),
+              .Some(v) => r.push(CharRange(low: v, high: v)),
+              .None => r.push(CharRange(low: u32(ch), high: u32(ch)))
+            );
+            .Ok(r)
+          },
           true => {
             r := ArrayList(CharRange).new();
             codepoint := self._escape_char_codepoint(ch);
@@ -246,6 +275,31 @@ impl(RegexParser,
               (end_first == u8(93)) => {
                 ranges.push(CharRange(low: low, high: low));
               },
+              (end_first == u8(92)) => {
+                // High end is an escape sequence (e.g. \x20, \0, \n)
+                self._pos = (self._pos + usize(1));
+                self._pos = (self._pos + usize(1));
+                esc := self._parse_class_escape();
+                match(esc,
+                  .Ok(esc_ranges) => {
+                    if(((esc_ranges.len() == usize(1)) && (esc_ranges.get(usize(0)).unwrap().low == esc_ranges.get(usize(0)).unwrap().high)), {
+                      (high : u32) = esc_ranges.get(usize(0)).unwrap().low;
+                      ranges.push(CharRange(low: low, high: high));
+                    }, {
+                      // Multi-range escape like \d can't be range endpoint; treat dash as literal
+                      ranges.push(CharRange(low: low, high: low));
+                      ranges.push(CharRange(low: u32(45), high: u32(45)));
+                      j := usize(0);
+                      while (j < esc_ranges.len()), (j = (j + usize(1))), {
+                        ranges.push(esc_ranges.get(j).unwrap());
+                      };
+                    });
+                  },
+                  .Err(_e) => {
+                    ranges.push(CharRange(low: low, high: low));
+                  }
+                );
+              },
               true => {
                 // Consume dash
                 self._pos = (self._pos + usize(1));
@@ -284,10 +338,16 @@ impl(RegexParser,
         esc := self._parse_class_escape();
         match(esc,
           .Ok(esc_ranges) => {
-            j := usize(0);
-            while (j < esc_ranges.len()), (j = (j + usize(1))), {
-              ranges.push(esc_ranges.get(j).unwrap());
-            };
+            // If escape produced a single codepoint, check for range (e.g. \0-\x20)
+            if(((esc_ranges.len() == usize(1)) && (esc_ranges.get(usize(0)).unwrap().low == esc_ranges.get(usize(0)).unwrap().high)), {
+              (low : u32) = esc_ranges.get(usize(0)).unwrap().low;
+              self._try_parse_char_range(ranges, low);
+            }, {
+              j := usize(0);
+              while (j < esc_ranges.len()), (j = (j + usize(1))), {
+                ranges.push(esc_ranges.get(j).unwrap());
+              };
+            });
           },
           .Err(e) => { return .Err(e); }
         );
@@ -452,6 +512,11 @@ impl(RegexParser,
           (ch == u8(112)) => self._parse_unicode_property(false),
           // Negated unicode property \P{Name}
           (ch == u8(80)) => self._parse_unicode_property(true),
+          // Hex escape \xHH
+          (ch == u8(120)) => match(self._parse_hex_byte(),
+            .Some(v) => .Ok(RegexNode.literal(v)),
+            .None => .Ok(RegexNode.literal(u32(ch)))
+          ),
           true => .Ok(RegexNode.literal(self._escape_char_codepoint(ch)))
         ),
       .None => .Err(`Unexpected end of pattern after backslash`)