npm - @shd101wyy/yo - Versions diffs - 0.0.28 → 0.0.29 - Mend

@shd101wyy/yo 0.0.28 → 0.0.29

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

package/README.md +1 -1
package/out/cjs/index.cjs +57 -57
package/out/cjs/yo-cli.cjs +72 -72
package/out/esm/index.mjs +62 -62
package/out/types/tsconfig.tsbuildinfo +1 -1
package/package.json +1 -1
package/std/regex/compiler.yo +355 -0
package/std/regex/flags.yo +104 -0
package/std/regex/match.yo +83 -0
package/std/regex/node.yo +283 -0
package/std/regex/parser.yo +847 -0
package/std/regex/regex.yo +714 -0
package/std/regex/unicode.yo +365 -0
package/std/regex/vm.yo +737 -0
package/std/time/sleep.yo +18 -0
package/std/time.yo +0 -13

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "@shd101wyy/yo",
   "displayName": "Yo",
-  "version": "0.0.28",
+  "version": "0.0.29",
   "main": "./out/cjs/index.cjs",
   "module": "./out/esm/index.mjs",
   "types": "./out/types/src/index.d.ts",

package/std/regex/compiler.yo ADDED Viewed

@@ -0,0 +1,355 @@
+// std/regex/compiler.yo - NFA compiler
+//
+// Compiles a RegexNode AST into a flat list of NFA instructions
+// using Thompson's construction algorithm.
+open import "std/collections/array_list";
+open import "std/string";
+{ RegexNode, NodeKind, CharRange, AnchorKind, GroupNameEntry } :: import "./node.yo";
+// NFA instruction types
+//
+// Opcode tag stored in `Instr.kind`. The notes on each variant list which
+// `Instr` operand fields the matching constructor in `impl(Instr, ...)` fills
+// in; all other operand fields are zeroed by that constructor.
+InstrKind :: enum(
+  // Operand: `codepoint`.
+  Char,
+  // Operand: `class_idx` (index into the program's class table).
+  CharClass,
+  // No operands.
+  AnyChar,
+  // Operands: `target_a`, `target_b` (two successor addresses).
+  Split,
+  // Operand: `target` (single successor address).
+  Jump,
+  // Operand: `slot`.
+  Save,
+  // No operands.
+  Match,
+  // No operands.
+  AssertStart,
+  // No operands.
+  AssertEnd,
+  // No operands.
+  AssertWordBoundary,
+  // No operands.
+  AssertNonWordBoundary,
+  // Operand: `slot` (holds the referenced group index).
+  Backref,
+  // Operands: `target_a` = sub-program start, `target_b` = sub-program end,
+  // `slot` = 1 for positive / 0 for negative.
+  Lookahead,
+  // Operands: same layout as Lookahead.
+  Lookbehind
+);
+// A single NFA instruction
+//
+// A flat record shared by every opcode: `kind` selects the opcode and the
+// remaining fields are operands. Which operands are meaningful depends on
+// `kind`; the constructors in `impl(Instr, ...)` set unused operands to zero.
+Instr :: struct(
+  // Opcode tag.
+  kind      : InstrKind,
+  // Code point operand (set by `char_instr`).
+  codepoint : u32,
+  // Class-table index operand (set by `char_class_instr`).
+  class_idx : usize,
+  // First address operand: first branch of a Split, or sub-program start of a
+  // Lookahead/Lookbehind.
+  target_a  : usize,
+  // Second address operand: second branch of a Split, or sub-program end of a
+  // Lookahead/Lookbehind.
+  target_b  : usize,
+  // Jump destination (set by `jump_instr`).
+  target    : usize,
+  // Save slot number, Backref group index, or lookaround polarity (1/0),
+  // depending on `kind`.
+  slot      : usize
+);
+// Constructors, one per opcode. Each fills in only the operands its opcode
+// uses and zeroes every other field.
+impl(Instr,
+  // Char carrying the code point `cp`.
+  char_instr : (fn(cp : u32) -> Self)(
+    Self(
+      kind: .Char,
+      codepoint: cp,
+      class_idx: usize(0),
+      target_a: usize(0),
+      target_b: usize(0),
+      target: usize(0),
+      slot: usize(0)
+    )
+  ),
+  // AnyChar; no operands.
+  any_char_instr : (fn() -> Self)(
+    Self(
+      kind: .AnyChar,
+      codepoint: u32(0),
+      class_idx: usize(0),
+      target_a: usize(0),
+      target_b: usize(0),
+      target: usize(0),
+      slot: usize(0)
+    )
+  ),
+  // CharClass referring to class-table entry `idx`.
+  char_class_instr : (fn(idx : usize) -> Self)(
+    Self(
+      kind: .CharClass,
+      codepoint: u32(0),
+      class_idx: idx,
+      target_a: usize(0),
+      target_b: usize(0),
+      target: usize(0),
+      slot: usize(0)
+    )
+  ),
+  // Split with successor addresses `a` and `b`.
+  split_instr : (fn(a : usize, b : usize) -> Self)(
+    Self(
+      kind: .Split,
+      codepoint: u32(0),
+      class_idx: usize(0),
+      target_a: a,
+      target_b: b,
+      target: usize(0),
+      slot: usize(0)
+    )
+  ),
+  // Jump to address `t`.
+  jump_instr : (fn(t : usize) -> Self)(
+    Self(
+      kind: .Jump,
+      codepoint: u32(0),
+      class_idx: usize(0),
+      target_a: usize(0),
+      target_b: usize(0),
+      target: t,
+      slot: usize(0)
+    )
+  ),
+  // Save into slot `s`.
+  save_instr : (fn(s : usize) -> Self)(
+    Self(
+      kind: .Save,
+      codepoint: u32(0),
+      class_idx: usize(0),
+      target_a: usize(0),
+      target_b: usize(0),
+      target: usize(0),
+      slot: s
+    )
+  ),
+  // Match; no operands.
+  match_instr : (fn() -> Self)(
+    Self(
+      kind: .Match,
+      codepoint: u32(0),
+      class_idx: usize(0),
+      target_a: usize(0),
+      target_b: usize(0),
+      target: usize(0),
+      slot: usize(0)
+    )
+  ),
+  // AssertStart; no operands.
+  assert_start_instr : (fn() -> Self)(
+    Self(
+      kind: .AssertStart,
+      codepoint: u32(0),
+      class_idx: usize(0),
+      target_a: usize(0),
+      target_b: usize(0),
+      target: usize(0),
+      slot: usize(0)
+    )
+  ),
+  // AssertEnd; no operands.
+  assert_end_instr : (fn() -> Self)(
+    Self(
+      kind: .AssertEnd,
+      codepoint: u32(0),
+      class_idx: usize(0),
+      target_a: usize(0),
+      target_b: usize(0),
+      target: usize(0),
+      slot: usize(0)
+    )
+  ),
+  // AssertWordBoundary; no operands.
+  assert_word_boundary_instr : (fn() -> Self)(
+    Self(
+      kind: .AssertWordBoundary,
+      codepoint: u32(0),
+      class_idx: usize(0),
+      target_a: usize(0),
+      target_b: usize(0),
+      target: usize(0),
+      slot: usize(0)
+    )
+  ),
+  // AssertNonWordBoundary; no operands.
+  assert_non_word_boundary_instr : (fn() -> Self)(
+    Self(
+      kind: .AssertNonWordBoundary,
+      codepoint: u32(0),
+      class_idx: usize(0),
+      target_a: usize(0),
+      target_b: usize(0),
+      target: usize(0),
+      slot: usize(0)
+    )
+  ),
+  // Backref; the referenced group index is stored in `slot`.
+  backref_instr : (fn(group_idx : usize) -> Self)(
+    Self(
+      kind: .Backref,
+      codepoint: u32(0),
+      class_idx: usize(0),
+      target_a: usize(0),
+      target_b: usize(0),
+      target: usize(0),
+      slot: group_idx
+    )
+  ),
+  // Lookahead: target_a=sub_start, target_b=sub_end, slot=1 for positive/0 for negative
+  lookahead_instr : (fn(sub_start : usize, sub_end : usize, positive : bool) -> Self)({
+    polarity := cond(positive => usize(1), true => usize(0));
+    Self(
+      kind: .Lookahead,
+      codepoint: u32(0),
+      class_idx: usize(0),
+      target_a: sub_start,
+      target_b: sub_end,
+      target: usize(0),
+      slot: polarity
+    )
+  }),
+  // Lookbehind: target_a=sub_start, target_b=sub_end, slot=1 for positive/0 for negative
+  lookbehind_instr : (fn(sub_start : usize, sub_end : usize, positive : bool) -> Self)({
+    polarity := cond(positive => usize(1), true => usize(0));
+    Self(
+      kind: .Lookbehind,
+      codepoint: u32(0),
+      class_idx: usize(0),
+      target_a: sub_start,
+      target_b: sub_end,
+      target: usize(0),
+      slot: polarity
+    )
+  })
+);
+// Character class table entry
+//
+// One entry is appended to `NfaProgram.classes` per compiled CharClass node;
+// CharClass instructions refer to it by its index in that list.
+ClassEntry :: struct(
+  // Ranges copied from the AST node's `ranges`.
+  ranges  : ArrayList(CharRange),
+  // Copied from the AST node's `negated` flag.
+  negated : bool
+);
+// The compiled NFA program
+//
+// Produced by `NfaCompiler.compile`.
+NfaProgram :: object(
+  // Flat instruction list; instruction addresses are indices into this list.
+  instructions   : ArrayList(Instr),
+  // Character class table indexed by `Instr.class_idx`.
+  classes        : ArrayList(ClassEntry),
+  // Group count as passed to `compile`.
+  n_groups       : usize,
+  // Group name entries as passed to `compile`.
+  group_names    : ArrayList(GroupNameEntry),
+  // Bytes of the leading Char instructions whose code points are below 0x80,
+  // filled in by `_extract_literal_prefix`.
+  literal_prefix : ArrayList(u8)
+);
+// The NFA compiler
+//
+// Holds the program being built; the `impl` blocks below append instructions
+// and class entries to it and `compile` returns it.
+NfaCompiler :: object(
+  // Program under construction.
+  _program : NfaProgram
+);
+// Utilities defined first (bottom-up ordering required)
+impl(NfaCompiler,
+  // Build a compiler whose program starts out completely empty.
+  new : (fn() -> Self)({
+    empty_program := NfaProgram(
+      instructions: ArrayList(Instr).new(),
+      classes: ArrayList(ClassEntry).new(),
+      n_groups: usize(0),
+      group_names: ArrayList(GroupNameEntry).new(),
+      literal_prefix: ArrayList(u8).new()
+    );
+    Self(_program: empty_program)
+  }),
+  // Append `instr` to the program and return the address it was stored at.
+  _emit : (fn(self : Self, instr : Instr) -> usize)({
+    pc := self._program.instructions.len();
+    self._program.instructions.push(instr);
+    pc
+  }),
+  // Address the next emitted instruction will receive.
+  _current_pc : (fn(self : Self) -> usize)({
+    self._program.instructions.len()
+  }),
+  // Append a class-table entry and return its index.
+  _add_class : (fn(self : Self, ranges : ArrayList(CharRange), negated : bool) -> usize)({
+    class_id := self._program.classes.len();
+    entry := ClassEntry(ranges: ranges, negated: negated);
+    self._program.classes.push(entry);
+    class_id
+  })
+);
+// Compile methods: _compile_node is self-recursive, quantifier logic inlined
+impl(NfaCompiler,
+  // Append the instructions for `node` (and, recursively, its children) to the
+  // program. Forward branch targets are emitted as placeholders and patched
+  // with `instructions.set` once the destination address is known.
+  _compile_node : (fn(self : Self, node : RegexNode) -> unit)({
+    match(node.kind,
+      .Literal => {
+        self._emit(Instr.char_instr(node.codepoint));
+      },
+      .Dot => {
+        self._emit(Instr.any_char_instr());
+      },
+      .CharClass => {
+        // Register the class first so the instruction can carry its index.
+        idx := self._add_class(node.ranges, node.negated);
+        self._emit(Instr.char_class_instr(idx));
+      },
+      .Anchor =>
+        match(node.anchor,
+          .Start => { self._emit(Instr.assert_start_instr()); },
+          .End => { self._emit(Instr.assert_end_instr()); },
+          .WordBoundary => { self._emit(Instr.assert_word_boundary_instr()); },
+          .NonWordBoundary => { self._emit(Instr.assert_non_word_boundary_instr()); }
+        ),
+      .Sequence => {
+        // Children are compiled back to back, in order.
+        i := usize(0);
+        while (i < node.children.len()), (i = (i + usize(1))), {
+          child := node.children.get(i).unwrap();
+          recur(self, child);
+        };
+      },
+      .Alternation => {
+        // Layout:  split L, R ; L: <left> ; jump END ; R: <right> ; END:
+        // NOTE(review): only children 0 and 1 are compiled; this assumes the
+        // parser always builds binary Alternation nodes - confirm in parser.yo.
+        left := node.children.get(usize(0)).unwrap();
+        right := node.children.get(usize(1)).unwrap();
+        split_pc := self._emit(Instr.split_instr(usize(0), usize(0)));
+        left_start := self._current_pc();
+        recur(self, left);
+        jump_pc := self._emit(Instr.jump_instr(usize(0)));
+        right_start := self._current_pc();
+        // Both branch addresses are known now; patch the split once.
+        self._program.instructions.set(split_pc, Instr.split_instr(left_start, right_start));
+        recur(self, right);
+        end_pc := self._current_pc();
+        self._program.instructions.set(jump_pc, Instr.jump_instr(end_pc));
+      },
+      .Quantifier => {
+        child := node.children.get(usize(0)).unwrap();
+        min_val := node.q_min;
+        max_val := node.q_max;
+        greedy := node.q_greedy;
+        // NOTE(review): q_max == 0 is used below as the "unbounded" sentinel,
+        // so an exact-zero repetition cannot be expressed here - confirm the
+        // parser never produces one.
+        // Emit min required copies, tracking start of last copy for loop-back
+        (last_body_start : usize) = self._current_pc();
+        qi := usize(0);
+        while (qi < min_val), (qi = (qi + usize(1))), {
+          last_body_start = self._current_pc();
+          recur(self, child);
+        };
+        cond(
+          ((max_val == usize(0)) && (min_val == usize(0))) => {
+            // * - zero or more (max=0 means unbounded when min=0)
+            // Layout:  L1: split L2, L3 ; L2: <child> ; jump L1 ; L3:
+            l1 := self._current_pc();
+            split_pc := self._emit(Instr.split_instr(usize(0), usize(0)));
+            l2 := self._current_pc();
+            recur(self, child);
+            self._emit(Instr.jump_instr(l1));
+            l3 := self._current_pc();
+            // Greedy lists the loop body first; lazy lists the exit first.
+            cond(
+              greedy => {
+                self._program.instructions.set(split_pc, Instr.split_instr(l2, l3));
+              },
+              true => {
+                self._program.instructions.set(split_pc, Instr.split_instr(l3, l2));
+              }
+            );
+          },
+          ((max_val == usize(0)) && (min_val > usize(0))) => {
+            // + beyond min copies - loop back to last min copy (no extra body)
+            split_pc := self._emit(Instr.split_instr(usize(0), usize(0)));
+            l2 := self._current_pc();
+            cond(
+              greedy => {
+                self._program.instructions.set(split_pc, Instr.split_instr(last_body_start, l2));
+              },
+              true => {
+                self._program.instructions.set(split_pc, Instr.split_instr(l2, last_body_start));
+              }
+            );
+          },
+          true => {
+            // {min, max} - emit (max - min) optional copies
+            // The subtraction is unsigned: if max < min it would wrap to a
+            // huge count and this loop would emit instructions until memory
+            // runs out. Emit no optional copies in that case instead.
+            remaining := cond(
+              (max_val > min_val) => (max_val - min_val),
+              true => usize(0)
+            );
+            qj := usize(0);
+            while (qj < remaining), (qj = (qj + usize(1))), {
+              // Each optional copy:  split BODY, AFTER ; BODY: <child> ; AFTER:
+              split_pc := self._emit(Instr.split_instr(usize(0), usize(0)));
+              body_start := self._current_pc();
+              recur(self, child);
+              after := self._current_pc();
+              cond(
+                greedy => {
+                  self._program.instructions.set(split_pc, Instr.split_instr(body_start, after));
+                },
+                true => {
+                  self._program.instructions.set(split_pc, Instr.split_instr(after, body_start));
+                }
+              );
+            };
+          }
+        );
+      },
+      .Group => {
+        // Capture group N uses slot 2N for its start and 2N+1 for its end.
+        child := node.children.get(usize(0)).unwrap();
+        start_slot := (node.group_index * usize(2));
+        end_slot := ((node.group_index * usize(2)) + usize(1));
+        self._emit(Instr.save_instr(start_slot));
+        recur(self, child);
+        self._emit(Instr.save_instr(end_slot));
+      },
+      .NonCapturingGroup => {
+        // Grouping only: compile the child with no Save instructions.
+        child := node.children.get(usize(0)).unwrap();
+        recur(self, child);
+      },
+      .Backreference => {
+        self._emit(Instr.backref_instr(node.group_index));
+      },
+      .Lookahead => {
+        // Layout:  lookahead START, END ; START: <child> ; match ; END:
+        child := node.children.get(usize(0)).unwrap();
+        positive := (!(node.negated));
+        // Emit lookahead instruction with placeholder sub_end
+        la_pc := self._emit(Instr.lookahead_instr(usize(0), usize(0), positive));
+        sub_start := self._current_pc();
+        recur(self, child);
+        // The sub-program is terminated by its own Match instruction.
+        self._emit(Instr.match_instr());
+        sub_end := self._current_pc();
+        self._program.instructions.set(la_pc, Instr.lookahead_instr(sub_start, sub_end, positive));
+      },
+      .Lookbehind => {
+        // Same layout as Lookahead, using the Lookbehind opcode.
+        child := node.children.get(usize(0)).unwrap();
+        positive := (!(node.negated));
+        lb_pc := self._emit(Instr.lookbehind_instr(usize(0), usize(0), positive));
+        sub_start := self._current_pc();
+        recur(self, child);
+        self._emit(Instr.match_instr());
+        sub_end := self._current_pc();
+        self._program.instructions.set(lb_pc, Instr.lookbehind_instr(sub_start, sub_end, positive));
+      }
+    );
+  })
+);
+// Literal prefix extraction (must be before compile)
+impl(NfaCompiler,
+  // Scan the program from address 0 and store the leading literal bytes in
+  // `literal_prefix`. Save and AssertStart instructions are stepped over;
+  // Char instructions with a code point below 0x80 contribute one byte each;
+  // anything else ends the scan.
+  _extract_literal_prefix : (fn(self : Self) -> unit)({
+    code := self._program.instructions;
+    collected := ArrayList(u8).new();
+    cursor := usize(0);
+    (scanning : bool) = true;
+    while (scanning && (cursor < code.len())), {
+      current := code.get(cursor).unwrap();
+      match(current.kind,
+        .Save => { cursor = (cursor + usize(1)); },
+        .AssertStart => { cursor = (cursor + usize(1)); },
+        .Char => {
+          cp := current.codepoint;
+          cond(
+            // Code points of 0x80 and above stop the prefix.
+            (!((cp < u32(0x80)))) => { scanning = false; },
+            true => {
+              collected.push(u8(cp));
+              cursor = (cursor + usize(1));
+            }
+          );
+        },
+        _ => { scanning = false; }
+      );
+    };
+    self._program.literal_prefix = collected;
+  })
+);
+// Top-level compile method
+impl(NfaCompiler,
+  // Compile `root` into this compiler's program and return that program.
+  // The emitted code is: Save 0, <pattern>, Save 1, Match.
+  compile : (fn(self : Self, root : RegexNode, n_groups : usize, group_names : ArrayList(GroupNameEntry)) -> NfaProgram)({
+    // Record the capture metadata supplied by the caller.
+    self._program.group_names = group_names;
+    self._program.n_groups = n_groups;
+    // Slots 0 and 1 bracket the whole pattern.
+    whole_start_slot := usize(0);
+    whole_end_slot := usize(1);
+    self._emit(Instr.save_instr(whole_start_slot));
+    self._compile_node(root);
+    self._emit(Instr.save_instr(whole_end_slot));
+    self._emit(Instr.match_instr());
+    // Needs the finished instruction list, so it runs last.
+    self._extract_literal_prefix();
+    self._program
+  })
+);
+export
+  NfaCompiler,
+  NfaProgram,
+  Instr,
+  InstrKind,
+  ClassEntry,
+  GroupNameEntry
+;

package/std/regex/flags.yo ADDED Viewed

@@ -0,0 +1,104 @@
+// std/regex/flags.yo - RegexFlags parsing and representation
+//
+// Regex flags follow JavaScript syntax: "gi", "ms", "iu", etc.
+//
+// Supported flags:
+//   g - global: match all occurrences
+//   i - ignoreCase: case-insensitive matching
+//   m - multiline: ^ and $ match line boundaries
+//   s - dotAll: . matches newline characters
+//   u - unicode: full Unicode matching
+//   y - sticky: match from lastIndex only
+open import "std/string";
+open import "std/collections/array_list";
+// Parsed regex flag set; one boolean per supported flag letter.
+RegexFlags :: struct(
+  // 'g' - global: match all occurrences
+  global      : bool,
+  // 'i' - ignoreCase: case-insensitive matching
+  ignore_case : bool,
+  // 'm' - multiline: ^ and $ match line boundaries
+  multiline   : bool,
+  // 's' - dotAll: . matches newline characters
+  dot_all     : bool,
+  // 'u' - unicode: full Unicode matching
+  unicode     : bool,
+  // 'y' - sticky: match from lastIndex only
+  sticky      : bool
+);
+impl(RegexFlags,
+  // Flag set with every flag turned off.
+  default : (fn() -> Self)({
+    Self(
+      global: false, ignore_case: false, multiline: false,
+      dot_all: false, unicode: false, sticky: false
+    )
+  }),
+  // Parse a flag string such as `gi` or `ms`, one byte at a time.
+  // Returns .Err when a flag letter appears twice or when a byte is not one
+  // of g, i, m, s, u, y; otherwise returns .Ok with the matching flags set.
+  parse : (fn(flags_str: String) -> Result(Self, String))({
+    flags := Self.default();
+    raw := flags_str.as_bytes();
+    pos := usize(0);
+    while (pos < raw.len()), (pos = (pos + usize(1))), {
+      match(raw.get(pos),
+        .None => (),
+        .Some(ch) => {
+          cond(
+            (ch == u8(103)) => {
+              // 'g'
+              cond(
+                (!(flags.global)) => { flags.global = true; },
+                true => { return .Err(`Duplicate flag: g`); }
+              );
+            },
+            (ch == u8(105)) => {
+              // 'i'
+              cond(
+                (!(flags.ignore_case)) => { flags.ignore_case = true; },
+                true => { return .Err(`Duplicate flag: i`); }
+              );
+            },
+            (ch == u8(109)) => {
+              // 'm'
+              cond(
+                (!(flags.multiline)) => { flags.multiline = true; },
+                true => { return .Err(`Duplicate flag: m`); }
+              );
+            },
+            (ch == u8(115)) => {
+              // 's'
+              cond(
+                (!(flags.dot_all)) => { flags.dot_all = true; },
+                true => { return .Err(`Duplicate flag: s`); }
+              );
+            },
+            (ch == u8(117)) => {
+              // 'u'
+              cond(
+                (!(flags.unicode)) => { flags.unicode = true; },
+                true => { return .Err(`Duplicate flag: u`); }
+              );
+            },
+            (ch == u8(121)) => {
+              // 'y'
+              cond(
+                (!(flags.sticky)) => { flags.sticky = true; },
+                true => { return .Err(`Duplicate flag: y`); }
+              );
+            },
+            true => {
+              return .Err(`Invalid flag character`);
+            }
+          );
+        }
+      );
+    };
+    .Ok(flags)
+  })
+);
+export
+  RegexFlags
+;

package/std/regex/match.yo ADDED Viewed

@@ -0,0 +1,83 @@
+// std/regex/match.yo - Match result type
+//
+// Represents the result of a regex match, including the matched text,
+// position, and captured groups.
+open import "std/collections/array_list";
+open import "std/string";
+{ GroupNameEntry } :: import "./node.yo";
+// A single regex match result
+//
+// Plain holder for the values passed to `RegexMatch.new`; the accessors in
+// `impl(RegexMatch, ...)` read these fields back.
+RegexMatch :: object(
+  // Full matched text (returned by `value` and by `group(0)`).
+  _value       : String,
+  // Start position of the match (returned by `index`).
+  _index       : usize,
+  // Original input string (returned by `input`).
+  _input       : String,
+  // Capture groups; list position 0 holds group 1, since `group` subtracts 1
+  // from its argument before indexing.
+  _groups      : ArrayList(Option(String)),
+  // Name entries searched by `named_group`.
+  _group_names : ArrayList(GroupNameEntry)
+);
+impl(RegexMatch,
+  // Bundle the given values into a match result; nothing is computed here.
+  new : (fn(value : String, index : usize, input : String, groups : ArrayList(Option(String)), group_names : ArrayList(GroupNameEntry)) -> Self)({
+    Self(_value: value, _index: index, _input: input, _groups: groups, _group_names: group_names)
+  }),
+  // Full matched text.
+  value : (fn(self : Self) -> String)({
+    self._value
+  }),
+  // Start position of the match, as supplied to `new`.
+  index : (fn(self : Self) -> usize)({
+    self._index
+  }),
+  // Original input string.
+  input : (fn(self : Self) -> String)({
+    self._input
+  }),
+  // Capture group lookup by number. Group 0 is the full match; groups 1..n
+  // come from the stored list. An out-of-range number yields .None.
+  group : (fn(self : Self, idx : usize) -> Option(String))(
+    cond(
+      (idx > usize(0)) => {
+        // Stored list is zero-based while group numbers start at 1.
+        list_pos := (idx - usize(1));
+        cond(
+          (list_pos < self._groups.len()) => self._groups.get(list_pos).unwrap(),
+          true => .None
+        )
+      },
+      true => .Some(self._value)
+    )
+  ),
+  // Capture group lookup by name: the first entry whose name equals `name`
+  // is resolved through `group`; .None when no entry matches.
+  named_group : (fn(self : Self, name : String) -> Option(String))({
+    cursor := usize(0);
+    while (cursor < self._group_names.len()), (cursor = (cursor + usize(1))), {
+      candidate := self._group_names.get(cursor).unwrap();
+      cond(
+        (candidate.name == name) => {
+          return self.group(candidate.index);
+        },
+        true => ()
+      );
+    };
+    .None
+  }),
+  // Number of stored capture groups (group 0 is not counted).
+  group_count : (fn(self : Self) -> usize)({
+    self._groups.len()
+  })
+);
+export
+  RegexMatch
+;