npm - @shd101wyy/yo - Versions diffs - 0.1.5 → 0.1.6 - Mend

@shd101wyy/yo 0.1.5 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (31) hide show

package/README.md +7 -6
package/out/cjs/index.cjs +508 -503
package/out/cjs/yo-cli.cjs +619 -612
package/out/esm/index.mjs +397 -392
package/out/types/src/codegen/codegen-c.d.ts +2 -0
package/out/types/src/codegen/functions/context.d.ts +1 -0
package/out/types/src/codegen/functions/generation.d.ts +10 -0
package/out/types/src/codegen/utils/index.d.ts +1 -0
package/out/types/src/env.d.ts +1 -0
package/out/types/src/evaluator/builtins/build.d.ts +1 -0
package/out/types/src/evaluator/context.d.ts +1 -0
package/out/types/src/expr.d.ts +2 -0
package/out/types/src/target.d.ts +1 -0
package/out/types/src/value.d.ts +2 -1
package/out/types/tsconfig.tsbuildinfo +1 -1
package/package.json +1 -1
package/std/build.yo +2 -1
package/std/collections/array_list.yo +133 -1
package/std/encoding/html.yo +283 -0
package/std/encoding/html_char_utils.yo +36 -0
package/std/encoding/html_entities.yo +2262 -0
package/std/encoding/punycode.yo +366 -0
package/std/fmt/to_string.yo +5 -4
package/std/glob/index.yo +2 -2
package/std/libc/wctype.yo +55 -0
package/std/path.yo +6 -6
package/std/prelude.yo +8 -0
package/std/regex/parser.yo +69 -4
package/std/regex/vm.yo +18 -31
package/std/string/string.yo +1388 -1337
package/std/string/unicode.yo +242 -0

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "@shd101wyy/yo",
   "displayName": "Yo",
-  "version": "0.1.5",
+  "version": "0.1.6",
   "main": "./out/cjs/index.cjs",
   "module": "./out/esm/index.mjs",
   "types": "./out/types/src/index.d.ts",

package/std/build.yo CHANGED Viewed

@@ -57,6 +57,7 @@ CompilationTarget :: {
   X86_64_Linux_Gnu: "x86_64-linux-gnu",
   X86_64_Linux_Musl: "x86_64-linux-musl",
   Aarch64_Linux_Gnu: "aarch64-linux-gnu",
+  Aarch64_Linux_Musl: "aarch64-linux-musl",
   Aarch64_Macos: "aarch64-macos",
   X86_64_Macos: "x86_64-macos",
   X86_64_Windows_Msvc: "x86_64-windows-msvc",
@@ -80,7 +81,7 @@ Executable :: struct(
   root : comptime_string,
   (target : comptime_string) ?= __yo_build_target_host(),
   (optimize : Optimize) ?= Optimize.Debug,
-  (allocator : Allocator) ?= Allocator.Mimalloc,
+  (allocator : Allocator) ?= Allocator.Libc,
   (sanitize : Sanitize) ?= Sanitize.None
 );
 export Executable;

package/std/collections/array_list.yo CHANGED Viewed

@@ -8,7 +8,7 @@
  * - RAII for automatic cleanup
  */
 { GlobalAllocator, AllocError } :: import "../allocator.yo";
-{ memmove } :: import "../libc/string.yo";
+{ memmove, memcpy, memset } :: import "../libc/string.yo";
 { malloc, calloc, realloc, free, aligned_alloc } :: GlobalAllocator;
 /**
@@ -415,12 +415,144 @@ impl(forall(T : Type), ArrayList(T),
     )
   ),
+  /**
+    * Ensure the ArrayList can hold at least `min_cap` total elements
+    * without further reallocation.
+    *
+    * No-op when `min_cap` does not exceed the current capacity. Otherwise the
+    * new capacity is `min_cap` for a list whose capacity is zero, or the
+    * current capacity doubled until it is at least `min_cap`. The buffer is
+    * then allocated (no buffer yet) or reallocated (existing buffer).
+    * Panics if the allocator returns `.None`.
+    */
+  ensure_total_capacity : (fn(self: Self, min_cap: usize) -> unit)({
+    cond(
+      (min_cap <= self._capacity) => (),
+      true => {
+        new_capacity := cond(
+          (self._capacity == usize(0)) => min_cap,
+          true => {
+            cap := self._capacity;
+            while (cap < min_cap), {
+              doubled := (cap * usize(2));
+              // A doubled value that is not larger than `cap` means the
+              // multiplication wrapped around; settle on exactly `min_cap`
+              // so the loop cannot spin forever on a wrapped capacity.
+              cap = cond(
+                (doubled <= cap) => min_cap,
+                true => doubled
+              );
+            };
+            cap
+          }
+        );
+        new_some_ptr := match(self._ptr,
+          .None => GlobalAllocator.malloc((sizeof(T) * new_capacity)),
+          .Some(old_ptr) => GlobalAllocator.realloc(
+            .Some((*(void))(old_ptr)),
+            (sizeof(T) * new_capacity)
+          )
+        );
+        match(new_some_ptr,
+          .Some(new_ptr) => {
+            self._ptr = .Some((*(T))(new_ptr));
+            self._capacity = new_capacity;
+          },
+          .None => panic("ArrayList.ensure_total_capacity: allocation failed")
+        );
+      }
+    );
+  }),
+  /**
+    * Bulk-append `count` elements read from `src` with a single memcpy.
+    * `src` must point to at least `count` valid elements; a `count` of zero
+    * is a no-op. Capacity is grown before the copy.
+    * NOTE(review): growing may reallocate the buffer, so `src` presumably
+    * must not point into this list's own storage — confirm with callers.
+    */
+  extend_from_ptr : (fn(self: Self, src: *(T), count: usize) -> unit)({
+    if((count != usize(0)), {
+      new_length := (self._length + count);
+      self.ensure_total_capacity(new_length);
+      match(self._ptr,
+        .None => panic("ArrayList.extend_from_ptr: no ptr after ensure_total_capacity"),
+        .Some(base) => {
+          // First free slot directly after the current elements.
+          tail := (base &+ self._length);
+          _ := memcpy((*(void))(tail), (*(void))(src), (count * sizeof(T)));
+          self._length = new_length;
+        }
+      );
+    });
+  }),
   /**
     * Clear all elements but keep capacity.
     * Calls `_free_elements` on the list, then resets the length to zero;
     * this method itself does not assign `_ptr` or `_capacity`.
     */
   clear : (fn(self: Self) -> unit)({
     Self._free_elements(self);
     self._length = usize(0);
+  }),
+  /**
+    * Read the element stored at `index` with no bounds check.
+    * The caller is responsible for guaranteeing `index < len`.
+    * Panics only when the list has no backing buffer.
+    */
+  get_unchecked : (fn(self: Self, index: usize) -> T)(
+    match(self._ptr,
+      .Some(base) => (base &+ index).*,
+      .None => panic("ArrayList.get_unchecked: no ptr")
+    )
+  ),
+  /**
+    * Return the address of the element at `index`; nothing is copied and
+    * no bounds check is made, so the caller must guarantee `index < len`.
+    * The pointer is valid until the list is modified.
+    * Panics only when the list has no backing buffer.
+    */
+  get_ptr : (fn(self: Self, index: usize) -> *(T))(
+    match(self._ptr,
+      .Some(base) => (base &+ index),
+      .None => panic("ArrayList.get_ptr: no ptr")
+    )
+  ),
+  /**
+    * Store `value` at `index` with no bounds check.
+    * The caller is responsible for guaranteeing `index < len`.
+    * Panics only when the list has no backing buffer.
+    */
+  set_unchecked : (fn(self: Self, index: usize, value: T) -> unit)(
+    match(self._ptr,
+      .Some(base) => {
+        slot := (base &+ index);
+        slot.* = value;
+      },
+      .None => panic("ArrayList.set_unchecked: no ptr")
+    )
+  ),
+  /**
+    * Fill all elements with a byte pattern using memset.
+    * One memset call overwrites `length * sizeof(T)` bytes, so the cost is
+    * linear in the size of the stored data (it is not O(1)).
+    * Useful for zeroing bool/integer arrays.
+    * Only safe for types without RC (e.g., bool, u8, usize).
+    * Does nothing when the list has no backing buffer.
+    */
+  fill_with_byte : (fn(self: Self, byte_val: int) -> unit)(
+    match(self._ptr,
+      .None => (),
+      .Some(_ptr) => {
+        _ := memset((*(void))(_ptr), byte_val, (self._length * sizeof(T)));
+      }
+    )
+  ),
+  /**
+    * Set the length to exactly `new_len`.
+    * Shrinking (or keeping the length) only rewrites the length field; no
+    * destructors run on the dropped elements. Growing first ensures
+    * capacity, then memsets the newly exposed slots with `byte_val`.
+    * Only safe for trivial types (bool, u8, usize, etc.).
+    */
+  resize_with_byte : (fn(self: Self, new_len: usize, byte_val: int) -> unit)({
+    old_len := self._length;
+    if((new_len <= old_len), {
+      self._length = new_len;
+    }, {
+      self.ensure_total_capacity(new_len);
+      match(self._ptr,
+        .None => panic("ArrayList.resize_with_byte: no ptr after ensure"),
+        .Some(base) => {
+          // Bytes covering the slots between the old and the new length.
+          grow_bytes := ((new_len - old_len) * sizeof(T));
+          _ := memset((*(void))((base &+ old_len)), byte_val, grow_bytes);
+          self._length = new_len;
+        }
+      );
+    });
   })
 );
 impl(forall(T : Type), ArrayList(T), Dispose(

package/std/encoding/html.yo ADDED Viewed

@@ -0,0 +1,283 @@
+// HTML entity decoding
+//
+// Decodes named (&amp;), decimal (&#38;), and hex (&#x26;) HTML character references.
+// Uses Legacy mode — entities without trailing semicolon are also decoded.
+//
+// Example:
+//   { decode_html } :: import "std/encoding/html";
+//
+//   result := decode_html(`&amp; &lt; &#38; &#x26;`);
+//   assert((result == `& < & &`), "decoded entities");
+open import "../string";
+{ HashMap } :: import "../collections/hash_map";
+{ HashSet } :: import "../collections/hash_set";
+{ is_valid_entity_code, from_code_point } :: import "./html_char_utils";
+{ _build_entity_map, _build_legacy_set } :: import "./html_entities";
+// Module-level state. The map and the set start out empty and are replaced
+// by the full tables the first time `_ensure_init` runs.
+_state_initialized := false;
+_entity_map := HashMap(String, String).new();
+_legacy_set := HashSet(String).new();
+// Build the entity tables once; later calls see the flag and do nothing.
+_ensure_init :: (fn() -> unit)({
+  cond(
+    _state_initialized => (),
+    true => {
+      _entity_map = _build_entity_map();
+      _legacy_set = _build_legacy_set();
+      _state_initialized = true;
+    }
+  );
+});
+// Parse a string of hexadecimal digits into an i32.
+//
+// Callers are expected to pass only [0-9a-fA-F]; any other character counts
+// as digit value 0 for its position. Once the running value exceeds the
+// largest Unicode codepoint (0x10FFFF) it is pinned to 0x110000, so a long
+// digit run can no longer overflow the i32 accumulator and wrap back into a
+// valid-looking codepoint (e.g. "1000000041" used to come out as 0x41).
+_parse_hex :: (fn(s: String) -> i32)({
+  (result : i32) = i32(0);
+  (i : usize) = usize(0);
+  while ((i < s.len())), {
+    c := s.at(i).unwrap();
+    (digit : i32) = i32(0);
+    if(((c >= rune(u32('0'))) && (c <= rune(u32('9')))), {
+      digit = (i32(c.to_u32()) - i32(48));
+    }, if(((c >= rune(u32('a'))) && (c <= rune(u32('f')))), {
+      digit = ((i32(c.to_u32()) - i32(97)) + i32(10));
+    }, if(((c >= rune(u32('A'))) && (c <= rune(u32('F')))), {
+      digit = ((i32(c.to_u32()) - i32(65)) + i32(10));
+    })));
+    result = ((result * i32(16)) + digit);
+    // Saturate just above the codepoint range; 0x110000 * 16 + 15 still
+    // fits in an i32, so the next iteration cannot overflow either.
+    if((result > i32(0x10FFFF)), {
+      result = i32(0x110000);
+    });
+    i = (i + usize(1));
+  };
+  result
+});
+// Parse a string of decimal digits into an i32.
+//
+// Callers are expected to pass only [0-9]. Once the running value exceeds
+// the largest Unicode codepoint (0x10FFFF) it is pinned to 0x110000, so a
+// long digit run can no longer overflow the i32 accumulator and wrap back
+// into a valid-looking codepoint (e.g. "4294967361" used to come out as 65).
+_parse_dec :: (fn(s: String) -> i32)({
+  (result : i32) = i32(0);
+  (i : usize) = usize(0);
+  while ((i < s.len())), {
+    c := s.at(i).unwrap();
+    result = ((result * i32(10)) + (i32(c.to_u32()) - i32(48)));
+    // Saturate just above the codepoint range; 0x110000 * 10 + 9 still
+    // fits in an i32, so the next iteration cannot overflow either.
+    if((result > i32(0x10FFFF)), {
+      result = i32(0x110000);
+    });
+    i = (i + usize(1));
+  };
+  result
+});
+// True when `c` is an ASCII letter (a-z, A-Z) or an ASCII digit (0-9).
+_is_alpha_numeric :: (fn(c: rune) -> bool)(
+  cond(
+    ((c >= rune(u32('a'))) && (c <= rune(u32('z')))) => true,
+    ((c >= rune(u32('A'))) && (c <= rune(u32('Z')))) => true,
+    ((c >= rune(u32('0'))) && (c <= rune(u32('9')))) => true,
+    true => false
+  )
+);
+// Decode HTML entities in a string (Legacy mode — entities without ; are also decoded).
+//
+// Three reference forms are recognised after '&':
+//   - hex      &#xHH; / &#XHH;
+//   - decimal  &#DD;
+//   - named    &name;  (or a legacy name without the trailing ';')
+// Numeric references whose code fails is_valid_entity_code, unknown names and
+// malformed references are copied to the output unchanged.
+//
+// Scan loops in this function stop early by storing (len + k + 1) in the
+// cursor, where k is the index to stop at; that ends the `cursor < len`
+// loop, and the real index is recovered afterwards as (cursor - len - 1).
+decode_html :: (fn(input: String) -> String)({
+  _ensure_init();
+  (len : usize) = input.len();
+  if(((len == usize(0))), {
+    return input;
+  });
+  // Quick check: if no '&', return as-is
+  if(!(input.contains(`&`)), {
+    return input;
+  });
+  (result : String) = ``;
+  (i : usize) = usize(0);
+  while ((i < len)), {
+    c := input.at(i).unwrap();
+    if((c != rune(u32('&'))), {
+      // Not an entity start, just append the character
+      result = `${result}${from_code_point(i32(c.to_u32()))}`;
+      i = (i + usize(1));
+    }, {
+      // Found '&' — try to decode entity
+      (start : usize) = i;
+      i = (i + usize(1));
+      if(((i >= len)), {
+        result = `${result}&`;
+      }, {
+        next := input.at(i).unwrap();
+        if(((next == rune(u32('#')))), {
+          // Numeric entity: &#N; or &#xN;
+          i = (i + usize(1));
+          if(((i >= len)), {
+            result = `${result}&#`;
+          }, {
+            hex_char := input.at(i).unwrap();
+            if(((hex_char == rune(u32('x'))) || (hex_char == rune(u32('X')))), {
+              // Hex: &#xHH;
+              (digit_start : usize) = (i + usize(1));
+              (digit_end : usize) = digit_start;
+              // Single pass over the hex digits, using the same stop-marker
+              // scheme as the decimal branch below.
+              while ((digit_end < len)), {
+                dc := input.at(digit_end).unwrap();
+                if(((((dc >= rune(u32('0'))) && (dc <= rune(u32('9')))) || (((dc >= rune(u32('a'))) && (dc <= rune(u32('f')))) || ((dc >= rune(u32('A'))) && (dc <= rune(u32('F'))))))), {
+                  digit_end = (digit_end + usize(1));
+                }, {
+                  digit_end = ((len + digit_end) + usize(1));
+                });
+              };
+              // Recover the real end index from the stop marker
+              if(((digit_end > len)), {
+                digit_end = ((digit_end - len) - usize(1));
+              });
+              if(((digit_end > digit_start)), {
+                hex_str := input.substring(digit_start, digit_end);
+                (code : i32) = _parse_hex(hex_str);
+                // Check for semicolon
+                if((((digit_end < len) && (input.at(digit_end).unwrap() == rune(u32(';'))))), {
+                  i = (digit_end + usize(1));
+                }, {
+                  i = digit_end;
+                });
+                if(is_valid_entity_code(code), {
+                  result = `${result}${from_code_point(code)}`;
+                }, {
+                  // Invalid code (e.g., surrogates) — keep original entity text
+                  (orig_hex : String) = input.substring(start, i);
+                  result = `${result}${orig_hex}`;
+                });
+              }, {
+                // No hex digits — output "&#" plus the x/X literally
+                result = `${result}&#${from_code_point(i32(hex_char.to_u32()))}`;
+                i = (i + usize(1));
+              });
+            }, {
+              // Decimal: &#DD;
+              (digit_start : usize) = i;
+              (digit_end : usize) = digit_start;
+              while ((digit_end < len)), {
+                dc := input.at(digit_end).unwrap();
+                if((((dc >= rune(u32('0'))) && (dc <= rune(u32('9'))))), {
+                  digit_end = (digit_end + usize(1));
+                }, {
+                  digit_end = ((len + digit_end) + usize(1));
+                });
+              };
+              // Recover the real end index from the stop marker
+              if(((digit_end > len)), {
+                digit_end = ((digit_end - len) - usize(1));
+              });
+              if(((digit_end > digit_start)), {
+                dec_str := input.substring(digit_start, digit_end);
+                (code : i32) = _parse_dec(dec_str);
+                // Check for semicolon
+                if((((digit_end < len) && (input.at(digit_end).unwrap() == rune(u32(';'))))), {
+                  i = (digit_end + usize(1));
+                }, {
+                  i = digit_end;
+                });
+                if(is_valid_entity_code(code), {
+                  result = `${result}${from_code_point(code)}`;
+                }, {
+                  // Invalid code (e.g., surrogates) — keep original entity text
+                  (orig_dec : String) = input.substring(start, i);
+                  result = `${result}${orig_dec}`;
+                });
+              }, {
+                // No decimal digits — emit "&#"; `i` already points at the
+                // character after '#', which the main loop handles next
+                result = `${result}&#`;
+              });
+            });
+          });
+        }, {
+          // Named entity: &name; or &name (legacy)
+          (name_start : usize) = i;
+          (name_end : usize) = name_start;
+          while ((name_end < len)), {
+            nc := input.at(name_end).unwrap();
+            if(((nc == rune(u32(';')))), {
+              // Found semicolon — end of entity name
+              name_end = ((len + name_end) + usize(1));
+            }, if(_is_alpha_numeric(nc), {
+              name_end = (name_end + usize(1));
+            }, {
+              // Non-alphanumeric, non-semicolon — end of potential entity
+              name_end = ((len + name_end) + usize(1));
+            }));
+          };
+          // Recover the real end index from the stop marker
+          if(((name_end > len)), {
+            name_end = ((name_end - len) - usize(1));
+          });
+          name_str := input.substring(name_start, name_end);
+          // Check for semicolon at name_end
+          (has_semi : bool) = (((name_end < len) && (input.at(name_end).unwrap() == rune(u32(';')))));
+          if(has_semi, {
+            // Try exact match with semicolon
+            match(_entity_map.get(name_str),
+              .Some(decoded) => {
+                result = `${result}${decoded}`;
+                i = (name_end + usize(1));
+              },
+              .None => {
+                // Unknown entity — output literally
+                result = `${result}&${name_str};`;
+                i = (name_end + usize(1));
+              }
+            );
+          }, {
+            // Legacy mode: try progressively shorter names
+            (matched : bool) = false;
+            (try_end : usize) = name_end;
+            while ((((try_end > name_start) && !(matched)))), {
+              try_name := input.substring(name_start, try_end);
+              if(_legacy_set.contains(try_name), {
+                match(_entity_map.get(try_name),
+                  .Some(decoded) => {
+                    result = `${result}${decoded}`;
+                    i = try_end;
+                    matched = true;
+                  },
+                  .None => {
+                    try_end = (try_end - usize(1));
+                  }
+                );
+              }, {
+                try_end = (try_end - usize(1));
+              });
+            };
+            if(!(matched), {
+              // No legacy match — output '&' literally and continue
+              result = `${result}&`;
+              i = name_start;
+            });
+          });
+        });
+      });
+    });
+  };
+  result
+});
+export decode_html, is_valid_entity_code, from_code_point;

package/std/encoding/html_char_utils.yo ADDED Viewed

@@ -0,0 +1,36 @@
+// HTML character utility functions
+//
+// Provides Unicode codepoint validation and conversion for HTML entity processing.
+//
+// Example:
+//   { is_valid_entity_code, from_code_point } :: import "std/encoding/html_char_utils";
+//
+//   assert(is_valid_entity_code(i32(65)), "A is valid");
+//   s := from_code_point(i32(65));  // "A"
+open import "../string";
+// Check if a Unicode codepoint is a valid HTML entity value.
+//
+// Rejected: negative values (never a codepoint; previously most of them fell
+// through every range test and were accepted), surrogates 0xD800-0xDFFF,
+// 0xFDD0-0xFDEF, any value whose low 16 bits are 0xFFFE or 0xFFFF, the
+// control ranges 0x00-0x08, 0x0B, 0x0E-0x1F and 0x7F-0x9F, and anything
+// above 0x10FFFF. Everything else is accepted.
+is_valid_entity_code :: (fn(c: i32) -> bool)(
+  cond(
+    (c < i32(0)) => false,
+    ((c >= i32(0xD800)) && (c <= i32(0xDFFF))) => false,
+    ((c >= i32(0xFDD0)) && (c <= i32(0xFDEF))) => false,
+    (((c & i32(0xFFFF)) == i32(0xFFFF)) || ((c & i32(0xFFFF)) == i32(0xFFFE))) => false,
+    ((c >= i32(0x00)) && (c <= i32(0x08))) => false,
+    (c == i32(0x0B)) => false,
+    ((c >= i32(0x0E)) && (c <= i32(0x1F))) => false,
+    ((c >= i32(0x7F)) && (c <= i32(0x9F))) => false,
+    (c > i32(0x10FFFF)) => false,
+    true => true
+  )
+);
+// Build a String holding the single character whose codepoint is `c`.
+// The value is converted i32 -> u32 -> rune and then interpolated.
+from_code_point :: (fn(c: i32) -> String)({
+  code_unit := u32(c);
+  (ch : rune) = rune(code_unit);
+  `${ch}`
+});
+export is_valid_entity_code, from_code_point;