RubyGems - forthic - Versions diffs - 0.2.0 → 0.3.0 - Mend

forthic 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (61) hide show

checksums.yaml +4 -4
data/README.md +314 -14
data/Rakefile +36 -7
data/lib/forthic/decorators/docs.rb +69 -0
data/lib/forthic/decorators/word.rb +331 -0
data/lib/forthic/errors.rb +270 -0
data/lib/forthic/grpc/client.rb +223 -0
data/lib/forthic/grpc/errors.rb +149 -0
data/lib/forthic/grpc/forthic_runtime_pb.rb +32 -0
data/lib/forthic/grpc/forthic_runtime_services_pb.rb +31 -0
data/lib/forthic/grpc/remote_module.rb +120 -0
data/lib/forthic/grpc/remote_runtime_module.rb +148 -0
data/lib/forthic/grpc/remote_word.rb +91 -0
data/lib/forthic/grpc/runtime_manager.rb +60 -0
data/lib/forthic/grpc/serializer.rb +184 -0
data/lib/forthic/grpc/server.rb +361 -0
data/lib/forthic/interpreter.rb +694 -245
data/lib/forthic/literals.rb +170 -0
data/lib/forthic/module.rb +383 -0
data/lib/forthic/modules/standard/array_module.rb +940 -0
data/lib/forthic/modules/standard/boolean_module.rb +176 -0
data/lib/forthic/modules/standard/core_module.rb +362 -0
data/lib/forthic/modules/standard/datetime_module.rb +349 -0
data/lib/forthic/modules/standard/json_module.rb +55 -0
data/lib/forthic/modules/standard/math_module.rb +365 -0
data/lib/forthic/modules/standard/record_module.rb +203 -0
data/lib/forthic/modules/standard/string_module.rb +170 -0
data/lib/forthic/tokenizer.rb +224 -77
data/lib/forthic/utils.rb +35 -0
data/lib/forthic/websocket/handler.rb +548 -0
data/lib/forthic/websocket/serializer.rb +160 -0
data/lib/forthic/word_options.rb +141 -0
data/lib/forthic.rb +30 -20
data/protos/README.md +43 -0
data/protos/v1/forthic_runtime.proto +200 -0
metadata +72 -39
data/.standard.yml +0 -3
data/CHANGELOG.md +0 -11
data/CLAUDE.md +0 -74
data/Guardfile +0 -42
data/lib/forthic/code_location.rb +0 -20
data/lib/forthic/forthic_error.rb +0 -50
data/lib/forthic/forthic_module.rb +0 -146
data/lib/forthic/global_module.rb +0 -2328
data/lib/forthic/positioned_string.rb +0 -19
data/lib/forthic/token.rb +0 -37
data/lib/forthic/variable.rb +0 -34
data/lib/forthic/version.rb +0 -5
data/lib/forthic/words/definition_word.rb +0 -38
data/lib/forthic/words/end_array_word.rb +0 -28
data/lib/forthic/words/end_module_word.rb +0 -16
data/lib/forthic/words/imported_word.rb +0 -27
data/lib/forthic/words/map_word.rb +0 -169
data/lib/forthic/words/module_memo_bang_at_word.rb +0 -22
data/lib/forthic/words/module_memo_bang_word.rb +0 -21
data/lib/forthic/words/module_memo_word.rb +0 -35
data/lib/forthic/words/module_word.rb +0 -21
data/lib/forthic/words/push_value_word.rb +0 -21
data/lib/forthic/words/start_module_word.rb +0 -31
data/lib/forthic/words/word.rb +0 -30
data/sig/forthic.rbs +0 -4

data/lib/forthic/modules/standard/string_module.rb ADDED Viewed

@@ -0,0 +1,170 @@
+# frozen_string_literal: true
+require_relative '../../decorators/word'
+require 'uri'
+module Forthic
+  module Modules
+    # StringModule - String manipulation and processing operations
+    #
+    # Provides operations for string conversion, transformation, splitting/joining,
+    # pattern matching with regex, and URL encoding/decoding.
+    class StringModule < Decorators::DecoratedModule
+      # Register module documentation
+      module_doc <<~DOC
+        String manipulation and processing operations with regex and URL encoding support.
+        ## Categories
+        - Conversion: >STR, URL-ENCODE, URL-DECODE
+        - Transform: LOWERCASE, UPPERCASE, STRIP, ASCII
+        - Split/Join: SPLIT, JOIN, CONCAT
+        - Pattern: REPLACE, RE-MATCH, RE-MATCH-ALL, RE-MATCH-GROUP
+        - Constants: /N, /R, /T
+        ## Examples
+        "hello" "world" CONCAT
+        ["a" "b" "c"] CONCAT
+        "hello world" " " SPLIT
+        ["hello" "world"] " " JOIN
+        "Hello" LOWERCASE
+        "test@example.com" "(@.+)" RE-MATCH 1 RE-MATCH-GROUP
+      DOC
+      def initialize # Passes the module name "string" to the DecoratedModule constructor
+        super("string")
+      end
+    forthic_direct_word :CONCAT, "( str1:string str2:string -- result:string ) OR ( strings:string[] -- result:string )", "Concatenate two strings or array of strings"
+      def CONCAT(interp) # Joins the top stack value (if it is an Array) or the top two stack values, then pushes the result
+        str2 = interp.stack_pop # top of stack: either the second string or a whole array of strings
+        array = if str2.is_a?(Array)
+                  str2
+                else
+                  str1 = interp.stack_pop # popped only when the first value was not an Array
+                  [str1, str2]
+                end
+        result = array.join("")
+        interp.stack_push(result)
+      end
+    forthic_word :to_STR, "( item:any -- string:string )", "Convert item to string", ">STR"
+      def to_STR(item) # Returns item.to_s (so nil becomes "")
+        item.to_s
+      end
+    forthic_word :SPLIT, "( string:string sep:string -- items:any[] )", "Split string by separator"
+      def SPLIT(string, sep) # Splits string on sep; a nil/false string is treated as ""
+        string ||= ""
+        string.split(sep) # NOTE(review): String#split drops trailing empty fields and treats " " as "any whitespace run" - confirm intended
+      end
+    forthic_word :JOIN, "( strings:string[] sep:string -- result:string )", "Join strings with separator"
+      def JOIN(strings, sep) # Joins strings with sep; a nil/false array is treated as []
+        strings ||= []
+        strings.join(sep)
+      end
+    forthic_word :slash_N, "( -- char:string )", "Newline character", "/N"
+      def slash_N # Returns a one-character newline string
+        "\n"
+      end
+    forthic_word :slash_R, "( -- char:string )", "Carriage return character", "/R"
+      def slash_R # Returns a one-character carriage-return string
+        "\r"
+      end
+    forthic_word :slash_T, "( -- char:string )", "Tab character", "/T"
+      def slash_T # Returns a one-character tab string
+        "\t"
+      end
+    forthic_word :LOWERCASE, "( string:string -- result:string )", "Convert string to lowercase"
+      def LOWERCASE(string) # Returns string.downcase, or "" when string is nil/false
+        result = ""
+        result = string.downcase if string
+        result
+      end
+    forthic_word :UPPERCASE, "( string:string -- result:string )", "Convert string to uppercase"
+      def UPPERCASE(string) # Returns string.upcase, or "" when string is nil/false
+        result = ""
+        result = string.upcase if string
+        result
+      end
+    forthic_word :ASCII, "( string:string -- result:string )", "Keep only ASCII characters (< 256)"
+      def ASCII(string) # Keeps the characters whose code point is below 256; nil/false input yields ""
+        string ||= ""
+        result = ""
+        string.each_char do |ch|
+          result += ch if ch.ord < 256 # NOTE(review): 256 also admits code points 128-255; 7-bit ASCII would be < 128 - confirm intended
+        end
+        result
+      end
+    forthic_word :STRIP, "( string:string -- result:string )", "Trim whitespace from string"
+      def STRIP(string) # Returns string.strip; a nil/false input is returned unchanged rather than converted to ""
+        result = string
+        result = result.strip if result
+        result
+      end
+    forthic_word :REPLACE, "( string:string text:string replace:string -- result:string )", "Replace all occurrences of text with replace"
+      def REPLACE(string, text, replace) # Replaces every literal occurrence of text; a nil/false string is returned unchanged
+        result = string
+        if string
+          pattern = Regexp.new(Regexp.escape(text)) # escaped so text is matched literally, not as a regex
+          result = string.gsub(pattern, replace) # NOTE(review): gsub still expands backslash sequences (e.g. \0, \\) inside replace - confirm callers never pass them
+        end
+        result
+      end
+    forthic_direct_word :RE_MATCH, "( string:string pattern:string -- match:any )", "Match string against regex pattern", "RE-MATCH"
+      def RE_MATCH(interp) # Pops pattern then string; pushes the result of String#match, or false when string is nil
+        pattern = interp.stack_pop
+        string = interp.stack_pop
+        re_pattern = Regexp.new(pattern) # compiled before the nil check on string
+        result = false
+        result = string.match(re_pattern) unless string.nil? # String#match gives nil when nothing matches
+        interp.stack_push(result)
+      end
+    forthic_word :RE_MATCH_ALL, "( string:string pattern:string -- matches:any[] )", "Find all regex matches in string", "RE-MATCH-ALL"
+      def RE_MATCH_ALL(string, pattern) # Returns every match of pattern in string; [] when string is nil
+        re_pattern = Regexp.new(pattern)
+        matches = []
+        unless string.nil?
+          matches = string.scan(re_pattern).map do |match|
+            # scan returns array of captures for each match
+            # If there's a capture group, return the first one (like JS matchAll)
+            match.is_a?(Array) ? match[0] : match # any further capture groups are discarded
+          end
+        end
+        matches
+      end
+    forthic_word :RE_MATCH_GROUP, "( match:any num:number -- result:any )", "Get capture group from regex match", "RE-MATCH-GROUP"
+      def RE_MATCH_GROUP(match, num) # Returns match[num], or nil when match is nil/false
+        result = nil
+        result = match[num] if match
+        result
+      end
+    forthic_word :URL_ENCODE, "( str:string -- encoded:string )", "URL encode string", "URL-ENCODE"
+      def URL_ENCODE(str) # Encodes str with URI.encode_www_form_component; nil/false yields ""
+        result = ""
+        result = URI.encode_www_form_component(str) if str
+        result
+      end
+    forthic_word :URL_DECODE, "( urlencoded:string -- decoded:string )", "URL decode string", "URL-DECODE"
+      def URL_DECODE(urlencoded) # Decodes urlencoded with URI.decode_www_form_component; nil/false yields ""
+        result = ""
+        result = URI.decode_www_form_component(urlencoded) if urlencoded
+        result
+      end
+    end
+  end
+end

data/lib/forthic/tokenizer.rb CHANGED Viewed

@@ -1,22 +1,84 @@
 # frozen_string_literal: true
+require_relative 'errors'
 module Forthic
+  # TokenType - Enum for different token types
+  module TokenType # Integer codes passed as the first argument of Token.new
+    STRING = 1 # quoted or triple-quoted string literal
+    COMMENT = 2 # text gathered after "#"
+    START_ARRAY = 3
+    END_ARRAY = 4
+    START_MODULE = 5 # token string is the gathered module name
+    END_MODULE = 6 # "}"
+    START_DEF = 7 # token string is the definition name
+    END_DEF = 8
+    START_MEMO = 9 # introduced by "@:"; token string is the memo name
+    WORD = 10
+    DOT_SYMBOL = 11 # token that began with "."; token string excludes the dot
+    EOS = 12 # returned when the input is exhausted
+  end
+  # StringDelta - Tracks start and end positions for streaming string deltas
+  StringDelta = Struct.new(:start, :end, keyword_init: true) # start/end are offsets into the tokenizer input; end is used as an exclusive bound
+  # CodeLocation - Tracks location information for tokens in source code
+  class CodeLocation # Plain data holder; every field is readable and writable
+    attr_accessor :source, :line, :column, :start_pos, :end_pos
+    def initialize(source: nil, line: 1, column: 1, start_pos: 0, end_pos: 0) # all keywords optional; line/column default to 1, positions to 0
+      @source = source
+      @line = line
+      @column = column
+      @start_pos = start_pos
+      @end_pos = end_pos
+    end
+  end
+  # Token - Represents a lexical token
+  class Token # Read-only holder for a token's type, text and location
+    attr_reader :type, :string, :location
+    def initialize(type, string, location) # the tokenizer passes a TokenType constant, the token text and a CodeLocation
+      @type = type
+      @string = string
+      @location = location
+    end
+  end
+  # PositionedString - String with associated location information
+  class PositionedString # Pairs a string with a location; both are read-only
+    attr_reader :string, :location
+    def initialize(string, location) # stores both arguments as given
+      @string = string
+      @location = location
+    end
+    def to_s # returns the wrapped string itself, not a copy
+      @string
+    end
+    # For compatibility with value extraction
+    def value # same result as #string and #to_s
+      @string
+    end
+  end
+  # Tokenizer - Lexical analyzer for Forthic source code
   class Tokenizer
-    attr_accessor :reference_location, :line, :column, :input_string, :input_pos,
-      :whitespace, :quote_chars, :token_start_pos, :token_end_pos,
-      :token_line, :token_column, :token_string
-    # @param [String] string
-    # @param [CodeLocation, nil] reference_location
-    def initialize(string, reference_location = nil)
-      reference_location ||= CodeLocation.new(screen_name: "<ad-hoc>")
-      @reference_location = reference_location
-      @line = reference_location.line
-      @column = reference_location.column
+    attr_reader :reference_location, :line, :column, :input_string, :input_pos
+    attr_reader :token_start_pos, :token_end_pos, :token_line, :token_column
+    attr_reader :token_string, :string_delta
+    def initialize(string, reference_location = nil, streaming = false)
+      @reference_location = reference_location || CodeLocation.new
+      @line = @reference_location.line
+      @column = @reference_location.column
       @input_string = unescape_string(string)
       @input_pos = 0
       @whitespace = [" ", "\t", "\n", "\r", "(", ")", ","]
-      @quote_chars = ['"', "'"]
+      @quote_chars = ['"', "'", "^"]
       # Token info
       @token_start_pos = 0
@@ -24,18 +86,21 @@ module Forthic
       @token_line = 0
       @token_column = 0
       @token_string = ""
+      @string_delta = nil
+      @streaming = streaming
     end
-    # @return [Token]
     def next_token # Clears the token buffer, scans from the START state and returns that state's result
       clear_token_string
       transition_from_START
     end
-    # @param [String] string
-    # @return [String]
+    # ===================
+    # Helper functions
     def unescape_string(string) # Returns a copy of string with the entities &lt; and &gt; turned into "<" and ">"
-      string
+      result = string.gsub(/&lt;/, "<")
+      result.gsub(/&gt;/, ">")
     end
     def clear_token_string
@@ -48,35 +113,25 @@ module Forthic
       @token_column = @column
     end
-    # @param [String] char
-    # @return [Boolean]
-    def is_whitespace(char)
+    def whitespace?(char) # True when char is listed in @whitespace, which also contains "(", ")" and ","
       @whitespace.include?(char)
     end
-    # @param [String] char
-    # @return [Boolean]
-    def is_quote(char)
+    def quote?(char) # True when char is listed in @quote_chars (double quote, single quote or "^")
       @quote_chars.include?(char)
     end
-    # @param [Integer] index
-    # @param [String] char
-    # @return [Boolean]
-    def is_triple_quote(index, char)
-      return false unless is_quote(char)
+    def triple_quote?(index, char) # True when char is a quote char and the two input characters after index both equal it
+      return false unless quote?(char)
       return false if index + 2 >= @input_string.length
       @input_string[index + 1] == char && @input_string[index + 2] == char
     end
-    # @param [Integer] index
-    # @return [Boolean]
-    def is_start_memo(index)
+    def start_memo?(index) # True when the input has "@" at index immediately followed by ":"
       return false if index + 1 >= @input_string.length
       @input_string[index] == "@" && @input_string[index + 1] == ":"
     end
-    # @param [Integer] num_chars
     def advance_position(num_chars)
       if num_chars >= 0
         num_chars.times do
@@ -88,10 +143,11 @@ module Forthic
           end
           @input_pos += 1
         end
+        num_chars
       else
         (-num_chars).times do
           @input_pos -= 1
-          raise Forthic::Error, "Invalid position" if @input_pos < 0 || @column < 0
+          raise InvalidInputPositionError.new(@input_string) if @input_pos < 0 || @column < 0
           if @input_string[@input_pos] == "\n"
             @line -= 1
             @column = 1
@@ -99,13 +155,13 @@ module Forthic
             @column -= 1
           end
         end
+        -num_chars
       end
     end
-    # @return [CodeLocation]
     def get_token_location
       CodeLocation.new(
-        screen_name: @reference_location.screen_name,
+        source: @reference_location.source,
         line: @token_line,
         column: @token_column,
         start_pos: @token_start_pos,
@@ -113,14 +169,23 @@ module Forthic
       )
     end
-    # @return [Token]
+    def get_input_string # Returns @input_string, i.e. the constructor input after unescape_string
+      @input_string
+    end
+    def get_string_delta # Returns the input slice covered by @string_delta, or "" when no delta is set
+      return "" unless @string_delta
+      @input_string[@string_delta.start...@string_delta.end] # three-dot range: the end offset is excluded
+    end
     def transition_from_START
       while @input_pos < @input_string.length
         char = @input_string[@input_pos]
         note_start_token
         advance_position(1)
-        next if is_whitespace(char)
+        next if whitespace?(char)
         case char
         when "#"
           return transition_from_COMMENT
@@ -140,105 +205,140 @@ module Forthic
         when "}"
           @token_string = char
           return Token.new(TokenType::END_MODULE, char, get_token_location)
+        when "."
+          advance_position(-1) # Back up to beginning of dot symbol
+          return transition_from_GATHER_DOT_SYMBOL
         else
-          if is_start_memo(@input_pos - 1)
-            advance_position(1)
+          if start_memo?(@input_pos - 1)
+            advance_position(1) # Skip over ":" in "@:"
             return transition_from_START_MEMO
-          elsif is_triple_quote(@input_pos - 1, char)
-            advance_position(2)
+          elsif triple_quote?(@input_pos - 1, char)
+            advance_position(2) # Skip over 2nd and 3rd quote chars
             return transition_from_GATHER_TRIPLE_QUOTE_STRING(char)
-          elsif is_quote(char)
+          elsif quote?(char)
             return transition_from_GATHER_STRING(char)
           else
-            advance_position(-1)
+            advance_position(-1) # Back up to beginning of word
             return transition_from_GATHER_WORD
           end
         end
       end
       Token.new(TokenType::EOS, "", get_token_location)
     end
-    # @return [Token]
     def transition_from_COMMENT # Gathers characters through the next "\n" (or to end of input) and returns them as a COMMENT token
       note_start_token
       while @input_pos < @input_string.length
         char = @input_string[@input_pos]
         @token_string += char
         advance_position(1)
-        break if char == "\n"
+        if char == "\n"
+          advance_position(-1) # step back so the newline is unread again; it was already appended to @token_string
+          break
+        end
       end
       Token.new(TokenType::COMMENT, @token_string, get_token_location)
     end
-    # @return [Token]
     def transition_from_START_DEFINITION # Skips leading whitespace, then hands off to the definition-name gathering state
       while @input_pos < @input_string.length
         char = @input_string[@input_pos]
         advance_position(1)
-        next if is_whitespace(char)
-        if is_quote(char)
-          raise Forthic::Error, "Definition names can't have quotes in them"
+        next if whitespace?(char)
+        if quote?(char) # the first non-whitespace character may not be a quote character
+          raise InvalidWordNameError.new(
+            @input_string,
+            location: get_token_location,
+            note: "Definition names can't have quotes in them"
+          )
         else
           advance_position(-1)
           return transition_from_GATHER_DEFINITION_NAME
         end
       end
-      raise Forthic::Error, "Got EOS in START_DEFINITION"
+      raise InvalidWordNameError.new( # reached only when input ends before any non-whitespace character
+        @input_string,
+        location: get_token_location,
+        note: "Got EOS in START_DEFINITION"
+      )
     end
-    # @return [Token]
     def transition_from_START_MEMO # Skips leading whitespace, then hands off to the memo-name gathering state
       while @input_pos < @input_string.length
         char = @input_string[@input_pos]
         advance_position(1)
-        next if is_whitespace(char)
-        if is_quote(char)
-          raise Forthic::Error, "Definitions shouldn't have quotes in them"
+        next if whitespace?(char)
+        if quote?(char) # the first non-whitespace character may not be a quote character
+          raise InvalidWordNameError.new(
+            @input_string,
+            location: get_token_location,
+            note: "Memo names can't have quotes in them"
+          )
         else
           advance_position(-1)
           return transition_from_GATHER_MEMO_NAME
         end
       end
-      raise Forthic::Error, "Got EOS in START_MEMO"
+      raise InvalidWordNameError.new( # reached only when input ends before any non-whitespace character
+        @input_string,
+        location: get_token_location,
+        note: "Got EOS in START_MEMO"
+      )
     end
     def gather_definition_name # Appends characters to @token_string until whitespace or end of input; used for both definition and memo names
       while @input_pos < @input_string.length
         char = @input_string[@input_pos]
         advance_position(1)
-        break if is_whitespace(char)
-        if is_quote(char)
-          raise Forthic::Error, "Definition names can't have quotes in them"
-        elsif ["[", "]", "{", "}"].include?(char)
-          raise Forthic::Error, "Definitions can't have '#{char}' in them"
-        else
-          @token_string += char
+        break if whitespace?(char) # the terminating whitespace character has already been consumed
+        if quote?(char)
+          raise InvalidWordNameError.new(
+            @input_string,
+            location: get_token_location,
+            note: "Definition names can't have quotes in them"
+          )
+        end
+        if ["[", "]", "{", "}"].include?(char) # bracket and brace characters are rejected inside names
+          raise InvalidWordNameError.new(
+            @input_string,
+            location: get_token_location,
+            note: "Definition names can't have '#{char}' in them"
+          )
         end
+        @token_string += char
       end
     end
-    # @return [Token]
     def transition_from_GATHER_DEFINITION_NAME # Gathers a name and returns it as a START_DEF token
       note_start_token
       gather_definition_name
       Token.new(TokenType::START_DEF, @token_string, get_token_location)
     end
-    # @return [Token]
     def transition_from_GATHER_MEMO_NAME # Gathers a name with the same rules as definitions and returns it as a START_MEMO token
       note_start_token
       gather_definition_name
       Token.new(TokenType::START_MEMO, @token_string, get_token_location)
     end
-    # @return [Token]
     def transition_from_GATHER_MODULE
       note_start_token
       while @input_pos < @input_string.length
         char = @input_string[@input_pos]
         advance_position(1)
-        break if is_whitespace(char)
-        if char == "}"
+        if whitespace?(char)
+          break
+        elsif char == "}"
           advance_position(-1)
           break
         else
@@ -248,50 +348,70 @@ module Forthic
       Token.new(TokenType::START_MODULE, @token_string, get_token_location)
     end
-    # @param [String] delim
-    # @return [Token]
     def transition_from_GATHER_TRIPLE_QUOTE_STRING(delim) # Gathers text up to the closing triple delim and returns a STRING token
       note_start_token
       string_delimiter = delim
+      @string_delta = StringDelta.new(start: @input_pos, end: @input_pos) # empty delta at the first content position
       while @input_pos < @input_string.length
         char = @input_string[@input_pos]
-        if char == string_delimiter && is_triple_quote(@input_pos, char)
+        if char == string_delimiter && triple_quote?(@input_pos, char)
+          # Check if this triple quote is followed by at least one more quote (greedy mode trigger)
+          if @input_pos + 3 < @input_string.length && @input_string[@input_pos + 3] == string_delimiter
+            # Greedy mode: include this quote as content and continue looking for the end
+            advance_position(1) # Advance by 1 to catch overlapping sequences
+            @token_string += string_delimiter
+            @string_delta.end = @input_pos
+            next
+          end
+          # Normal behavior: close at first triple quote
           advance_position(3)
-          return Token.new(TokenType::STRING, @token_string, get_token_location)
+          token = Token.new(TokenType::STRING, @token_string, get_token_location)
+          @string_delta = nil # delta is cleared once the string is complete
+          return token
         else
           advance_position(1)
           @token_string += char
+          @string_delta.end = @input_pos # delta end follows the last gathered character
         end
       end
-      raise Forthic::Error, "Unterminated string: #{delim * 3}#{@token_string}"
+      return nil if @streaming # streaming mode: unterminated input yields nil and @string_delta is left set
+      raise UnterminatedStringError.new(@input_string, location: get_token_location)
     end
-    # @param [String] delim
-    # @return [Token]
     def transition_from_GATHER_STRING(delim) # Gathers text up to the next delim character and returns a STRING token
       note_start_token
       string_delimiter = delim
+      @string_delta = StringDelta.new(start: @input_pos, end: @input_pos) # empty delta at the first content position
       while @input_pos < @input_string.length
         char = @input_string[@input_pos]
         advance_position(1)
         if char == string_delimiter
-          return Token.new(TokenType::STRING, @token_string, get_token_location)
+          token = Token.new(TokenType::STRING, @token_string, get_token_location)
+          @string_delta = nil # delta is cleared once the string is complete
+          return token
         else
           @token_string += char
+          @string_delta.end = @input_pos # delta end follows the last gathered character
         end
       end
-      raise Forthic::Error, "Unterminated string: #{delim}#{@token_string}"
+      return nil if @streaming # streaming mode: unterminated input yields nil and @string_delta is left set
+      raise UnterminatedStringError.new(@input_string, location: get_token_location)
     end
-    # @return [Token]
     def transition_from_GATHER_WORD
       note_start_token
       while @input_pos < @input_string.length
         char = @input_string[@input_pos]
         advance_position(1)
-        break if is_whitespace(char)
+        break if whitespace?(char)
         if [";", "[", "]", "{", "}", "#"].include?(char)
           advance_position(-1)
           break
@@ -301,5 +421,32 @@ module Forthic
       end
       Token.new(TokenType::WORD, @token_string, get_token_location)
     end
+    def transition_from_GATHER_DOT_SYMBOL # Gathers a token that begins with "."; returns DOT_SYMBOL, or WORD when fewer than two characters were gathered
+      note_start_token
+      full_token_string = ""
+      while @input_pos < @input_string.length
+        char = @input_string[@input_pos]
+        advance_position(1)
+        break if whitespace?(char)
+        if [";", "[", "]", "{", "}", "#"].include?(char) # same terminator set as transition_from_GATHER_WORD
+          advance_position(-1) # leave the terminator unread for the next token
+          break
+        else
+          full_token_string += char
+          @token_string += char # receives the same characters as full_token_string
+        end
+      end
+      # If dot symbol has no characters after the dot, treat it as a word
+      if full_token_string.length < 2 # "." + at least 1 char = 2 minimum
+        return Token.new(TokenType::WORD, full_token_string, get_token_location)
+      end
+      # For DOT_SYMBOL, return the string without the dot prefix
+      symbol_without_dot = full_token_string[1..-1]
+      Token.new(TokenType::DOT_SYMBOL, symbol_without_dot, get_token_location)
+    end
   end
 end