forthic 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +314 -14
- data/Rakefile +36 -7
- data/lib/forthic/decorators/docs.rb +69 -0
- data/lib/forthic/decorators/word.rb +331 -0
- data/lib/forthic/errors.rb +270 -0
- data/lib/forthic/grpc/client.rb +223 -0
- data/lib/forthic/grpc/errors.rb +149 -0
- data/lib/forthic/grpc/forthic_runtime_pb.rb +32 -0
- data/lib/forthic/grpc/forthic_runtime_services_pb.rb +31 -0
- data/lib/forthic/grpc/remote_module.rb +120 -0
- data/lib/forthic/grpc/remote_runtime_module.rb +148 -0
- data/lib/forthic/grpc/remote_word.rb +91 -0
- data/lib/forthic/grpc/runtime_manager.rb +60 -0
- data/lib/forthic/grpc/serializer.rb +184 -0
- data/lib/forthic/grpc/server.rb +361 -0
- data/lib/forthic/interpreter.rb +694 -245
- data/lib/forthic/literals.rb +170 -0
- data/lib/forthic/module.rb +383 -0
- data/lib/forthic/modules/standard/array_module.rb +940 -0
- data/lib/forthic/modules/standard/boolean_module.rb +176 -0
- data/lib/forthic/modules/standard/core_module.rb +362 -0
- data/lib/forthic/modules/standard/datetime_module.rb +349 -0
- data/lib/forthic/modules/standard/json_module.rb +55 -0
- data/lib/forthic/modules/standard/math_module.rb +365 -0
- data/lib/forthic/modules/standard/record_module.rb +203 -0
- data/lib/forthic/modules/standard/string_module.rb +170 -0
- data/lib/forthic/tokenizer.rb +224 -77
- data/lib/forthic/utils.rb +35 -0
- data/lib/forthic/websocket/handler.rb +548 -0
- data/lib/forthic/websocket/serializer.rb +160 -0
- data/lib/forthic/word_options.rb +141 -0
- data/lib/forthic.rb +30 -20
- data/protos/README.md +43 -0
- data/protos/v1/forthic_runtime.proto +200 -0
- metadata +72 -39
- data/.standard.yml +0 -3
- data/CHANGELOG.md +0 -11
- data/CLAUDE.md +0 -74
- data/Guardfile +0 -42
- data/lib/forthic/code_location.rb +0 -20
- data/lib/forthic/forthic_error.rb +0 -50
- data/lib/forthic/forthic_module.rb +0 -146
- data/lib/forthic/global_module.rb +0 -2328
- data/lib/forthic/positioned_string.rb +0 -19
- data/lib/forthic/token.rb +0 -37
- data/lib/forthic/variable.rb +0 -34
- data/lib/forthic/version.rb +0 -5
- data/lib/forthic/words/definition_word.rb +0 -38
- data/lib/forthic/words/end_array_word.rb +0 -28
- data/lib/forthic/words/end_module_word.rb +0 -16
- data/lib/forthic/words/imported_word.rb +0 -27
- data/lib/forthic/words/map_word.rb +0 -169
- data/lib/forthic/words/module_memo_bang_at_word.rb +0 -22
- data/lib/forthic/words/module_memo_bang_word.rb +0 -21
- data/lib/forthic/words/module_memo_word.rb +0 -35
- data/lib/forthic/words/module_word.rb +0 -21
- data/lib/forthic/words/push_value_word.rb +0 -21
- data/lib/forthic/words/start_module_word.rb +0 -31
- data/lib/forthic/words/word.rb +0 -30
- data/sig/forthic.rbs +0 -4
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative '../../decorators/word'
|
|
4
|
+
require 'uri'
|
|
5
|
+
|
|
6
|
+
module Forthic
|
|
7
|
+
module Modules
|
|
8
|
+
# StringModule - String manipulation and processing operations
|
|
9
|
+
#
|
|
10
|
+
# Provides operations for string conversion, transformation, splitting/joining,
|
|
11
|
+
# pattern matching with regex, and URL encoding/decoding.
|
|
12
|
+
class StringModule < Decorators::DecoratedModule
|
|
13
|
+
# Register module documentation
|
|
14
|
+
module_doc <<~DOC
|
|
15
|
+
String manipulation and processing operations with regex and URL encoding support.
|
|
16
|
+
|
|
17
|
+
## Categories
|
|
18
|
+
- Conversion: >STR, URL-ENCODE, URL-DECODE
|
|
19
|
+
- Transform: LOWERCASE, UPPERCASE, STRIP, ASCII
|
|
20
|
+
- Split/Join: SPLIT, JOIN, CONCAT
|
|
21
|
+
- Pattern: REPLACE, RE-MATCH, RE-MATCH-ALL, RE-MATCH-GROUP
|
|
22
|
+
- Constants: /N, /R, /T
|
|
23
|
+
|
|
24
|
+
## Examples
|
|
25
|
+
"hello" "world" CONCAT
|
|
26
|
+
["a" "b" "c"] CONCAT
|
|
27
|
+
"hello world" " " SPLIT
|
|
28
|
+
["hello" "world"] " " JOIN
|
|
29
|
+
"Hello" LOWERCASE
|
|
30
|
+
"test@example.com" "(@.+)" RE-MATCH 1 RE-MATCH-GROUP
|
|
31
|
+
DOC
|
|
32
|
+
|
|
33
|
+
def initialize
|
|
34
|
+
super("string")
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
forthic_direct_word :CONCAT, "( str1:string str2:string -- result:string ) OR ( strings:string[] -- result:string )", "Concatenate two strings or array of strings"
|
|
38
|
+
# Concatenate strings taken from the interpreter stack.
#
# Pops the top stack value. When it is an Array its elements are joined;
# otherwise a second value is popped and the two are joined in stack order
# (the deeper value first). The joined string is pushed back on the stack.
#
# @param interp [#stack_pop, #stack_push] interpreter that owns the stack
def CONCAT(interp)
  top = interp.stack_pop
  pieces = top.is_a?(Array) ? top : [interp.stack_pop, top]
  interp.stack_push(pieces.join(""))
end
|
|
49
|
+
|
|
50
|
+
forthic_word :to_STR, "( item:any -- string:string )", "Convert item to string", ">STR"
|
|
51
|
+
# Convert any item to its string form by calling +to_s+ on it.
#
# @param item [Object] value to convert
# @return [String] result of +item.to_s+
def to_STR(item)
  item.to_s
end
|
|
54
|
+
|
|
55
|
+
forthic_word :SPLIT, "( string:string sep:string -- items:any[] )", "Split string by separator"
|
|
56
|
+
# Split a string on a separator; a nil string is treated as empty.
#
# @param string [String, nil] text to split
# @param sep [String] separator handed straight to String#split
# @return [Array<String>] the pieces
def SPLIT(string, sep)
  (string || "").split(sep)
end
|
|
60
|
+
|
|
61
|
+
forthic_word :JOIN, "( strings:string[] sep:string -- result:string )", "Join strings with separator"
|
|
62
|
+
# Join the elements of an array with a separator; nil is treated as an empty array.
#
# @param strings [Array, nil] items to join
# @param sep [String] separator placed between items
# @return [String] the joined string
def JOIN(strings, sep)
  (strings || []).join(sep)
end
|
|
66
|
+
|
|
67
|
+
forthic_word :slash_N, "( -- char:string )", "Newline character", "/N"
|
|
68
|
+
# Newline constant.
#
# @return [String] a single line-feed character
def slash_N
  "\n"
end
|
|
71
|
+
|
|
72
|
+
forthic_word :slash_R, "( -- char:string )", "Carriage return character", "/R"
|
|
73
|
+
# Carriage-return constant.
#
# @return [String] a single carriage-return character
def slash_R
  "\r"
end
|
|
76
|
+
|
|
77
|
+
forthic_word :slash_T, "( -- char:string )", "Tab character", "/T"
|
|
78
|
+
# Tab constant.
#
# @return [String] a single horizontal-tab character
def slash_T
  "\t"
end
|
|
81
|
+
|
|
82
|
+
forthic_word :LOWERCASE, "( string:string -- result:string )", "Convert string to lowercase"
|
|
83
|
+
# Lowercase a string; a nil (or false) input yields the empty string.
#
# @param string [String, nil] text to convert
# @return [String] lowercased text, or "" when no string was given
def LOWERCASE(string)
  string ? string.downcase : ""
end
|
|
88
|
+
|
|
89
|
+
forthic_word :UPPERCASE, "( string:string -- result:string )", "Convert string to uppercase"
|
|
90
|
+
# Uppercase a string; a nil (or false) input yields the empty string.
#
# @param string [String, nil] text to convert
# @return [String] uppercased text, or "" when no string was given
def UPPERCASE(string)
  string ? string.upcase : ""
end
|
|
95
|
+
|
|
96
|
+
forthic_word :ASCII, "( string:string -- result:string )", "Keep only ASCII characters (< 256)"
|
|
97
|
+
# Drop every character whose code point is 256 or above.
#
# Despite the word's name, the cutoff is 256 rather than 128, so characters
# such as "é" (U+00E9) are kept. A nil input is treated as the empty string.
#
# Characters are filtered and joined once instead of being appended with
# `+=`, which allocated a new string for every kept character.
#
# @param string [String, nil] text to filter
# @return [String] the characters of +string+ with code points below 256
def ASCII(string)
  (string || "").each_char.select { |ch| ch.ord < 256 }.join
end
|
|
106
|
+
|
|
107
|
+
forthic_word :STRIP, "( string:string -- result:string )", "Trim whitespace from string"
|
|
108
|
+
# Trim leading and trailing whitespace.
#
# A nil (or false) input is returned unchanged rather than converted.
#
# @param string [String, nil] text to trim
# @return [String, nil] trimmed text, or the falsy input as given
def STRIP(string)
  string && string.strip
end
|
|
113
|
+
|
|
114
|
+
forthic_word :REPLACE, "( string:string text:string replace:string -- result:string )", "Replace all occurrences of text with replace"
|
|
115
|
+
# Replace every literal occurrence of +text+ in +string+ with +replace+.
#
# +text+ is matched literally (a String pattern given to gsub is not treated
# as a regular expression). The replacement is supplied through a block so it
# is inserted verbatim: with a plain string argument gsub would interpret
# backslash sequences such as "\1" or "\0" as back-references and corrupt
# replacements that contain backslashes.
#
# @param string [String, nil] text to search; a nil/false value is returned as-is
# @param text [String] literal text to look for
# @param replace [String] literal replacement
# @return [String, nil] the string with all occurrences replaced
def REPLACE(string, text, replace)
  return string unless string

  string.gsub(text) { replace }
end
|
|
123
|
+
|
|
124
|
+
forthic_direct_word :RE_MATCH, "( string:string pattern:string -- match:any )", "Match string against regex pattern", "RE-MATCH"
|
|
125
|
+
# Match a string against a regex pattern, both taken from the stack.
#
# Pops the pattern, then the string. Pushes +false+ when the string is nil,
# otherwise whatever String#match returns (a MatchData, or nil on no match).
# The pattern is compiled before the nil check, so an invalid pattern raises
# even when the string is nil.
#
# @param interp [#stack_pop, #stack_push] interpreter that owns the stack
def RE_MATCH(interp)
  pattern = interp.stack_pop
  subject = interp.stack_pop
  regex = Regexp.new(pattern)
  outcome = subject.nil? ? false : subject.match(regex)
  interp.stack_push(outcome)
end
|
|
133
|
+
|
|
134
|
+
forthic_word :RE_MATCH_ALL, "( string:string pattern:string -- matches:any[] )", "Find all regex matches in string", "RE-MATCH-ALL"
|
|
135
|
+
# Collect every match of a regex pattern in a string.
#
# String#scan yields an array of captures when the pattern has groups; in
# that case only the first capture of each match is kept. Without groups the
# whole matched text is kept. The pattern is compiled before the nil check.
#
# @param string [String, nil] text to search
# @param pattern [String] regular-expression source
# @return [Array] one entry per match; empty when +string+ is nil
def RE_MATCH_ALL(string, pattern)
  regex = Regexp.new(pattern)
  return [] if string.nil?

  string.scan(regex).map { |hit| hit.is_a?(Array) ? hit[0] : hit }
end
|
|
147
|
+
|
|
148
|
+
forthic_word :RE_MATCH_GROUP, "( match:any num:number -- result:any )", "Get capture group from regex match", "RE-MATCH-GROUP"
|
|
149
|
+
# Fetch one capture group from a regex match.
#
# @param match [#[], nil] match object (e.g. a MatchData); nil/false means no match
# @param num [Integer] group index passed to +match[]+
# @return [Object, nil] the indexed group, or nil when there is no match
def RE_MATCH_GROUP(match, num)
  match ? match[num] : nil
end
|
|
154
|
+
|
|
155
|
+
forthic_word :URL_ENCODE, "( str:string -- encoded:string )", "URL encode string", "URL-ENCODE"
|
|
156
|
+
# URL-encode a string with URI.encode_www_form_component.
#
# @param str [String, nil] text to encode
# @return [String] encoded text, or "" when +str+ is nil/false
def URL_ENCODE(str)
  str ? URI.encode_www_form_component(str) : ""
end
|
|
161
|
+
|
|
162
|
+
forthic_word :URL_DECODE, "( urlencoded:string -- decoded:string )", "URL decode string", "URL-DECODE"
|
|
163
|
+
# Decode a URL-encoded string with URI.decode_www_form_component.
#
# @param urlencoded [String, nil] encoded text
# @return [String] decoded text, or "" when +urlencoded+ is nil/false
def URL_DECODE(urlencoded)
  urlencoded ? URI.decode_www_form_component(urlencoded) : ""
end
|
|
168
|
+
end
|
|
169
|
+
end
|
|
170
|
+
end
|
data/lib/forthic/tokenizer.rb
CHANGED
|
@@ -1,22 +1,84 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require_relative 'errors'
|
|
4
|
+
|
|
3
5
|
module Forthic
|
|
6
|
+
# TokenType - Enum for different token types
|
|
7
|
+
# TokenType - integer tags identifying the kind of each token the tokenizer emits.
module TokenType
  # Literal and comment tokens
  STRING = 1
  COMMENT = 2

  # Array delimiters
  START_ARRAY = 3
  END_ARRAY = 4

  # Module delimiters
  START_MODULE = 5
  END_MODULE = 6

  # Definition and memo markers
  START_DEF = 7
  END_DEF = 8
  START_MEMO = 9

  # Words and dot-prefixed symbols
  WORD = 10
  DOT_SYMBOL = 11

  # End of the input string
  EOS = 12
end
|
|
21
|
+
|
|
22
|
+
# StringDelta - Tracks start and end positions for streaming string deltas
|
|
23
|
+
StringDelta = Struct.new(:start, :end, keyword_init: true)
|
|
24
|
+
|
|
25
|
+
# CodeLocation - Tracks location information for tokens in source code
|
|
26
|
+
# CodeLocation - where a token sits in its source text.
#
# All fields are plain read/write attributes with keyword defaults:
# no source, line 1, column 1, and start/end positions of 0.
class CodeLocation
  attr_accessor :source
  attr_accessor :line, :column
  attr_accessor :start_pos, :end_pos

  # @param source [Object, nil] identifier of the originating source
  # @param line [Integer] line number (defaults to 1)
  # @param column [Integer] column number (defaults to 1)
  # @param start_pos [Integer] start offset (defaults to 0)
  # @param end_pos [Integer] end offset (defaults to 0)
  def initialize(source: nil, line: 1, column: 1, start_pos: 0, end_pos: 0)
    @source = source
    @line, @column = line, column
    @start_pos, @end_pos = start_pos, end_pos
  end
end
|
|
37
|
+
|
|
38
|
+
# Token - Represents a lexical token
|
|
39
|
+
# Token - a lexical token: its type tag, its text, and where it was found.
class Token
  attr_reader :type
  attr_reader :string
  attr_reader :location

  # @param type [Integer] token type tag
  # @param string [String] token text
  # @param location [Object] location of the token in the source
  def initialize(type, string, location)
    @type, @string, @location = type, string, location
  end
end
|
|
48
|
+
|
|
49
|
+
# PositionedString - String with associated location information
|
|
50
|
+
# PositionedString - a string paired with the location it came from.
class PositionedString
  attr_reader :string
  attr_reader :location

  # @param string [String] the text
  # @param location [Object] where the text was found
  def initialize(string, location)
    @string, @location = string, location
  end

  # @return [String] the wrapped text
  def to_s
    @string
  end

  # Same result as #to_s; kept for compatibility with value extraction.
  alias_method :value, :to_s
end
|
|
67
|
+
|
|
68
|
+
# Tokenizer - Lexical analyzer for Forthic source code
|
|
4
69
|
class Tokenizer
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
@reference_location = reference_location
|
|
14
|
-
@line = reference_location.line
|
|
15
|
-
@column = reference_location.column
|
|
70
|
+
attr_reader :reference_location, :line, :column, :input_string, :input_pos
|
|
71
|
+
attr_reader :token_start_pos, :token_end_pos, :token_line, :token_column
|
|
72
|
+
attr_reader :token_string, :string_delta
|
|
73
|
+
|
|
74
|
+
def initialize(string, reference_location = nil, streaming = false)
|
|
75
|
+
@reference_location = reference_location || CodeLocation.new
|
|
76
|
+
@line = @reference_location.line
|
|
77
|
+
@column = @reference_location.column
|
|
16
78
|
@input_string = unescape_string(string)
|
|
17
79
|
@input_pos = 0
|
|
18
80
|
@whitespace = [" ", "\t", "\n", "\r", "(", ")", ","]
|
|
19
|
-
@quote_chars = ['"', "'"]
|
|
81
|
+
@quote_chars = ['"', "'", "^"]
|
|
20
82
|
|
|
21
83
|
# Token info
|
|
22
84
|
@token_start_pos = 0
|
|
@@ -24,18 +86,21 @@ module Forthic
|
|
|
24
86
|
@token_line = 0
|
|
25
87
|
@token_column = 0
|
|
26
88
|
@token_string = ""
|
|
89
|
+
@string_delta = nil
|
|
90
|
+
@streaming = streaming
|
|
27
91
|
end
|
|
28
92
|
|
|
29
|
-
# @return [Token]
|
|
30
93
|
def next_token
|
|
31
94
|
clear_token_string
|
|
32
95
|
transition_from_START
|
|
33
96
|
end
|
|
34
97
|
|
|
35
|
-
#
|
|
36
|
-
#
|
|
98
|
+
# ===================
|
|
99
|
+
# Helper functions
|
|
100
|
+
|
|
37
101
|
# Convert the HTML entities "&lt;" and "&gt;" back into literal angle brackets.
#
# NOTE(review): the substitutions previously replaced "<" with "<" and ">"
# with ">", which left the input unchanged; the entity patterns are restored
# here on the assumption that they were decoded away - confirm against the
# released gem.
#
# @param string [String] raw input text
# @return [String] text with "&lt;" and "&gt;" replaced by "<" and ">"
def unescape_string(string)
  string.gsub("&lt;", "<").gsub("&gt;", ">")
end
|
|
40
105
|
|
|
41
106
|
def clear_token_string
|
|
@@ -48,35 +113,25 @@ module Forthic
|
|
|
48
113
|
@token_column = @column
|
|
49
114
|
end
|
|
50
115
|
|
|
51
|
-
|
|
52
|
-
# @return [Boolean]
|
|
53
|
-
def is_whitespace(char)
|
|
116
|
+
def whitespace?(char)
|
|
54
117
|
@whitespace.include?(char)
|
|
55
118
|
end
|
|
56
119
|
|
|
57
|
-
|
|
58
|
-
# @return [Boolean]
|
|
59
|
-
def is_quote(char)
|
|
120
|
+
def quote?(char)
|
|
60
121
|
@quote_chars.include?(char)
|
|
61
122
|
end
|
|
62
123
|
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
# @return [Boolean]
|
|
66
|
-
def is_triple_quote(index, char)
|
|
67
|
-
return false unless is_quote(char)
|
|
124
|
+
def triple_quote?(index, char)
|
|
125
|
+
return false unless quote?(char)
|
|
68
126
|
return false if index + 2 >= @input_string.length
|
|
69
127
|
@input_string[index + 1] == char && @input_string[index + 2] == char
|
|
70
128
|
end
|
|
71
129
|
|
|
72
|
-
|
|
73
|
-
# @return [Boolean]
|
|
74
|
-
def is_start_memo(index)
|
|
130
|
+
def start_memo?(index)
|
|
75
131
|
return false if index + 1 >= @input_string.length
|
|
76
132
|
@input_string[index] == "@" && @input_string[index + 1] == ":"
|
|
77
133
|
end
|
|
78
134
|
|
|
79
|
-
# @param [Integer] num_chars
|
|
80
135
|
def advance_position(num_chars)
|
|
81
136
|
if num_chars >= 0
|
|
82
137
|
num_chars.times do
|
|
@@ -88,10 +143,11 @@ module Forthic
|
|
|
88
143
|
end
|
|
89
144
|
@input_pos += 1
|
|
90
145
|
end
|
|
146
|
+
num_chars
|
|
91
147
|
else
|
|
92
148
|
(-num_chars).times do
|
|
93
149
|
@input_pos -= 1
|
|
94
|
-
raise
|
|
150
|
+
raise InvalidInputPositionError.new(@input_string) if @input_pos < 0 || @column < 0
|
|
95
151
|
if @input_string[@input_pos] == "\n"
|
|
96
152
|
@line -= 1
|
|
97
153
|
@column = 1
|
|
@@ -99,13 +155,13 @@ module Forthic
|
|
|
99
155
|
@column -= 1
|
|
100
156
|
end
|
|
101
157
|
end
|
|
158
|
+
-num_chars
|
|
102
159
|
end
|
|
103
160
|
end
|
|
104
161
|
|
|
105
|
-
# @return [CodeLocation]
|
|
106
162
|
def get_token_location
|
|
107
163
|
CodeLocation.new(
|
|
108
|
-
|
|
164
|
+
source: @reference_location.source,
|
|
109
165
|
line: @token_line,
|
|
110
166
|
column: @token_column,
|
|
111
167
|
start_pos: @token_start_pos,
|
|
@@ -113,14 +169,23 @@ module Forthic
|
|
|
113
169
|
)
|
|
114
170
|
end
|
|
115
171
|
|
|
116
|
-
|
|
172
|
+
# Accessor for the tokenizer's input text.
#
# @return [String] the value stored in @input_string
def get_input_string
  @input_string
end
|
|
175
|
+
|
|
176
|
+
# Text covered by the current string delta.
#
# @return [String] the slice of the input from the delta's start up to (not
#   including) its end, or "" when no delta is being tracked
def get_string_delta
  delta = @string_delta
  delta ? @input_string[delta.start...delta.end] : ""
end
|
|
180
|
+
|
|
117
181
|
def transition_from_START
|
|
118
182
|
while @input_pos < @input_string.length
|
|
119
183
|
char = @input_string[@input_pos]
|
|
120
184
|
note_start_token
|
|
121
185
|
advance_position(1)
|
|
122
186
|
|
|
123
|
-
next if
|
|
187
|
+
next if whitespace?(char)
|
|
188
|
+
|
|
124
189
|
case char
|
|
125
190
|
when "#"
|
|
126
191
|
return transition_from_COMMENT
|
|
@@ -140,105 +205,140 @@ module Forthic
|
|
|
140
205
|
when "}"
|
|
141
206
|
@token_string = char
|
|
142
207
|
return Token.new(TokenType::END_MODULE, char, get_token_location)
|
|
208
|
+
when "."
|
|
209
|
+
advance_position(-1) # Back up to beginning of dot symbol
|
|
210
|
+
return transition_from_GATHER_DOT_SYMBOL
|
|
143
211
|
else
|
|
144
|
-
if
|
|
145
|
-
advance_position(1)
|
|
212
|
+
if start_memo?(@input_pos - 1)
|
|
213
|
+
advance_position(1) # Skip over ":" in "@:"
|
|
146
214
|
return transition_from_START_MEMO
|
|
147
|
-
elsif
|
|
148
|
-
advance_position(2)
|
|
215
|
+
elsif triple_quote?(@input_pos - 1, char)
|
|
216
|
+
advance_position(2) # Skip over 2nd and 3rd quote chars
|
|
149
217
|
return transition_from_GATHER_TRIPLE_QUOTE_STRING(char)
|
|
150
|
-
elsif
|
|
218
|
+
elsif quote?(char)
|
|
151
219
|
return transition_from_GATHER_STRING(char)
|
|
152
220
|
else
|
|
153
|
-
advance_position(-1)
|
|
221
|
+
advance_position(-1) # Back up to beginning of word
|
|
154
222
|
return transition_from_GATHER_WORD
|
|
155
223
|
end
|
|
156
224
|
end
|
|
157
225
|
end
|
|
226
|
+
|
|
158
227
|
Token.new(TokenType::EOS, "", get_token_location)
|
|
159
228
|
end
|
|
160
229
|
|
|
161
|
-
# @return [Token]
|
|
162
230
|
def transition_from_COMMENT
|
|
163
231
|
note_start_token
|
|
164
232
|
while @input_pos < @input_string.length
|
|
165
233
|
char = @input_string[@input_pos]
|
|
166
234
|
@token_string += char
|
|
167
235
|
advance_position(1)
|
|
168
|
-
|
|
236
|
+
if char == "\n"
|
|
237
|
+
advance_position(-1)
|
|
238
|
+
break
|
|
239
|
+
end
|
|
169
240
|
end
|
|
170
241
|
Token.new(TokenType::COMMENT, @token_string, get_token_location)
|
|
171
242
|
end
|
|
172
243
|
|
|
173
|
-
# @return [Token]
|
|
174
244
|
def transition_from_START_DEFINITION
|
|
175
245
|
while @input_pos < @input_string.length
|
|
176
246
|
char = @input_string[@input_pos]
|
|
177
247
|
advance_position(1)
|
|
178
|
-
|
|
179
|
-
if
|
|
180
|
-
|
|
248
|
+
|
|
249
|
+
next if whitespace?(char)
|
|
250
|
+
|
|
251
|
+
if quote?(char)
|
|
252
|
+
raise InvalidWordNameError.new(
|
|
253
|
+
@input_string,
|
|
254
|
+
location: get_token_location,
|
|
255
|
+
note: "Definition names can't have quotes in them"
|
|
256
|
+
)
|
|
181
257
|
else
|
|
182
258
|
advance_position(-1)
|
|
183
259
|
return transition_from_GATHER_DEFINITION_NAME
|
|
184
260
|
end
|
|
185
261
|
end
|
|
186
|
-
|
|
262
|
+
|
|
263
|
+
raise InvalidWordNameError.new(
|
|
264
|
+
@input_string,
|
|
265
|
+
location: get_token_location,
|
|
266
|
+
note: "Got EOS in START_DEFINITION"
|
|
267
|
+
)
|
|
187
268
|
end
|
|
188
269
|
|
|
189
|
-
# @return [Token]
|
|
190
270
|
def transition_from_START_MEMO
|
|
191
271
|
while @input_pos < @input_string.length
|
|
192
272
|
char = @input_string[@input_pos]
|
|
193
273
|
advance_position(1)
|
|
194
|
-
|
|
195
|
-
if
|
|
196
|
-
|
|
274
|
+
|
|
275
|
+
next if whitespace?(char)
|
|
276
|
+
|
|
277
|
+
if quote?(char)
|
|
278
|
+
raise InvalidWordNameError.new(
|
|
279
|
+
@input_string,
|
|
280
|
+
location: get_token_location,
|
|
281
|
+
note: "Memo names can't have quotes in them"
|
|
282
|
+
)
|
|
197
283
|
else
|
|
198
284
|
advance_position(-1)
|
|
199
285
|
return transition_from_GATHER_MEMO_NAME
|
|
200
286
|
end
|
|
201
287
|
end
|
|
202
|
-
|
|
288
|
+
|
|
289
|
+
raise InvalidWordNameError.new(
|
|
290
|
+
@input_string,
|
|
291
|
+
location: get_token_location,
|
|
292
|
+
note: "Got EOS in START_MEMO"
|
|
293
|
+
)
|
|
203
294
|
end
|
|
204
295
|
|
|
205
296
|
def gather_definition_name
|
|
206
297
|
while @input_pos < @input_string.length
|
|
207
298
|
char = @input_string[@input_pos]
|
|
208
299
|
advance_position(1)
|
|
209
|
-
break if
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
300
|
+
break if whitespace?(char)
|
|
301
|
+
|
|
302
|
+
if quote?(char)
|
|
303
|
+
raise InvalidWordNameError.new(
|
|
304
|
+
@input_string,
|
|
305
|
+
location: get_token_location,
|
|
306
|
+
note: "Definition names can't have quotes in them"
|
|
307
|
+
)
|
|
308
|
+
end
|
|
309
|
+
|
|
310
|
+
if ["[", "]", "{", "}"].include?(char)
|
|
311
|
+
raise InvalidWordNameError.new(
|
|
312
|
+
@input_string,
|
|
313
|
+
location: get_token_location,
|
|
314
|
+
note: "Definition names can't have '#{char}' in them"
|
|
315
|
+
)
|
|
216
316
|
end
|
|
317
|
+
|
|
318
|
+
@token_string += char
|
|
217
319
|
end
|
|
218
320
|
end
|
|
219
321
|
|
|
220
|
-
# @return [Token]
|
|
221
322
|
def transition_from_GATHER_DEFINITION_NAME
|
|
222
323
|
note_start_token
|
|
223
324
|
gather_definition_name
|
|
224
325
|
Token.new(TokenType::START_DEF, @token_string, get_token_location)
|
|
225
326
|
end
|
|
226
327
|
|
|
227
|
-
# @return [Token]
|
|
228
328
|
def transition_from_GATHER_MEMO_NAME
|
|
229
329
|
note_start_token
|
|
230
330
|
gather_definition_name
|
|
231
331
|
Token.new(TokenType::START_MEMO, @token_string, get_token_location)
|
|
232
332
|
end
|
|
233
333
|
|
|
234
|
-
# @return [Token]
|
|
235
334
|
def transition_from_GATHER_MODULE
|
|
236
335
|
note_start_token
|
|
237
336
|
while @input_pos < @input_string.length
|
|
238
337
|
char = @input_string[@input_pos]
|
|
239
338
|
advance_position(1)
|
|
240
|
-
|
|
241
|
-
|
|
339
|
+
if whitespace?(char)
|
|
340
|
+
break
|
|
341
|
+
elsif char == "}"
|
|
242
342
|
advance_position(-1)
|
|
243
343
|
break
|
|
244
344
|
else
|
|
@@ -248,50 +348,70 @@ module Forthic
|
|
|
248
348
|
Token.new(TokenType::START_MODULE, @token_string, get_token_location)
|
|
249
349
|
end
|
|
250
350
|
|
|
251
|
-
# @param [String] delim
|
|
252
|
-
# @return [Token]
|
|
253
351
|
def transition_from_GATHER_TRIPLE_QUOTE_STRING(delim)
|
|
254
352
|
note_start_token
|
|
255
353
|
string_delimiter = delim
|
|
354
|
+
@string_delta = StringDelta.new(start: @input_pos, end: @input_pos)
|
|
256
355
|
|
|
257
356
|
while @input_pos < @input_string.length
|
|
258
357
|
char = @input_string[@input_pos]
|
|
259
|
-
if char == string_delimiter &&
|
|
358
|
+
if char == string_delimiter && triple_quote?(@input_pos, char)
|
|
359
|
+
# Check if this triple quote is followed by at least one more quote (greedy mode trigger)
|
|
360
|
+
if @input_pos + 3 < @input_string.length && @input_string[@input_pos + 3] == string_delimiter
|
|
361
|
+
# Greedy mode: include this quote as content and continue looking for the end
|
|
362
|
+
advance_position(1) # Advance by 1 to catch overlapping sequences
|
|
363
|
+
@token_string += string_delimiter
|
|
364
|
+
@string_delta.end = @input_pos
|
|
365
|
+
next
|
|
366
|
+
end
|
|
367
|
+
|
|
368
|
+
# Normal behavior: close at first triple quote
|
|
260
369
|
advance_position(3)
|
|
261
|
-
|
|
370
|
+
token = Token.new(TokenType::STRING, @token_string, get_token_location)
|
|
371
|
+
@string_delta = nil
|
|
372
|
+
return token
|
|
262
373
|
else
|
|
263
374
|
advance_position(1)
|
|
264
375
|
@token_string += char
|
|
376
|
+
@string_delta.end = @input_pos
|
|
265
377
|
end
|
|
266
378
|
end
|
|
267
|
-
|
|
379
|
+
|
|
380
|
+
return nil if @streaming
|
|
381
|
+
|
|
382
|
+
raise UnterminatedStringError.new(@input_string, location: get_token_location)
|
|
268
383
|
end
|
|
269
384
|
|
|
270
|
-
# @param [String] delim
|
|
271
|
-
# @return [Token]
|
|
272
385
|
def transition_from_GATHER_STRING(delim)
|
|
273
386
|
note_start_token
|
|
274
387
|
string_delimiter = delim
|
|
388
|
+
@string_delta = StringDelta.new(start: @input_pos, end: @input_pos)
|
|
275
389
|
|
|
276
390
|
while @input_pos < @input_string.length
|
|
277
391
|
char = @input_string[@input_pos]
|
|
278
392
|
advance_position(1)
|
|
279
393
|
if char == string_delimiter
|
|
280
|
-
|
|
394
|
+
token = Token.new(TokenType::STRING, @token_string, get_token_location)
|
|
395
|
+
@string_delta = nil
|
|
396
|
+
return token
|
|
281
397
|
else
|
|
282
398
|
@token_string += char
|
|
399
|
+
@string_delta.end = @input_pos
|
|
283
400
|
end
|
|
284
401
|
end
|
|
285
|
-
|
|
402
|
+
|
|
403
|
+
return nil if @streaming
|
|
404
|
+
|
|
405
|
+
raise UnterminatedStringError.new(@input_string, location: get_token_location)
|
|
286
406
|
end
|
|
287
407
|
|
|
288
|
-
# @return [Token]
|
|
289
408
|
def transition_from_GATHER_WORD
|
|
290
409
|
note_start_token
|
|
291
410
|
while @input_pos < @input_string.length
|
|
292
411
|
char = @input_string[@input_pos]
|
|
293
412
|
advance_position(1)
|
|
294
|
-
break if
|
|
413
|
+
break if whitespace?(char)
|
|
414
|
+
|
|
295
415
|
if [";", "[", "]", "{", "}", "#"].include?(char)
|
|
296
416
|
advance_position(-1)
|
|
297
417
|
break
|
|
@@ -301,5 +421,32 @@ module Forthic
|
|
|
301
421
|
end
|
|
302
422
|
Token.new(TokenType::WORD, @token_string, get_token_location)
|
|
303
423
|
end
|
|
424
|
+
|
|
425
|
+
# Gather a token that begins with "." and classify it.
#
# Characters are consumed until whitespace (which is consumed too) or one of
# the delimiter characters below (which is left in the input by stepping back
# one position). A token of fewer than two characters - a lone "." - is
# returned as a WORD; anything longer is returned as a DOT_SYMBOL whose
# string omits the leading character.
#
# @return [Token] a WORD or DOT_SYMBOL token
def transition_from_GATHER_DOT_SYMBOL
  note_start_token
  gathered = ""
  terminators = [";", "[", "]", "{", "}", "#"]

  while @input_pos < @input_string.length
    char = @input_string[@input_pos]
    advance_position(1)
    break if whitespace?(char)

    if terminators.include?(char)
      advance_position(-1) # leave the delimiter for the next token
      break
    end

    gathered += char
    @token_string += char
  end

  # "." + at least one more character = 2 minimum for a dot symbol
  return Token.new(TokenType::WORD, gathered, get_token_location) if gathered.length < 2

  Token.new(TokenType::DOT_SYMBOL, gathered[1..-1], get_token_location)
end
|
|
304
451
|
end
|
|
305
452
|
end
|