grongigo 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. checksums.yaml +7 -0
  2. data/MIT +21 -0
  3. data/README.md +266 -0
  4. data/Rakefile +8 -0
  5. data/bin/grongigo +157 -0
  6. data/examples/calc.grg +17 -0
  7. data/examples/factorial.grg +21 -0
  8. data/examples/fizzbuzz.grg +86 -0
  9. data/examples/hello.grg +5 -0
  10. data/grongigo.gemspec +30 -0
  11. data/lib/grongigo/ast/assign_expr.rb +18 -0
  12. data/lib/grongigo/ast/binary_expr.rb +19 -0
  13. data/lib/grongigo/ast/block_stmt.rb +17 -0
  14. data/lib/grongigo/ast/break_stmt.rb +14 -0
  15. data/lib/grongigo/ast/call_expr.rb +18 -0
  16. data/lib/grongigo/ast/case_clause.rb +18 -0
  17. data/lib/grongigo/ast/char_literal.rb +17 -0
  18. data/lib/grongigo/ast/continue_stmt.rb +14 -0
  19. data/lib/grongigo/ast/expr_stmt.rb +17 -0
  20. data/lib/grongigo/ast/for_stmt.rb +20 -0
  21. data/lib/grongigo/ast/function_decl.rb +20 -0
  22. data/lib/grongigo/ast/identifier.rb +17 -0
  23. data/lib/grongigo/ast/if_stmt.rb +19 -0
  24. data/lib/grongigo/ast/index_expr.rb +18 -0
  25. data/lib/grongigo/ast/node.rb +15 -0
  26. data/lib/grongigo/ast/number_literal.rb +17 -0
  27. data/lib/grongigo/ast/parameter.rb +18 -0
  28. data/lib/grongigo/ast/program.rb +17 -0
  29. data/lib/grongigo/ast/return_stmt.rb +17 -0
  30. data/lib/grongigo/ast/string_literal.rb +17 -0
  31. data/lib/grongigo/ast/switch_stmt.rb +19 -0
  32. data/lib/grongigo/ast/unary_expr.rb +19 -0
  33. data/lib/grongigo/ast/var_decl.rb +19 -0
  34. data/lib/grongigo/ast/while_stmt.rb +18 -0
  35. data/lib/grongigo/ast.rb +30 -0
  36. data/lib/grongigo/codegen.rb +357 -0
  37. data/lib/grongigo/compiler.rb +156 -0
  38. data/lib/grongigo/constants.rb +129 -0
  39. data/lib/grongigo/jp2grg.rb +117 -0
  40. data/lib/grongigo/lexer.rb +349 -0
  41. data/lib/grongigo/parse_error.rb +13 -0
  42. data/lib/grongigo/parser.rb +572 -0
  43. data/lib/grongigo/token.rb +23 -0
  44. data/lib/grongigo.rb +12 -0
  45. metadata +90 -0
@@ -0,0 +1,117 @@
1
+ # frozen_string_literal: true
2
+
3
module Grongigo
  # Utility: convert Japanese kana text and decimal integers to Grongigo.
  class Jp2Grg
    # Per-character kana -> Grongigo katakana mapping. Hiragana and katakana
    # inputs map to the same output; several input rows intentionally share
    # one output row (the mapping is not reversible).
    CONVERSION_TABLE = {
      # Seion (unvoiced)
      'あ' => 'ガ', 'い' => 'ギ', 'う' => 'グ', 'え' => 'ゲ', 'お' => 'ゴ',
      'か' => 'バ', 'き' => 'ビ', 'く' => 'ブ', 'け' => 'ベ', 'こ' => 'ボ',
      'さ' => 'ガ', 'し' => 'ギ', 'す' => 'グ', 'せ' => 'ゲ', 'そ' => 'ゴ',
      'た' => 'ダ', 'ち' => 'ヂ', 'つ' => 'ヅ', 'て' => 'デ', 'と' => 'ド',
      'な' => 'バ', 'に' => 'ビ', 'ぬ' => 'ブ', 'ね' => 'ベ', 'の' => 'ボ',
      'は' => 'ザ', 'ひ' => 'ジ', 'ふ' => 'ズ', 'へ' => 'ゼ', 'ほ' => 'ゾ',
      'ま' => 'ラ', 'み' => 'リ', 'む' => 'ル', 'め' => 'レ', 'も' => 'ロ',
      'や' => 'ジャ', 'ゆ' => 'ジュ', 'よ' => 'ジョ',
      'ら' => 'サ', 'り' => 'シ', 'る' => 'ス', 'れ' => 'セ', 'ろ' => 'ソ',
      'わ' => 'パ', 'を' => 'ゾ', 'ん' => 'ン',
      # Dakuon (voiced)
      'が' => 'ガ', 'ぎ' => 'ギ', 'ぐ' => 'グ', 'げ' => 'ゲ', 'ご' => 'ゴ',
      'ざ' => 'ザ', 'じ' => 'ジ', 'ず' => 'ズ', 'ぜ' => 'ゼ', 'ぞ' => 'ゾ',
      'だ' => 'ザ', 'ぢ' => 'ジ', 'づ' => 'ズ', 'で' => 'ゼ', 'ど' => 'ゾ',
      'ば' => 'ダ', 'び' => 'ヂ', 'ぶ' => 'ヅ', 'べ' => 'デ', 'ぼ' => 'ド',
      'ぱ' => 'マ', 'ぴ' => 'ミ', 'ぷ' => 'ム', 'ぺ' => 'メ', 'ぽ' => 'モ',
      # Small letters
      'ぁ' => 'ァ', 'ぃ' => 'ィ', 'ぅ' => 'ゥ', 'ぇ' => 'ェ', 'ぉ' => 'ォ',
      'ゃ' => 'ャ', 'ゅ' => 'ュ', 'ょ' => 'ョ',
      'っ' => 'ッ',
      # Katakana (direct conversion)
      'ア' => 'ガ', 'イ' => 'ギ', 'ウ' => 'グ', 'エ' => 'ゲ', 'オ' => 'ゴ',
      'カ' => 'バ', 'キ' => 'ビ', 'ク' => 'ブ', 'ケ' => 'ベ', 'コ' => 'ボ',
      'サ' => 'ガ', 'シ' => 'ギ', 'ス' => 'グ', 'セ' => 'ゲ', 'ソ' => 'ゴ',
      'タ' => 'ダ', 'チ' => 'ヂ', 'ツ' => 'ヅ', 'テ' => 'デ', 'ト' => 'ド',
      'ナ' => 'バ', 'ニ' => 'ビ', 'ヌ' => 'ブ', 'ネ' => 'ベ', 'ノ' => 'ボ',
      'ハ' => 'ザ', 'ヒ' => 'ジ', 'フ' => 'ズ', 'ヘ' => 'ゼ', 'ホ' => 'ゾ',
      'マ' => 'ラ', 'ミ' => 'リ', 'ム' => 'ル', 'メ' => 'レ', 'モ' => 'ロ',
      'ヤ' => 'ジャ', 'ユ' => 'ジュ', 'ヨ' => 'ジョ',
      'ラ' => 'サ', 'リ' => 'シ', 'ル' => 'ス', 'レ' => 'セ', 'ロ' => 'ソ',
      'ワ' => 'パ', 'ヲ' => 'ゾ', 'ン' => 'ン',
      'ガ' => 'ガ', 'ギ' => 'ギ', 'グ' => 'グ', 'ゲ' => 'ゲ', 'ゴ' => 'ゴ',
      'ザ' => 'ザ', 'ジ' => 'ジ', 'ズ' => 'ズ', 'ゼ' => 'ゼ', 'ゾ' => 'ゾ',
      'ダ' => 'ザ', 'ヂ' => 'ジ', 'ヅ' => 'ズ', 'デ' => 'ゼ', 'ド' => 'ゾ',
      'バ' => 'ダ', 'ビ' => 'ヂ', 'ブ' => 'ヅ', 'ベ' => 'デ', 'ボ' => 'ド',
      'パ' => 'マ', 'ピ' => 'ミ', 'プ' => 'ム', 'ペ' => 'メ', 'ポ' => 'モ'
    }.freeze

    # Grongigo words for the base-9 digits 0..8, indexed by digit value.
    DIGIT_WORDS = %w[ゼゼソ パパン ドググ グシギ ズゴゴ ズガギ ギブグ ゲズン ゲギド].freeze
    # Word for nine, the radix of the number system.
    NINE_WORD = 'バギン'.freeze
    # Particle joining factors of one term (multiplication).
    MULTIPLY_PARTICLE = 'グ'.freeze
    # Particle joining the terms of a number (addition).
    ADD_PARTICLE = 'ド'.freeze

    # Convert Japanese text to Grongigo, one character at a time.
    #
    # Any entry of PROPER_NOUNS found at the current position is copied
    # through verbatim (the first matching entry wins). Every other character
    # is replaced by its CONVERSION_TABLE entry, or kept as-is when the table
    # has no entry for it.
    #
    # @param text [String] text to convert
    # @return [String] the converted text
    def self.convert(text)
      chars = text.chars
      result = +''
      i = 0

      while i < chars.length
        # Empty entries are ignored: they would match everywhere without
        # consuming input and the loop would never advance.
        noun = PROPER_NOUNS.find { |candidate| !candidate.empty? && text[i, candidate.length] == candidate }

        if noun
          result << noun
          i += noun.length
        else
          char = chars[i]
          result << CONVERSION_TABLE.fetch(char, char)
          i += 1
        end
      end

      result
    end

    # Convert a decimal integer to its base-9 Grongigo spelling.
    #
    # Each non-zero base-9 digit d at position p becomes one term: the digit
    # word itself for p == 0, otherwise p copies of the word for nine joined
    # by the multiply particle, followed by the multiply particle and the
    # digit word unless d == 1. Terms are joined by the add particle, most
    # significant first. Example: 10 => "バギンドパパン" (9 + 1).
    #
    # @param num [Integer] non-negative integer to spell out
    # @return [String] Grongigo number word
    # @raise [ArgumentError] if num is negative or not an Integer (such
    #   input previously yielded an empty or meaningless string)
    def self.num2grg(num)
      unless num.is_a?(Integer) && !num.negative?
        raise ArgumentError, "num must be a non-negative Integer (got #{num.inspect})"
      end

      terms = []
      # Integer#digits yields the least significant digit first, so the
      # index is the power of nine; unshift restores most-significant-first.
      num.digits(9).each_with_index do |digit, power|
        next if digit.zero?

        terms.unshift(grongigo_term(digit, power))
      end

      # Only zero has no non-zero digit at all.
      return DIGIT_WORDS[0] if terms.empty?

      terms.join(ADD_PARTICLE)
    end

    # Spell the single term digit * 9**power (digit in 1..8).
    def self.grongigo_term(digit, power)
      return DIGIT_WORDS[digit] if power.zero?

      multiplier = Array.new(power, NINE_WORD).join(MULTIPLY_PARTICLE)
      digit == 1 ? multiplier : "#{multiplier}#{MULTIPLY_PARTICLE}#{DIGIT_WORDS[digit]}"
    end
    private_class_method :grongigo_term
  end
end
@@ -0,0 +1,349 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'constants'
4
+ require_relative 'token'
5
+
6
module Grongigo
  # Lexical analyzer: turns Grongigo source text into a flat array of Tokens.
  #
  # Relies on Token and on the TYPE_KEYWORDS, CONTROL_KEYWORDS,
  # OTHER_KEYWORDS, OPERATORS and DIGITS tables, which are defined outside
  # this class.
  class Lexer
    # Token types. :unknown is what next_token emits for characters it
    # cannot classify; it is appended last so existing entries keep their
    # positions.
    TOKEN_TYPES = %i[
      type_keyword
      control_keyword
      other_keyword
      operator
      number
      identifier
      string_literal
      char_literal
      open_brace
      close_brace
      open_paren
      close_paren
      open_bracket
      close_bracket
      comma
      semicolon
      colon
      newline
      eof
      unknown
    ].freeze

    # Single-character punctuation => [token type, normalized token value].
    # ASCII and full-width forms are both accepted; the full-width keys are
    # written as escapes so they survive Unicode normalization of this file.
    PUNCTUATION = {
      '(' => [:open_paren, '('],
      "\uFF08" => [:open_paren, '('],    # FULLWIDTH LEFT PARENTHESIS
      ')' => [:close_paren, ')'],
      "\uFF09" => [:close_paren, ')'],   # FULLWIDTH RIGHT PARENTHESIS
      '[' => [:open_bracket, '['],
      "\uFF3B" => [:open_bracket, '['],  # FULLWIDTH LEFT SQUARE BRACKET
      ']' => [:close_bracket, ']'],
      "\uFF3D" => [:close_bracket, ']'], # FULLWIDTH RIGHT SQUARE BRACKET
      '、' => [:comma, ','],
      ',' => [:comma, ','],
      "\uFF0C" => [:comma, ','],         # FULLWIDTH COMMA
      '。' => [:semicolon, ';'],
      ':' => [:colon, ':'],
      "\uFF1A" => [:colon, ':']          # FULLWIDTH COLON
    }.freeze

    # Whitespace between tokens, including newlines and the ideographic
    # (full-width) space U+3000.
    WHITESPACE = /[ \t\r\n\u3000]/.freeze
    KATAKANA_START = /\p{Katakana}/.freeze
    # The long-vowel mark is not in the Katakana script class, so it is
    # listed explicitly; it may continue a word but not start one.
    KATAKANA_WORD_CHAR = /[\p{Katakana}ー]/.freeze
    ASCII_IDENT_START = /[a-zA-Z_]/.freeze
    ASCII_IDENT_CHAR = /[a-zA-Z0-9_]/.freeze
    DECIMAL_DIGIT = /[0-9]/.freeze

    private_constant :PUNCTUATION, :WHITESPACE, :KATAKANA_START, :KATAKANA_WORD_CHAR,
                     :ASCII_IDENT_START, :ASCII_IDENT_CHAR, :DECIMAL_DIGIT

    # @param source [String] program text to tokenize
    def initialize(source)
      @source = source
      @pos = 0
      @line = 1
      @column = 1
      @tokens = []
    end

    # Tokenize the whole source.
    #
    # The cursor is reset first, so calling this more than once returns the
    # same token list each time.
    #
    # @return [Array<Token>] all tokens, terminated by an :eof token
    def tokenize
      @pos = 0
      @line = 1
      @column = 1
      @tokens = []

      loop do
        skip_whitespace_and_comments
        break if eof?

        @tokens << next_token
      end

      @tokens << Token.new(:eof, nil, @line, @column)
    end

    private

    def eof?
      @pos >= @source.length
    end

    # Character under the cursor, or nil at end of input.
    def current_char
      @source[@pos]
    end

    # Up to n characters starting at the cursor, without consuming them.
    def peek(n = 1)
      @source[@pos, n]
    end

    # Consume n characters, keeping @line/@column in sync.
    def advance(n = 1)
      n.times do
        if current_char == "\n"
          @line += 1
          @column = 1
        else
          @column += 1
        end
        @pos += 1
      end
    end

    # Skip everything that produces no token: whitespace (newlines
    # included), line comments and block comments, in any interleaving.
    def skip_whitespace_and_comments
      loop do
        advance while !eof? && WHITESPACE.match?(current_char)

        case peek(2)
        when 'ゴゴ' then skip_line_comment
        when 'ゴビ' then skip_block_comment
        else break
        end
      end
    end

    # Line comment: "ゴゴ" up to (not including) the end of the line
    # (equivalent to //).
    def skip_line_comment
      advance(2)
      advance until eof? || current_char == "\n"
    end

    # Block comment: "ゴビ" ... "ビゴ" (equivalent to /* */). An
    # unterminated block comment silently runs to the end of input.
    def skip_block_comment
      advance(2)
      until eof?
        if peek(2) == 'ビゴ'
          advance(2)
          break
        end
        advance
      end
    end

    # Scan one token starting at the cursor. Must not be called at end of
    # input.
    def next_token
      line = @line
      column = @column
      char = current_char

      # String literal 「」 / character literal 『』
      return scan_string_literal(line, column) if char == '「'
      return scan_char_literal(line, column) if char == '『'

      # Brackets and delimiters
      if (punctuation = PUNCTUATION[char])
        advance
        return Token.new(punctuation[0], punctuation[1], line, column)
      end

      # Katakana-based identifier/keyword/number
      return scan_katakana_word(line, column) if KATAKANA_START.match?(char)

      # ASCII characters (allowed as variable names)
      return scan_ascii_identifier(line, column) if ASCII_IDENT_START.match?(char)

      # Decimal literals
      return scan_decimal_number(line, column) if DECIMAL_DIGIT.match?(char)

      # Anything else becomes an :unknown token carrying the character.
      advance
      Token.new(:unknown, char, line, column)
    end

    def scan_string_literal(start_line, start_column)
      Token.new(:string_literal, scan_delimited('」'), start_line, start_column)
    end

    def scan_char_literal(start_line, start_column)
      Token.new(:char_literal, scan_delimited('』'), start_line, start_column)
    end

    # Consume an opening delimiter, the text up to `closer`, and `closer`
    # itself; return the text in between. There is no escape syntax. An
    # unterminated literal silently extends to the end of input.
    def scan_delimited(closer)
      advance # opening delimiter
      start = @pos
      advance until eof? || current_char == closer
      value = @source[start...@pos]
      advance unless eof? # closing delimiter
      value
    end

    def scan_katakana_word(start_line, start_column)
      start = @pos
      advance while !eof? && KATAKANA_WORD_CHAR.match?(current_char)

      token_type, token_value = classify_katakana_word(@source[start...@pos])
      Token.new(token_type, token_value, start_line, start_column)
    end

    # Classify a katakana word. Checked in order: block delimiters, type
    # keywords, control keywords, other keywords, operators, number words;
    # anything else is an identifier.
    #
    # @return [Array(Symbol, Object)] token type and token value
    def classify_katakana_word(word)
      # Block delimiters
      return [:open_brace, '{'] if word == 'ザジレ'
      return [:close_brace, '}'] if word == 'ゴパシ'

      return [:type_keyword, TYPE_KEYWORDS[word]] if TYPE_KEYWORDS.key?(word)
      return [:control_keyword, CONTROL_KEYWORDS[word]] if CONTROL_KEYWORDS.key?(word)

      if OTHER_KEYWORDS.key?(word)
        value = OTHER_KEYWORDS[word]
        # An empty mapping means the keyword stands for itself.
        return [:other_keyword, value.empty? ? word : value]
      end

      return [:operator, OPERATORS[word]] if OPERATORS.key?(word)

      number = try_parse_number(word)
      return [:number, number] if number

      [:identifier, word]
    end

    # @return [Numeric, nil] value of the number word, or nil if `word` is
    #   not a number
    def try_parse_number(word)
      # Single digit
      return DIGITS[word] if DIGITS.key?(word)

      # Compound number (e.g., バギンドパパン = 9 + 1 = 10)
      parse_compound_number(word)
    end

    def parse_compound_number(word)
      return nil if word.empty?

      evaluate_number_expression(tokenize_number_expression(word))
    end

    # Split a number word into [:digit, value], [:add, nil] and
    # [:multiply, nil] tokens. Digit words are tried before the one-character
    # particles, longest first, so a digit word that begins with a particle
    # character wins over the particle.
    #
    # @return [Array<Array>] the tokens, or [] if any part of `word` is
    #   neither a digit word nor a particle
    def tokenize_number_expression(word)
      digit_words = DIGITS.keys.sort_by { |key| -key.length }
      tokens = []
      rest = word

      until rest.empty?
        digit_word = digit_words.find { |candidate| rest.start_with?(candidate) }

        if digit_word
          tokens << [:digit, DIGITS[digit_word]]
          rest = rest[digit_word.length..]
        elsif rest.start_with?('ド')
          tokens << [:add, nil]
          rest = rest[1..]
        elsif rest.start_with?('グ')
          tokens << [:multiply, nil]
          rest = rest[1..]
        else
          return []
        end
      end

      tokens
    end

    # Evaluate a tokenized number word as a sum of products.
    # Example: バギングバギンドパパン = (9*9) + 1 = 82
    #
    # The tokens must strictly alternate digit, particle, digit, ...,
    # starting and ending with a digit. Particle-only words such as "ドド"
    # are therefore not numbers, so they lex as identifiers.
    #
    # @return [Numeric, nil] the value, or nil for a malformed expression
    def evaluate_number_expression(tokens)
      # A well-formed expression has n digits and n - 1 particles.
      return nil if tokens.empty? || tokens.length.even?

      total = 0
      product = nil

      tokens.each_with_index do |(kind, value), index|
        if index.even?
          return nil unless kind == :digit

          product = product ? product * value : value
        elsif kind == :add
          # Close the running product and start a new term.
          total += product
          product = nil
        elsif kind != :multiply
          return nil
        end
      end

      # The odd length guarantees the last token was a digit.
      total + product
    end

    def scan_ascii_identifier(start_line, start_column)
      start = @pos
      advance while !eof? && ASCII_IDENT_CHAR.match?(current_char)
      Token.new(:identifier, @source[start...@pos], start_line, start_column)
    end

    # Scan a decimal literal: digits, optionally followed by a single "."
    # and more digits. A "." that is not followed by a digit is left in the
    # input rather than being swallowed into the number.
    #
    # @return [Token] :number token holding an Integer, or a Float when a
    #   fractional part is present
    def scan_decimal_number(start_line, start_column)
      start = @pos
      advance while !eof? && DECIMAL_DIGIT.match?(current_char)

      fractional = current_char == '.' && DECIMAL_DIGIT.match?(@source[@pos + 1])
      if fractional
        advance # decimal point
        advance while !eof? && DECIMAL_DIGIT.match?(current_char)
      end

      text = @source[start...@pos]
      Token.new(:number, fractional ? text.to_f : text.to_i, start_line, start_column)
    end
  end
end
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
module Grongigo
  # Error raised for a parse failure. Optionally carries the token that was
  # being processed when the error was created.
  class ParseError < StandardError
    # @param message [String] human-readable description of the failure
    # @param token [Object, nil] the token associated with the failure, if any
    def initialize(message, token = nil)
      @token = token
      super(message)
    end

    # @return [Object, nil] the token passed at construction time
    attr_reader :token
  end
end