koara 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,19 @@
1
# A single lexical token: its kind, the source span it covers and its text.
#
# Instances are built by TokenManager#fill_token, which passes the matched
# kind followed by the char stream's begin/end line and column and the
# matched text (`image`).
class Token
  # kind         - Integer token kind (0 by default).
  # begin_line, begin_column, end_line, end_column - position of the token
  #                in the input, as reported by whoever builds the token.
  # image        - the matched text.
  # next         - not set by the constructor; presumably a link to the
  #                following token, assigned by the consumer (TODO confirm).
  attr_accessor :kind, :begin_line, :begin_column, :end_line, :end_column,
                :image, :next

  # Creates a token. Every argument is optional; `kind` defaults to 0 and
  # all positional information and the image default to nil.
  def initialize(kind = 0, begin_line = nil, begin_column = nil, end_line = nil, end_column = nil, image = nil)
    @kind = kind
    @begin_line = begin_line
    @begin_column = begin_column
    @end_line = end_line
    @end_column = end_column
    @image = image
    # Initialise explicitly so the instance variable always exists; reading
    # `next` before it is assigned still yields nil, as before.
    @next = nil
  end
end
@@ -0,0 +1,349 @@
1
+ require_relative 'token'
2
+
3
+
4
# Lexer that turns characters read from a char stream into Token objects.
#
# The automaton (state numbers, bit masks, `jj_*` bookkeeping) looks like a
# port of a JavaCC-generated token manager -- NOTE(review): confirm against
# the upstream grammar before changing any state number or mask.
#
# The stream handed to #initialize must respond to: begin_token, read_char,
# backup(n), image, begin_line, begin_column, end_line and end_column.
# An exception raised by begin_token/read_char is treated as end of input.
class TokenManager

  EOF = 0
  ASTERISK = 1
  BACKSLASH = 2
  BACKTICK = 3
  CHAR_SEQUENCE = 4
  COLON = 5
  DASH = 6
  DIGITS = 7
  DOT = 8
  EOL = 9
  EQ = 10
  ESCAPED_CHAR = 11
  GT = 12
  IMAGE_LABEL = 13
  LBRACK = 14
  LPAREN = 15
  LT = 16
  RBRACK = 17
  RPAREN = 18
  SPACE = 19
  TAB = 20
  UNDERSCORE = 21

  # Sentinel meaning "no token kind matched yet" (0x7fffffff == 2147483647).
  NO_MATCH = 0x7fffffff

  # stream - the character source (see the class comment for its protocol).
  def initialize(stream)
    @jj_rounds = Array.new(8, 0)      # per state: last round in which it was queued
    @jj_state_set = Array.new(16, 0)  # two 8-slot halves used alternately by move_nfa
    @jj_next_states = [2, 3, 5]       # states queued together by check_n_add_states(0, 2)
    @cs = stream
    @round = 0
  end

  # Reads the next token from the stream.
  #
  # Returns a Token. When the stream cannot start a new token (it raises in
  # begin_token) the returned token has kind EOF. Characters that were read
  # past the end of the longest match are pushed back with @cs.backup.
  def get_next_token
    loop do
      begin
        @cur_char = @cs.begin_token
      rescue StandardError
        @matched_kind = EOF
        @matched_pos = -1
        return fill_token
      end

      @matched_kind = NO_MATCH
      @matched_pos = 0
      cur_pos = move_string_literal_dfa0

      # Nothing matched at this position: start over with a fresh token.
      next if @matched_kind == NO_MATCH

      # cur_pos counts consumed characters, @matched_pos is the index of the
      # last character belonging to the match; give back the surplus.
      surplus = cur_pos - @matched_pos - 1
      @cs.backup(surplus) if surplus > 0
      return fill_token
    end
  end

  # Builds a Token from the currently matched kind and the stream's current
  # begin/end position and image.
  def fill_token
    Token.new(@matched_kind, @cs.begin_line, @cs.begin_column, @cs.end_line, @cs.end_column, @cs.image)
  end

  # Dispatches on the first character of a token. Single-character tokens
  # stop immediately; tab, space and backslash continue in the NFA; 'I'/'i'
  # try the literal "image:" (case-insensitive); anything else goes to the
  # NFA in state 6. Returns the number of characters consumed.
  def move_string_literal_dfa0
    case @cur_char.ord
    when 9       then start_nfa_with_states(0, TAB, 8)
    when 32      then start_nfa_with_states(0, SPACE, 8)
    when 40      then stop_at_pos(0, LPAREN)
    when 41      then stop_at_pos(0, RPAREN)
    when 42      then stop_at_pos(0, ASTERISK)
    when 45      then stop_at_pos(0, DASH)
    when 46      then stop_at_pos(0, DOT)
    when 58      then stop_at_pos(0, COLON)
    when 60      then stop_at_pos(0, LT)
    when 61      then stop_at_pos(0, EQ)
    when 62      then stop_at_pos(0, GT)
    when 73, 105 then move_string_literal_dfa1(0x2000) # 'I' / 'i'; 0x2000 == 1 << IMAGE_LABEL
    when 91      then stop_at_pos(0, LBRACK)
    when 92      then start_nfa_with_states(0, BACKSLASH, 7)
    when 93      then stop_at_pos(0, RBRACK)
    when 95      then stop_at_pos(0, UNDERSCORE)
    when 96      then stop_at_pos(0, BACKTICK)
    else              move_nfa(6, 0)
    end
  end

  # Records `kind` as matched at `pos`, then tries to extend the match by
  # running the NFA from `state` on the next character. If the stream has no
  # further character the recorded match stands and pos + 1 is returned.
  def start_nfa_with_states(pos, kind, state)
    @matched_kind = kind
    @matched_pos = pos
    begin
      @cur_char = @cs.read_char
    rescue StandardError
      return pos + 1
    end
    move_nfa(state, pos + 1)
  end

  # Records `kind` as matched at `pos` and returns the consumed length.
  def stop_at_pos(pos, kind)
    @matched_kind = kind
    @matched_pos = pos
    pos + 1
  end

  # Second character of "image:" -- expects 'M' or 'm'.
  def move_string_literal_dfa1(active)
    return 1 unless read_literal_char(0, active)
    return move_string_literal_dfa2(active, 0x2000) if [77, 109].include?(@cur_char.ord)

    start_nfa(0, active)
  end

  # Third character of "image:" -- expects 'A' or 'a'. `old` is unused.
  def move_string_literal_dfa2(old, active)
    return 2 unless read_literal_char(1, active)
    return move_string_literal_dfa3(active, 0x2000) if [65, 97].include?(@cur_char.ord)

    start_nfa(1, active)
  end

  # Fourth character of "image:" -- expects 'G' or 'g'. `old` is unused.
  def move_string_literal_dfa3(old, active)
    return 3 unless read_literal_char(2, active)
    return move_string_literal_dfa4(active, 0x2000) if [71, 103].include?(@cur_char.ord)

    start_nfa(2, active)
  end

  # Fifth character of "image:" -- expects 'E' or 'e'. `old` is unused.
  def move_string_literal_dfa4(old, active)
    return 4 unless read_literal_char(3, active)
    return move_string_literal_dfa5(active, 0x2000) if [69, 101].include?(@cur_char.ord)

    start_nfa(3, active)
  end

  # Sixth character of "image:" -- a ':' completes an IMAGE_LABEL token.
  # `old` is unused.
  def move_string_literal_dfa5(old, active)
    return 5 unless read_literal_char(4, active)
    return stop_at_pos(5, IMAGE_LABEL) if @cur_char.ord == 58 && (active & 0x2000) != 0

    start_nfa(4, active)
  end

  # Abandons the literal match after `pos` and continues in the NFA, starting
  # from the state chosen by stop_string_literal_dfa.
  def start_nfa(pos, active)
    move_nfa(stop_string_literal_dfa(pos, active), pos + 1)
  end

  # Runs the NFA from `start_state`, with `cur_pos` characters of the token
  # already consumed before @cur_char. Updates @matched_kind/@matched_pos on
  # every accepting step (longest match wins) and returns the number of
  # characters consumed when no state remains or the stream is exhausted.
  #
  # @jj_state_set is used as two halves: the states of the current step are
  # scanned from index i - 1 down to starts_at, while states for the next
  # step are appended at @jj_new_state_cnt in the other half.
  def move_nfa(start_state, cur_pos)
    starts_at = 0
    @jj_new_state_cnt = 8
    i = 1
    @jj_state_set[0] = start_state
    kind = NO_MATCH

    loop do
      @round += 1
      @round = 0x80000001 if @round == 0x7fffffff
      code = @cur_char.ord

      if code < 64
        # Bit `code` of a mask is set when that character is accepted.
        l = 1 << code
        loop do
          i -= 1
          case @jj_state_set[i]
          when 6
            if (0x880098feffffd9ff & l) != 0
              kind = CHAR_SEQUENCE if kind > CHAR_SEQUENCE
              check_n_add(0)
            elsif (0x3ff000000000000 & l) != 0 # bits 48..57: '0'..'9'
              kind = DIGITS if kind > DIGITS
              check_n_add(1)
            elsif (0x2400 & l) != 0 # bits 10 and 13: "\n" and "\r"
              kind = EOL if kind > EOL
            elsif (0x100000200 & l) != 0 # bits 9 and 32: tab and space
              check_n_add_states(0, 2)
            end
            # After "\r", state 4 accepts a following "\n" as part of the EOL.
            add_state(4) if code == 13
          when 8
            if (0x2400 & l) != 0
              kind = EOL if kind > EOL
            elsif (0x100000200 & l) != 0
              check_n_add_states(0, 2)
            end
            add_state(4) if code == 13
          when 0
            if (0x880098feffffd9ff & l) != 0
              kind = CHAR_SEQUENCE
              check_n_add(0)
            end
          when 1
            if (0x3ff000000000000 & l) != 0
              kind = DIGITS if kind > DIGITS
              check_n_add(1)
            end
          when 2
            check_n_add_states(0, 2) if (0x100000200 & l) != 0
          when 3
            kind = EOL if (0x2400 & l) != 0 && kind > EOL
          when 4
            kind = EOL if code == 10 && kind > EOL
          when 5
            add_state(4) if code == 13
          when 7
            # Entered after a backslash (see move_string_literal_dfa0).
            kind = ESCAPED_CHAR if (0x77ff670000000000 & l) != 0 && kind > ESCAPED_CHAR
          end
          break if i == starts_at
        end
      elsif code < 128
        l = 1 << (code & 63)
        loop do
          i -= 1
          case @jj_state_set[i]
          when 6
            # `l` is a single set bit and therefore never 0, so the elsif
            # below cannot fire; it is kept to mirror the original automaton.
            if l != 0
              kind = CHAR_SEQUENCE if kind > CHAR_SEQUENCE
              check_n_add(0)
            elsif code == 92
              add_state(7)
            end
          when 0
            if (0xfffffffe47ffffff & l) != 0
              kind = CHAR_SEQUENCE
              check_n_add(0)
            end
          when 7
            kind = ESCAPED_CHAR if (0x1b8000000 & l) != 0 && kind > ESCAPED_CHAR
          end
          break if i == starts_at
        end
      else
        # Character codes of 128 and above always extend a character sequence.
        loop do
          i -= 1
          case @jj_state_set[i]
          when 6, 0
            kind = CHAR_SEQUENCE if kind > CHAR_SEQUENCE
            check_n_add(0)
          end
          break if i == starts_at
        end
      end

      if kind != NO_MATCH
        @matched_kind = kind
        @matched_pos = cur_pos
        kind = NO_MATCH
      end
      cur_pos += 1

      # Swap the halves: what was appended becomes the set to scan next.
      i = @jj_new_state_cnt
      @jj_new_state_cnt = starts_at
      starts_at = 8 - starts_at
      return cur_pos if i == starts_at # no state queued: the automaton is dead

      begin
        @cur_char = @cs.read_char
      rescue StandardError
        return cur_pos
      end
    end
  end

  # Queues @jj_next_states[start..ending] (inclusive) for the next step.
  def check_n_add_states(start, ending)
    start.upto(ending) { |index| check_n_add(@jj_next_states[index]) }
  end

  # Queues `state` for the next step unless it was already queued during the
  # current round.
  def check_n_add(state)
    return if @jj_rounds[state] == @round

    add_state(state)
    @jj_rounds[state] = @round
  end

  # Chooses the NFA state to continue from when a literal match is abandoned
  # after position `pos`. `active` is a bit mask of token kinds still possible
  # (bit 13 == IMAGE_LABEL, bits 19/20 == SPACE/TAB, bit 2 == BACKSLASH).
  # While an "image:" prefix is active, the characters consumed so far count
  # as a CHAR_SEQUENCE. Returns the state number, or -1 when none applies.
  def stop_string_literal_dfa(pos, active)
    if pos == 0
      if (active & 0x2000) != 0
        @matched_kind = CHAR_SEQUENCE
        return 0
      elsif (active & 0x180000) != 0
        return 8
      elsif (active & 0x4) != 0
        return 7
      end
    elsif pos.between?(1, 4) && (active & 0x2000) != 0
      @matched_kind = CHAR_SEQUENCE
      @matched_pos = pos
      return 0
    end
    -1
  end

  private

  # Appends `state` to the set scanned in the next step. The slot is written
  # first and the counter advanced afterwards, exactly like check_n_add, so
  # the state lands inside the range that move_nfa will scan.
  def add_state(state)
    @jj_state_set[@jj_new_state_cnt] = state
    @jj_new_state_cnt += 1
  end

  # Reads the next character of a literal into @cur_char and returns true.
  # When the stream is exhausted the partial literal matched up to `pos` is
  # recorded via stop_string_literal_dfa and false is returned, so the caller
  # can finish the token instead of letting the stream's error escape.
  def read_literal_char(pos, active)
    @cur_char = @cs.read_char
    true
  rescue StandardError
    stop_string_literal_dfa(pos, active)
    false
  end

end
@@ -0,0 +1,46 @@
1
# Stack-based helper for assembling a node tree.
#
# Nodes are pushed onto a stack; open_scope remembers how many nodes were on
# the stack at that moment, and close_scope(n) pops every node pushed since
# then, attaches them to `n` as children and pushes `n` itself.
#
# Nodes passed to close_scope must respond to `add(child, index)`; popped
# children must respond to `parent=`.
class TreeState
  def initialize
    @nodes = []
    @marks = []
    @nodes_on_stack = 0
    @current_mark = 0
  end

  # Starts a new scope: saves the current mark and sets the mark to the
  # present stack height.
  def open_scope
    @marks.push(@current_mark)
    @current_mark = @nodes_on_stack
  end

  # Closes the innermost scope, making every node pushed inside it a child of
  # `n`. Children are popped last-to-first and handed to `n.add(child, index)`
  # with descending indices, so index 0 is the first node pushed in the scope.
  # Finally `n` is pushed onto the stack.
  def close_scope(n)
    remaining = node_arity
    @current_mark = @marks.pop
    while remaining > 0
      remaining -= 1
      child = pop_node
      child.parent = n
      n.add(child, remaining)
    end
    push_node(n)
  end

  # Sets `n.value` to `t.image` and pushes `n` via an open/close scope pair
  # opened right here (so `n` receives no children).
  def add_single_value(n, t)
    open_scope
    n.value = t.image
    close_scope(n)
  end

  # Number of nodes pushed since the current scope was opened.
  def node_arity
    @nodes_on_stack - @current_mark
  end

  # Removes and returns the top node, or nil when the stack is empty. The
  # counter is only decremented when a node is actually removed, so popping
  # an empty stack cannot drive it negative and skew later node_arity values.
  def pop_node
    return nil if @nodes.empty?

    @nodes_on_stack -= 1
    @nodes.pop
  end

  # Pushes `n` onto the stack.
  def push_node(n)
    @nodes.push(n)
    @nodes_on_stack += 1
  end

end
@@ -0,0 +1,76 @@
1
+ require 'koara'
2
+ require 'minitest/autorun'
3
+ require "stringio"
4
+
5
# Tests for CharStream reading from a StringReader, with ASCII and non-ASCII
# input: first-token position, sequential reads, the IOError raised when
# reading past the end, and the accumulated image.
#
# Inherits from Minitest::Test; the legacy MiniTest::Unit::TestCase alias is
# no longer defined by default in current Minitest releases.
class CharStreamTest < Minitest::Test
  def test_begin_token
    cs = CharStream.new(StringReader.new('abcd'))
    assert_equal('a', cs.begin_token)
    assert_equal(1, cs.begin_column)
    assert_equal(1, cs.begin_line)
    assert_equal(1, cs.end_column)
    assert_equal(1, cs.end_line)
  end

  def test_read_char
    cs = CharStream.new(StringReader.new('abcd'))
    assert_equal('a', cs.read_char)
    assert_equal('b', cs.read_char)
    assert_equal('c', cs.read_char)
    assert_equal('d', cs.read_char)
  end

  def test_read_char_till_eof
    cs = CharStream.new(StringReader.new('abcd'))
    # Consume the four available characters outside the assertion so that
    # only the read past the end is allowed to raise.
    4.times { cs.read_char }
    assert_raises(IOError) { cs.read_char }
  end

  def test_get_image
    cs = CharStream.new(StringReader.new('abcd'))
    cs.read_char
    cs.read_char
    assert_equal('ab', cs.image)
  end

  def test_begin_token_with_unicode
    cs = CharStream.new(StringReader.new('ðinæ'))
    assert_equal('ð', cs.begin_token)
    assert_equal(1, cs.begin_column)
    assert_equal(1, cs.begin_line)
    assert_equal(1, cs.end_column)
    assert_equal(1, cs.end_line)
  end

  def test_read_char_with_unicode
    cs = CharStream.new(StringReader.new('ðinæ'))
    assert_equal('ð', cs.read_char)
    assert_equal('i', cs.read_char)
    assert_equal('n', cs.read_char)
    assert_equal('æ', cs.read_char)
  end

  def test_read_char_till_eof_with_unicode
    cs = CharStream.new(StringReader.new('ðinæ'))
    # As above: only the fifth read may raise.
    4.times { cs.read_char }
    assert_raises(IOError) { cs.read_char }
  end

  def test_get_image_with_unicode
    cs = CharStream.new(StringReader.new('ðinæ'))
    cs.read_char
    cs.read_char
    assert_equal('ði', cs.image)
  end

end