statsailr 0.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +10 -0
- data/.travis.yml +6 -0
- data/Gemfile +7 -0
- data/HISTORY.md +15 -0
- data/LICENSE.txt +675 -0
- data/README.md +287 -0
- data/Rakefile +10 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/example/blank.slr +3 -0
- data/example/category.slr +5 -0
- data/example/example_read.slr +10 -0
- data/example/iris.csv +151 -0
- data/example/mtcars.rda +0 -0
- data/example/new_mtcars.csv +33 -0
- data/example/new_mtcars.rda +0 -0
- data/example/plot_reg_example.slr +55 -0
- data/example/scatter.png +0 -0
- data/exe/sailr +54 -0
- data/exe/sailrREPL +75 -0
- data/lib/statsailr.rb +7 -0
- data/lib/statsailr/block_builder/sts_block.rb +167 -0
- data/lib/statsailr/block_builder/sts_block_parse_proc_opts.rb +168 -0
- data/lib/statsailr/block_to_r/proc_setting_support/proc_opt_validator.rb +52 -0
- data/lib/statsailr/block_to_r/proc_setting_support/proc_setting_manager.rb +49 -0
- data/lib/statsailr/block_to_r/proc_setting_support/proc_setting_module.rb +44 -0
- data/lib/statsailr/block_to_r/sts_block_to_r.rb +98 -0
- data/lib/statsailr/block_to_r/sts_lazy_func_gen.rb +236 -0
- data/lib/statsailr/block_to_r/top_stmt/top_stmt_to_r_func.rb +182 -0
- data/lib/statsailr/parser/sts_gram_node.rb +9 -0
- data/lib/statsailr/parser/sts_parse.output +831 -0
- data/lib/statsailr/parser/sts_parse.ry +132 -0
- data/lib/statsailr/parser/sts_parse.tab.rb +682 -0
- data/lib/statsailr/scanner/sample1.sts +37 -0
- data/lib/statsailr/scanner/sts_scanner.rb +433 -0
- data/lib/statsailr/scanner/test_sample1.rb +8 -0
- data/lib/statsailr/sts_build_exec.rb +304 -0
- data/lib/statsailr/sts_controller.rb +66 -0
- data/lib/statsailr/sts_output/output_manager.rb +192 -0
- data/lib/statsailr/sts_runner.rb +17 -0
- data/lib/statsailr/sts_server.rb +85 -0
- data/lib/statsailr/version.rb +3 -0
- data/statsailr.gemspec +32 -0
- metadata +133 -0
@@ -0,0 +1,37 @@
|
|
1
|
+
// comment
|
2
|
+
/*
|
3
|
+
multiple line comments
|
4
|
+
*/
|
5
|
+
|
6
|
+
LOAD builtin = "mtcars"
|
7
|
+
|
8
|
+
DATA new_mtcars set=mtcars
|
9
|
+
if(cyl > 4){
|
10
|
+
powerful = 1
|
11
|
+
}else{
|
12
|
+
powerful = 0
|
13
|
+
}
|
14
|
+
END
|
15
|
+
|
16
|
+
PROC FREQ data=new_mtcars
|
17
|
+
/* Calculate frequencies & show in list format
|
18
|
+
*/
|
19
|
+
tables cyl * powerful // categorize by cyl and powerful
|
20
|
+
END
|
21
|
+
|
22
|
+
/*
|
23
|
+
PROC SAVE data=new_mtcars
|
24
|
+
to "./new_mtcars.RData"
|
25
|
+
END
|
26
|
+
*/
|
27
|
+
|
28
|
+
LOAD csv = "./iris.csv" as = iris sep = "," head = 1
|
29
|
+
|
30
|
+
/*
|
31
|
+
PROC GLM data=iris
|
32
|
+
model Species ~ Sepal.Length Sepal.Width
|
33
|
+
family binomial
|
34
|
+
END
|
35
|
+
*/
|
36
|
+
|
37
|
+
|
@@ -0,0 +1,433 @@
|
|
1
|
+
require "strscan"
|
2
|
+
|
3
|
+
# Regular expressions shared by the StatSailr scanner.
#
# None of the patterns is anchored; they are meant to be handed to
# StringScanner#scan, which only matches at the current scan position.
module STSConstants
  # Identifier: starts with a letter, '-', '_' or '.', and may continue with
  # digits as well.
  IDENT_PATTERN = /[a-zA-Z\-_.][0-9a-zA-Z\-_.]*/
  # PROC instruction name; same shape as an identifier.
  PROC_INST_PATTERN = /[a-zA-Z\-_.][0-9a-zA-Z\-_.]*/
  # Optionally signed decimal number that contains a '.' (fraction digits are optional).
  FLOATP_PATTERN = /[-+]?(([1-9][0-9]*)|0)\.[0-9]*/
  # Optionally signed integer without leading zeros.
  # The alternation is grouped so that the optional sign applies to both
  # branches; ungrouped, `|0` split the whole expression and "+0" / "-0"
  # could not be matched as a single integer.
  INT_PATTERN = /[-+]?(([1-9][0-9]*)|0)/
  # Single quoted string on one line; \' may appear inside.
  SQ_STR_PATTERN = /'(\\'|[^'\n])*'/
  # Double quoted string on one line; \" may appear inside.
  DQ_STR_PATTERN = /"(\\"|[^"\n])*"/
end
|
11
|
+
|
12
|
+
module STSScannerSupport
  # Resolves the backslash escapes of a double quoted string literal.
  # The behavior is intended to be the same as libsailr (or datasailr).
  #
  # \t, \n and \r turn into the matching control character; every other
  # escaped character (\\, \', \", \? and anything else) stands for itself.
  #
  # @param str [String] contents of the literal, without the surrounding quotes
  # @return [String] a new string with all escapes resolved
  # @raise [RuntimeError] if str ends with a single, unpaired backslash
  def interpret_escape_sequences(str)
    control_chars = { "t" => "\t", "n" => "\n", "r" => "\r" }
    after_backslash = false

    decoded = str.each_char.each_with_object(+"") do |ch, buffer|
      if after_backslash
        # Unknown escapes simply drop the backslash.
        buffer << control_chars.fetch(ch, ch)
        after_backslash = false
      elsif ch == "\\"
        after_backslash = true
      else
        buffer << ch
      end
    end

    # A backslash still pending here means it was the last character.
    raise "Tokenizer error: double quoted string literal should never end with \\" if after_backslash

    decoded
  end
end
|
51
|
+
|
52
|
+
# Low-level tokenizer for StatSailr scripts.
#
# Wraps a StringScanner over the script text, offers scanning helpers for the
# different parts of a script (top level statements, DATA blocks, PROC blocks)
# and remembers which of those parts is currently being read (@scan_state is
# one of :TOP, :DATA or :PROC). #start must be called before any scanning.
class STSScanner
  include ::STSConstants
  include ::STSScannerSupport

  # Operators recognised inside PROC statements, tried in this order.
  # "%in%" has to be listed before the bare "%" so that it wins.
  PROC_SPECIAL_TOKENS = [
    [/=/,    :P_EQ],
    [/\*/,   :P_MULT],
    [/\+/,   :P_PLUS],
    [/-/,    :P_MINUS],
    [/\^/,   :P_HAT],
    [/%in%/, :P_IN],
    [/%/,    :P_PERC],
    [/~/,    :P_TILDA],
    [/:/,    :P_COLON],
    [/\(/,   :P_LPAR],
    [/\)/,   :P_RPAR],
    [/\[/,   :P_LSQBR],
    [/\]/,   :P_RSQBR],
    [/,/,    :P_COMMA]
  ].freeze

  # Initialization & Terminating methods

  # @param script [String] the text to be scanned
  def initialize(script)
    @script = script
  end

  # Creates the underlying StringScanner and enters the :TOP state.
  def start
    @scanner = StringScanner.new(@script)
    @scan_state = :TOP
  end

  # Moves the scan position to the end of the script.
  def terminate
    @scanner.terminate
  end

  # Delegate corresponding methods to StringScanner

  def scan(pattern)
    @scanner.scan(pattern)
  end

  def scan_until(pattern)
    @scanner.scan_until(pattern)
  end

  def skip_until(pattern)
    @scanner.skip_until(pattern)
  end

  def matched
    @scanner.matched
  end

  def eos?
    @scanner.eos?
  end

  def bol?
    @scanner.bol?
  end

  def check(pattern)
    @scanner.check(pattern)
  end

  # Additional scanner methods

  # Consumes any run of spaces and tabs (possibly empty).
  def skip_spaces
    scan(/[ \t]*/)
  end

  # Consumes everything up to and including the next newline.
  def skip_line
    scan_until(/\n/)
  end

  # Consumes a "// ..." comment (with optional leading blanks) through the end
  # of its line. The newline is optional so that a comment on a final line
  # without a line terminator is consumed as well; otherwise such a comment
  # inside a PROC block would never be consumed by the driver loop.
  #
  # @return [Boolean] true if a comment was consumed
  def skip_rest_after_comment_sign
    scan(/[ \t]*\/\/.*\n?/) ? true : false
  end

  # Consumes a line that holds nothing but spaces/tabs. A whitespace-only
  # final line without a newline counts as empty, too.
  #
  # @return [Boolean] true if an empty line was consumed
  def skip_empty_line
    scan(/[ \t]*(?:\n|\z)/) ? true : false
  end

  # Consumes a "/* ... */" comment, including whitespace in front of it.
  # The body may span several lines and may be empty ("/**/").
  # NOTE(review): an opening "/*" that is never closed is not consumed here;
  # callers have to cope with that case themselves.
  #
  # @return [Boolean] true if a comment was consumed
  def skip_multiple_line_comment
    scan(/\s*\/\*.*?\*\//m) ? true : false
  end

  def scan_ident
    scan(IDENT_PATTERN)
  end

  # Tokenizes the option part of a statement up to the end of the line
  # (the newline itself is consumed, but produces no token).
  #
  # @return [Array<Array>] [type, value] pairs with type :ASSIGN, :IDENT,
  #   :NUMBER or :STRING
  # @raise [RuntimeError] on input that matches none of the token patterns
  def tokenize_options
    tokens = []
    loop do
      case
      when eos? || scan(/\n/)
        break
      when scan(/=/)
        tokens << [:ASSIGN, matched]
      when scan(IDENT_PATTERN)
        tokens << [:IDENT, matched]
      when scan(FLOATP_PATTERN)
        tokens << [:NUMBER, matched.to_f]
      when scan(INT_PATTERN)
        tokens << [:NUMBER, matched.to_i]
      when scan(SQ_STR_PATTERN)
        # Strip the quotes; single quoted strings are taken literally.
        tokens << [:STRING, matched[1..-2]]
      when scan(DQ_STR_PATTERN)
        tokens << [:STRING, interpret_escape_sequences(matched[1..-2])]
      when scan(/[ \t]/)
        # ignore
      else
        raise "options cannot be tokenized."
      end
    end
    tokens
  end

  # Consumes the rest of the current line including its newline. On a final
  # line without a newline the remaining text is consumed instead of failing,
  # so #matched is a String afterwards whenever input is left.
  def scan_whole_line
    scan(/.*\n?/)
  end

  # Consumes an END line if the scan position is at one.
  # Anything following END on the same line is skipped; the first
  # non-blank chunk of it is reported on stdout.
  #
  # @return [Boolean] true if an END line was consumed
  def scan_end_line?
    return false unless scan(/[ \t]*E[Nn][Dd][ \t]*/)

    puts "Script after END is ignored (" + matched + ")" if scan(/\S+/)
    # Move past the end of the line (or to the end of input on the last line).
    scan(/.*\n?/)
    true
  end

  # Additional features for TOPLEVEL

  # Scans an identifier, allowing any whitespace (newlines included) before it.
  def scan_toplevel_instruction
    inst = Regexp.new(/\s*/.source + IDENT_PATTERN.source)
    scan(inst)
  end

  # Additional features for data script

  def prepare_data_script
    @data_script = ""
  end

  def append_to_data_script(script)
    @data_script << script
  end

  def get_data_script
    @data_script
  end

  # Additional features for proc stmts

  def prepare_proc_tokens
    @proc_tokens = []
  end

  def get_proc_tokens
    @proc_tokens
  end

  def append_to_proc_tokens(tokens)
    @proc_tokens.concat tokens
  end

  # Scans the instruction name at the start of a PROC statement line.
  def scan_proc_inst
    skip_spaces
    scan(PROC_INST_PATTERN)
  end

  # Scans one operator/punctuation token of a PROC statement.
  #
  # @return [Symbol, nil] the token type, or nil if no operator is next.
  #   After a successful call #matched holds the operator text.
  def scan_proc_special
    PROC_SPECIAL_TOKENS.each do |pattern, type|
      return type if scan(pattern)
    end
    nil
  end

  # Tokenizes the remainder of a PROC statement line.
  # Stops after the newline, at the end of input, or right in front of a
  # comment opener ("//" or "/*"), which is left unconsumed.
  #
  # @return [Array<Array>] [type, value] pairs
  # @raise [RuntimeError] on input that matches none of the token patterns
  def tokenize_proc_line
    tokens = []
    loop do
      case
      when eos? || scan(/\n/)
        break
      when scan(IDENT_PATTERN)
        tokens << [:IDENT, matched]
      when scan(FLOATP_PATTERN)
        tokens << [:NUMBER, matched.to_f]
      when scan(INT_PATTERN)
        tokens << [:NUMBER, matched.to_i]
      when scan(SQ_STR_PATTERN)
        tokens << [:STRING, matched[1..-2]]
      when scan(DQ_STR_PATTERN)
        tokens << [:STRING, interpret_escape_sequences(matched[1..-2])]
      when type = scan_proc_special
        tokens << [type, matched]
      when scan(/[ \t]/) # Separators
        # ignore
      when scan(/\/\//) # Start comment
        @scanner.unscan
        break
      when scan(/\/\*/) # Start comment
        @scanner.unscan
        break
      when scan(/\//) # slash to start options
        tokens << [:SEP_SLASH, matched]
      else
        # Grab what is left of the line for the message; this also works on a
        # final line that has no newline.
        rest = scan(/.*\n?/)
        raise "Current PROC line cannot be tokenized." + rest.to_s
      end
    end
    tokens
  end

  # Manage scan states

  def scan_state_top?
    @scan_state == :TOP
  end

  def scan_state_data?
    @scan_state == :DATA
  end

  def scan_state_proc?
    @scan_state == :PROC
  end

  def scan_state_set_top
    @scan_state = :TOP
  end

  def scan_state_set_data
    @scan_state = :DATA
  end

  def scan_state_set_proc
    @scan_state = :PROC
  end

  def get_scan_state
    @scan_state
  end
end
|
333
|
+
|
334
|
+
# Drives an STSScanner over a whole script and collects the token stream.
class STSScanDriver
  # @param path [String] handed unchanged to STSScanner.new as the text to scan.
  #   NOTE(review): despite the name, nothing in this class reads a file, so
  #   callers presumably pass the script source itself - confirm.
  def initialize(path)
    @source_path = path
  end

  # Tokenizes the script from start to end.
  #
  # @return [Array<Array>] [type, value] pairs in source order
  # @raise [RuntimeError] if the scanner reports a state other than
  #   top/data/proc, or if a line cannot be tokenized by the scanner
  def tokenize
    s = STSScanner.new(@source_path)
    s.start
    @tokens = []

    until s.eos?
      if s.scan_state_top?
        scan_top_level(s)
      elsif s.scan_state_data?
        scan_data_line(s)
      elsif s.scan_state_proc?
        scan_proc_line(s)
      else
        # The state is a Symbol; convert it before building the message.
        raise "Error: StatSailrScanner has an invalid scan state " + s.get_scan_state.to_s
      end
    end

    s.terminate
    @tokens
  end

  private

  # Handles one construct at top level: a DATA or PROC header, a comment,
  # an empty line, a top level instruction, or an unknown line (reported on
  # stdout and discarded).
  def scan_top_level(s)
    if s.bol? && s.scan(/[ \t]*D[Aa][Tt][Aa]/)
      open_data_block(s)
    elsif s.bol? && s.scan(/[ \t]*P[Rr][Oo][Cc]/)
      open_proc_block(s)
    elsif s.skip_rest_after_comment_sign || s.skip_empty_line || s.skip_multiple_line_comment
      # Nothing to emit for comments and empty lines.
    elsif s.scan_toplevel_instruction
      @tokens << [:TOP_INST, s.matched]
      @tokens.concat s.tokenize_options
      @tokens << [:TOP_INST_END, "TOP_INST_END"]
      @tokens << [:TERMIN, "\n"]
    else
      print("Unknown part on TOP LEVEL(" + s.get_scan_state.to_s + ") ")
      p s.check(/...../) # Show five letters
      # Discard the current line, or the rest of the input on the last line.
      s.skip_until(/\n/) || s.skip_until(/$/)
    end
  end

  # Emits the tokens of a DATA header line ("DATA" has already been consumed)
  # and switches the scanner to the data state.
  # The identifier tokens carry whatever the scanner matched, which is nil
  # when no identifier is present at that position.
  def open_data_block(s)
    @tokens << [:DATA_START, "DATA"]
    s.skip_spaces
    if s.scan(/\:/)
      @tokens << [:COLON, s.matched]
      s.skip_spaces
      s.scan_ident
      @tokens << [:IDENT, s.matched]
      s.skip_spaces
    end
    s.scan_ident
    @tokens << [:IDENT, s.matched]
    s.skip_spaces
    @tokens.concat s.tokenize_options
    @tokens << [:TERMIN, "\n"]
    s.scan_state_set_data
    s.prepare_data_script
  end

  # Emits the tokens of a PROC header line ("PROC" has already been consumed)
  # and switches the scanner to the proc state.
  def open_proc_block(s)
    @tokens << [:PROC_START, "PROC"]
    s.skip_spaces
    s.scan_ident
    @tokens << [:IDENT, s.matched]
    s.skip_spaces
    @tokens.concat s.tokenize_options
    @tokens << [:TERMIN, "\n"]
    s.scan_state_set_proc
    s.prepare_proc_tokens
  end

  # Inside a DATA block: either closes the block on END, emitting the
  # collected script as a single token, or stores the current line.
  def scan_data_line(s)
    if s.scan_end_line?
      @tokens << [:DATA_SCRIPT, s.get_data_script]
      @tokens << [:DATA_END, "END"]
      @tokens << [:TERMIN, "D_TERMIN"]
      s.scan_state_set_top # Return to TOP_LEVEL
    else
      # Store data lines to one token
      s.scan_whole_line
      s.append_to_data_script(s.matched)
    end
  end

  # Inside a PROC block: closes the block on END, skips comments and empty
  # lines, or tokenizes one statement line into the scanner's proc buffer.
  def scan_proc_line(s)
    if s.scan_end_line?
      @tokens.concat s.get_proc_tokens
      @tokens << [:PROC_END, "END"]
      @tokens << [:TERMIN, "P_TERMIN"]
      s.scan_state_set_top # Return to TOP_LEVEL
    elsif s.skip_rest_after_comment_sign || s.skip_empty_line || s.skip_multiple_line_comment
      # Ignore comments and empty lines.
    else
      s.scan_proc_inst
      s.append_to_proc_tokens([[:PROC_INST, s.matched]])
      s.append_to_proc_tokens(s.tokenize_proc_line) # Tokenize the current line
      s.append_to_proc_tokens([[:TERMIN, "TERMIN"]])
    end
  end
end
|
432
|
+
|
433
|
+
|