statsailr 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +10 -0
  3. data/.travis.yml +6 -0
  4. data/Gemfile +7 -0
  5. data/HISTORY.md +15 -0
  6. data/LICENSE.txt +675 -0
  7. data/README.md +287 -0
  8. data/Rakefile +10 -0
  9. data/bin/console +14 -0
  10. data/bin/setup +8 -0
  11. data/example/blank.slr +3 -0
  12. data/example/category.slr +5 -0
  13. data/example/example_read.slr +10 -0
  14. data/example/iris.csv +151 -0
  15. data/example/mtcars.rda +0 -0
  16. data/example/new_mtcars.csv +33 -0
  17. data/example/new_mtcars.rda +0 -0
  18. data/example/plot_reg_example.slr +55 -0
  19. data/example/scatter.png +0 -0
  20. data/exe/sailr +54 -0
  21. data/exe/sailrREPL +75 -0
  22. data/lib/statsailr.rb +7 -0
  23. data/lib/statsailr/block_builder/sts_block.rb +167 -0
  24. data/lib/statsailr/block_builder/sts_block_parse_proc_opts.rb +168 -0
  25. data/lib/statsailr/block_to_r/proc_setting_support/proc_opt_validator.rb +52 -0
  26. data/lib/statsailr/block_to_r/proc_setting_support/proc_setting_manager.rb +49 -0
  27. data/lib/statsailr/block_to_r/proc_setting_support/proc_setting_module.rb +44 -0
  28. data/lib/statsailr/block_to_r/sts_block_to_r.rb +98 -0
  29. data/lib/statsailr/block_to_r/sts_lazy_func_gen.rb +236 -0
  30. data/lib/statsailr/block_to_r/top_stmt/top_stmt_to_r_func.rb +182 -0
  31. data/lib/statsailr/parser/sts_gram_node.rb +9 -0
  32. data/lib/statsailr/parser/sts_parse.output +831 -0
  33. data/lib/statsailr/parser/sts_parse.ry +132 -0
  34. data/lib/statsailr/parser/sts_parse.tab.rb +682 -0
  35. data/lib/statsailr/scanner/sample1.sts +37 -0
  36. data/lib/statsailr/scanner/sts_scanner.rb +433 -0
  37. data/lib/statsailr/scanner/test_sample1.rb +8 -0
  38. data/lib/statsailr/sts_build_exec.rb +304 -0
  39. data/lib/statsailr/sts_controller.rb +66 -0
  40. data/lib/statsailr/sts_output/output_manager.rb +192 -0
  41. data/lib/statsailr/sts_runner.rb +17 -0
  42. data/lib/statsailr/sts_server.rb +85 -0
  43. data/lib/statsailr/version.rb +3 -0
  44. data/statsailr.gemspec +32 -0
  45. metadata +133 -0
@@ -0,0 +1,37 @@
1
+ // comment
2
+ /*
3
+ multiple line comments
4
+ */
5
+
6
+ LOAD builtin = "mtcars"
7
+
8
+ DATA new_mtcars set=mtcars
9
+ if(cyl > 4){
10
+ powerful = 1
11
+ }else{
12
+ powerful = 0
13
+ }
14
+ END
15
+
16
+ PROC FREQ data=new_mtcars
17
+ /* Calculate frequencies & show in list format
18
+ */
19
+ tables cyl * powerful // categorize by cyl and powerful
20
+ END
21
+
22
+ /*
23
+ PROC SAVE data=new_mtcars
24
+ to "./new_mtcars.RData"
25
+ END
26
+ */
27
+
28
+ LOAD csv = "./iris.csv" as = iris sep = "," head = 1
29
+
30
+ /*
31
+ PROC GLM data=iris
32
+ model Species ~ Sepal.Length Sepal.Width
33
+ family binomial
34
+ END
35
+ */
36
+
37
+
@@ -0,0 +1,433 @@
1
+ require "strscan"
2
+
3
# Regular expressions shared by the StatSailr scanner.
#
# None of the patterns are anchored; they are meant to be handed to
# StringScanner#scan, which only matches at the current scan position.
module STSConstants
  # Identifier: a letter, '-', '_' or '.' followed by any number of
  # alphanumerics, '-', '_' or '.'.
  IDENT_PATTERN = /[a-zA-Z\-_.][0-9a-zA-Z\-_.]*/
  # PROC instruction name; currently the same shape as IDENT_PATTERN.
  PROC_INST_PATTERN = /[a-zA-Z\-_.][0-9a-zA-Z\-_.]*/
  # Optionally signed decimal with a mandatory '.'; the fractional digits
  # may be empty (e.g. "1.").
  FLOATP_PATTERN = /[-+]?(([1-9][0-9]*)|0)\.[0-9]*/
  # Optionally signed integer without leading zeros.
  # The alternation is grouped so that the sign applies to "0" as well;
  # ungrouped, `[-+]?([1-9][0-9]*)|0` parses as `([-+]?[1-9][0-9]*) | 0`
  # and "+0" / "-0" fail to match as a single token.
  INT_PATTERN = /[-+]?(([1-9][0-9]*)|0)/
  # Single-quoted string on one line; \' is allowed inside.
  SQ_STR_PATTERN = /'(\\'|[^'\n])*'/
  # Double-quoted string on one line; \" is allowed inside.
  DQ_STR_PATTERN = /"(\\"|[^"\n])*"/
end
11
+
12
# Helper methods mixed into the scanner.
module STSScannerSupport
  # Expands backslash escape sequences found in the body of a double quoted
  # string literal (the text between the quotes).
  # The behavior is intended to match libsailr (or datasailr).
  #
  # \t, \n and \r become tab, newline and carriage return. For any other
  # escaped character the backslash is dropped and the character itself is
  # kept (so \\ , \' , \" and \? yield \ , ' , " and ?).
  #
  # Returns a new String. Raises RuntimeError when the text ends with a
  # lone backslash.
  def interpret_escape_sequences(str)
    control_chars = { "t" => "\t", "n" => "\n", "r" => "\r" }
    chars = str.split(//)
    result = +""
    pos = 0
    until pos >= chars.size
      current = chars[pos]
      if current == "\\"
        # Step onto the character that follows the backslash.
        pos += 1
        raise "Tokenizer error: double quoted string literal should never end with \\" if pos >= chars.size
        escaped = chars[pos]
        result << control_chars.fetch(escaped, escaped)
      else
        result << current
      end
      pos += 1
    end
    result
  end
end
51
+
52
# Stateful scanner for StatSailr scripts, built on top of StringScanner.
#
# The scanner tracks one of three scan states (:TOP, :DATA, :PROC) and
# offers helpers to skip comments and blank lines, tokenize option lists and
# PROC statement lines, and accumulate the body of a DATA block. Call #start
# before using any scanning method.
class STSScanner
  include ::STSConstants
  include ::STSScannerSupport

  # Special symbols recognized on PROC statement lines, tried in order.
  # "%in%" must come before the bare "%" so the longer operator wins.
  PROC_SPECIAL_SYMBOLS = [
    [/\=/, :P_EQ],
    [/\*/, :P_MULT],
    [/\+/, :P_PLUS],
    [/\-/, :P_MINUS],
    [/\^/, :P_HAT],
    [/\%in\%/, :P_IN],
    [/\%/, :P_PERC],
    [/\~/, :P_TILDA],
    [/\:/, :P_COLON],
    [/\(/, :P_LPAR],
    [/\)/, :P_RPAR],
    [/\[/, :P_LSQBR],
    [/\]/, :P_RSQBR],
    [/\,/, :P_COMMA]
  ].freeze

  # Initialization & Terminating methods

  # script: the script text to scan; it is given to StringScanner in #start.
  def initialize( script )
    @script = script
  end

  # Creates the underlying StringScanner and sets the scan state to :TOP.
  def start()
    @scanner = StringScanner.new(@script)
    @scan_state = :TOP
  end

  # Moves the underlying scanner to the end of the script.
  def terminate()
    @scanner.terminate()
  end

  # Delegate corresponding methods to StringScanner

  def scan(pattern)
    @scanner.scan(pattern)
  end

  def scan_until(pattern)
    @scanner.scan_until(pattern)
  end

  def skip_until(pattern)
    @scanner.skip_until(pattern)
  end

  def matched()
    @scanner.matched
  end

  def eos?()
    @scanner.eos?
  end

  def bol?()
    @scanner.bol?
  end

  def check(pattern)
    @scanner.check(pattern)
  end

  # Additional scanner methods

  # Consumes spaces and tabs (possibly none) at the current position.
  def skip_spaces()
    scan(/[ \t]*/)
  end

  # Consumes everything up to and including the next newline.
  def skip_line()
    scan_until(/\n/)
  end

  # Consumes a "//" comment (with optional leading blanks) through the end
  # of the line. Returns true when a comment was consumed, false otherwise.
  def skip_rest_after_comment_sign()
    # The newline is optional so that a comment on the very last line of a
    # script without a trailing newline is still consumed; otherwise a
    # caller looping until eos? would never get past it.
    scan(/[ \t]*\/\/.*\n?/) ? true : false
  end

  # Consumes a line consisting only of blanks. Returns true/false.
  def skip_empty_line()
    scan(/[ \t]*\n/) ? true : false
  end

  # Consumes a "/* ... */" comment (with optional leading whitespace),
  # which may span several lines. Returns true/false.
  def skip_multiple_line_comment()
    # `*?` (not `+?`) so the empty comment "/**/" is accepted as well.
    scan(/\s*\/\*(.|\n)*?\*\//) ? true : false
  end

  # Scans an identifier at the current position; returns it or nil.
  def scan_ident()
    scan(IDENT_PATTERN)
  end

  # Tokenizes the rest of the current line as an option list and consumes
  # the terminating newline.
  #
  # Returns an array of [type, value] pairs with type one of :ASSIGN,
  # :IDENT, :NUMBER or :STRING. Raises RuntimeError when the line contains
  # something that matches none of the token patterns.
  #
  # NOTE(review): IDENT_PATTERN is tried before the numeric patterns and
  # accepts a leading '-' or '.', so input such as "-5" is reported as
  # :IDENT rather than :NUMBER — confirm this is what the parser expects.
  def tokenize_options()
    tokens = []
    loop do
      if eos? || scan(/\n/)
        break
      elsif scan(/=/)
        tokens << [:ASSIGN, matched]
      elsif scan(IDENT_PATTERN)
        tokens << [:IDENT, matched]
      elsif scan(FLOATP_PATTERN)
        tokens << [:NUMBER, matched.to_f]
      elsif scan(INT_PATTERN)
        tokens << [:NUMBER, matched.to_i]
      elsif scan(SQ_STR_PATTERN)
        # Strip the surrounding quotes; single quoted text is kept verbatim.
        tokens << [:STRING, matched[1..-2]]
      elsif scan(DQ_STR_PATTERN)
        tokens << [:STRING, interpret_escape_sequences(matched[1..-2])]
      elsif scan(/[ \t]/)
        # Blank between tokens: ignore.
      else
        raise "options cannot be tokenized."
      end
    end
    tokens
  end

  # Consumes and returns the rest of the current line including its
  # newline. The last line of the script is returned even when it has no
  # trailing newline.
  def scan_whole_line()
    scan(/.*\n?/)
  end

  # Consumes an END line when the scanner is positioned at one.
  #
  # Anything following the keyword on the same line is discarded; a notice
  # is printed for the first non-blank chunk. Returns true when an END line
  # was consumed, false otherwise (nothing is consumed in that case).
  def scan_end_line?()
    # The negative lookahead keeps identifiers that merely start with
    # "END"/"End" (e.g. "Endpoint = 1") from being taken for the keyword.
    return false unless scan(/[ \t]*E[Nn][Dd](?![0-9a-zA-Z\-_.])[ \t]*/)

    if scan(/\S+/)
      puts "Script after END is ignored (" + matched + ")"
    end
    scan_until(/\n/) # Move to the end of line
    true
  end

  # Additional features for TOPLEVEL

  # Scans a top level instruction name, skipping any leading whitespace
  # (including newlines). Returns the matched text or nil.
  def scan_toplevel_instruction
    scan(Regexp.new(/\s*/.source + IDENT_PATTERN.source))
  end

  # Additional features for data script

  # Starts a fresh, empty DATA script buffer.
  def prepare_data_script()
    @data_script = ""
  end

  # Appends one chunk of script text to the DATA script buffer.
  def append_to_data_script( script )
    @data_script << script
  end

  def get_data_script()
    @data_script
  end

  # Additional features for proc stmts

  # Starts a fresh, empty PROC token buffer.
  def prepare_proc_tokens()
    @proc_tokens = []
  end

  def get_proc_tokens()
    @proc_tokens
  end

  # Appends an array of tokens to the PROC token buffer.
  def append_to_proc_tokens( tokens )
    @proc_tokens.concat tokens
  end

  # Skips leading blanks and scans a PROC instruction name.
  # Returns the matched text or nil.
  def scan_proc_inst()
    skip_spaces()
    scan(PROC_INST_PATTERN)
  end

  # Scans one special symbol used on PROC statement lines.
  # Returns the token type (e.g. :P_EQ, :P_IN, :P_PERC) or nil when the
  # current position holds none of them. After a successful call, #matched
  # is the symbol text.
  def scan_proc_special()
    PROC_SPECIAL_SYMBOLS.each do |pattern, type|
      return type if scan(pattern)
    end
    nil
  end

  # Tokenizes the rest of the current PROC statement line.
  #
  # Scanning stops after the newline, at the end of the script, or right
  # before a "//" or "/*" comment (the comment itself is left unconsumed for
  # the caller). Returns an array of [type, value] pairs. Raises
  # RuntimeError, with the unconsumed rest of the line in the message, when
  # the line cannot be tokenized.
  #
  # NOTE(review): IDENT_PATTERN is tried first and accepts '-' and '.', so
  # a '-' on a PROC line is reported as :IDENT and :P_MINUS does not appear
  # to be reachable from here — confirm against the grammar.
  def tokenize_proc_line()
    tokens = []
    loop do
      if eos? || scan(/\n/)
        break
      elsif scan(IDENT_PATTERN)
        tokens << [:IDENT, matched]
      elsif scan(FLOATP_PATTERN)
        tokens << [:NUMBER, matched.to_f]
      elsif scan(INT_PATTERN)
        tokens << [:NUMBER, matched.to_i]
      elsif scan(SQ_STR_PATTERN)
        tokens << [:STRING, matched[1..-2]]
      elsif scan(DQ_STR_PATTERN)
        tokens << [:STRING, interpret_escape_sequences(matched[1..-2])]
      elsif (special = scan_proc_special)
        tokens << [special, matched]
      elsif scan(/[ \t]/) # Separators
        # ignore
      elsif check(/\/\//) || check(/\/\*/) # Start comment
        # Leave the comment in place; the caller skips it.
        break
      elsif scan(/\//) # slash to start options
        tokens << [:SEP_SLASH, matched]
      else
        # Consume the offending rest of the line for the error message.
        # The newline is optional so the last line of a script is reported
        # properly instead of failing on a nil concatenation.
        rest = scan(/.*\n?/)
        raise "Current PROC line cannot be tokenized." + rest.to_s
      end
    end
    tokens
  end

  # Manage scan states

  def scan_state_top?()
    @scan_state == :TOP
  end

  def scan_state_data?()
    @scan_state == :DATA
  end

  def scan_state_proc?()
    @scan_state == :PROC
  end

  def scan_state_set_top()
    @scan_state = :TOP
  end

  def scan_state_set_data()
    @scan_state = :DATA
  end

  def scan_state_set_proc()
    @scan_state = :PROC
  end

  def get_scan_state()
    @scan_state
  end
end
333
+
334
# Drives an STSScanner over a whole script and produces the flat token list.
class STSScanDriver
  # path: value handed to STSScanner.new as its script argument.
  # NOTE(review): despite the parameter name, nothing here opens a file;
  # the value is passed to the scanner unchanged — confirm whether callers
  # pass a path or the script text.
  def initialize( path )
    @source_path = path
  end

  # Scans the whole input and returns an array of [type, value] tokens.
  #
  # The scanner state decides how the text at the current position is
  # handled: top level (DATA/PROC headers, comments, top level
  # instructions), inside a DATA block (lines are collected verbatim until
  # END) or inside a PROC block (each line is tokenized until END).
  #
  # Raises RuntimeError when the scanner reports an unknown scan state.
  def tokenize
    s = STSScanner.new(@source_path)
    s.start()
    @tokens = []

    until s.eos?
      if s.scan_state_top?
        scan_top_level(s)
      elsif s.scan_state_data?
        scan_data_block_line(s)
      elsif s.scan_state_proc?
        scan_proc_block_line(s)
      else
        # to_s is required: the state is a Symbol and cannot be
        # concatenated to a String directly.
        raise "Error: StatSailrScanner has an invalid scan state " + s.get_scan_state.to_s
      end
    end

    s.terminate()
    @tokens
  end

  private

  # Handles one construct at top level: a DATA or PROC header line, a
  # comment, an empty line, a top level instruction, or an unknown line
  # (which is reported on stdout and discarded).
  def scan_top_level(s)
    if s.bol? && s.scan(/[ \t]*D[Aa][Tt][Aa]/)
      @tokens << [:DATA_START, "DATA" ]
      s.skip_spaces()
      # Optional ": ident" part between the keyword and the following
      # identifier.
      if s.scan(/\:/)
        @tokens << [:COLON, s.matched]
        s.skip_spaces
        s.scan_ident()
        @tokens << [:IDENT, s.matched]
        s.skip_spaces()
      end
      s.scan_ident()
      @tokens << [:IDENT, s.matched]
      s.skip_spaces()
      @tokens.concat s.tokenize_options()
      @tokens << [:TERMIN, "\n"]
      s.scan_state_set_data()
      s.prepare_data_script()
    elsif s.bol? && s.scan(/[ \t]*P[Rr][Oo][Cc]/)
      @tokens << [:PROC_START, "PROC" ]
      s.skip_spaces()
      s.scan_ident()
      @tokens << [:IDENT, s.matched]
      s.skip_spaces()
      @tokens.concat s.tokenize_options()
      @tokens << [:TERMIN, "\n"]
      s.scan_state_set_proc()
      s.prepare_proc_tokens()
    elsif s.skip_rest_after_comment_sign()
      # "//" comment consumed; nothing to emit.
    elsif s.skip_empty_line()
      # Empty line consumed; nothing to emit.
    elsif s.skip_multiple_line_comment()
      # "/* ... */" comment consumed; nothing to emit.
    elsif s.scan_toplevel_instruction
      @tokens << [:TOP_INST, s.matched() ]
      @tokens.concat s.tokenize_options()
      @tokens << [:TOP_INST_END, "TOP_INST_END"]
      @tokens << [:TERMIN, "\n"]
    else
      print("Unknown part on TOP LEVEL(" + s.get_scan_state.to_s + ") " )
      p s.check(/...../) # Show five letters
      # Discard the current line; on the last line there is no newline to
      # skip to, so skip to the end of the text instead.
      unless s.skip_until(/\n/)
        s.skip_until(/$/) # Last line
      end
    end
  end

  # Handles one line inside a DATA block: END closes the block and emits
  # the collected script, any other line is appended to the script buffer.
  def scan_data_block_line(s)
    if s.scan_end_line?
      @tokens << [:DATA_SCRIPT, s.get_data_script() ]
      @tokens << [:DATA_END, "END"]
      @tokens << [:TERMIN, "D_TERMIN"]
      s.scan_state_set_top() # Return to TOP_LEVEL
    else
      # Store data lines to one token
      s.scan_whole_line()
      s.append_to_data_script( s.matched() )
    end
  end

  # Handles one line inside a PROC block: END closes the block and emits
  # the buffered tokens, comments and empty lines are skipped, and any
  # other line is tokenized as "instruction + arguments".
  def scan_proc_block_line(s)
    if s.scan_end_line?
      @tokens.concat s.get_proc_tokens()
      @tokens << [:PROC_END, "END"]
      @tokens << [:TERMIN, "P_TERMIN"]
      s.scan_state_set_top() # Return to TOP_LEVEL
    elsif s.skip_rest_after_comment_sign()
      # Ignore after comment sign.
    elsif s.skip_empty_line()
      # Ignore the empty line.
    elsif s.skip_multiple_line_comment()
      # Ignore multiple line comment.
    else
      s.scan_proc_inst()
      s.append_to_proc_tokens( [[:PROC_INST, s.matched]] )
      s.append_to_proc_tokens( s.tokenize_proc_line() ) # Tokenize the current line
      s.append_to_proc_tokens( [[:TERMIN, "TERMIN"]] )
    end
  end
end
432
+
433
+