statsailr 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +10 -0
  3. data/.travis.yml +6 -0
  4. data/Gemfile +7 -0
  5. data/HISTORY.md +15 -0
  6. data/LICENSE.txt +675 -0
  7. data/README.md +287 -0
  8. data/Rakefile +10 -0
  9. data/bin/console +14 -0
  10. data/bin/setup +8 -0
  11. data/example/blank.slr +3 -0
  12. data/example/category.slr +5 -0
  13. data/example/example_read.slr +10 -0
  14. data/example/iris.csv +151 -0
  15. data/example/mtcars.rda +0 -0
  16. data/example/new_mtcars.csv +33 -0
  17. data/example/new_mtcars.rda +0 -0
  18. data/example/plot_reg_example.slr +55 -0
  19. data/example/scatter.png +0 -0
  20. data/exe/sailr +54 -0
  21. data/exe/sailrREPL +75 -0
  22. data/lib/statsailr.rb +7 -0
  23. data/lib/statsailr/block_builder/sts_block.rb +167 -0
  24. data/lib/statsailr/block_builder/sts_block_parse_proc_opts.rb +168 -0
  25. data/lib/statsailr/block_to_r/proc_setting_support/proc_opt_validator.rb +52 -0
  26. data/lib/statsailr/block_to_r/proc_setting_support/proc_setting_manager.rb +49 -0
  27. data/lib/statsailr/block_to_r/proc_setting_support/proc_setting_module.rb +44 -0
  28. data/lib/statsailr/block_to_r/sts_block_to_r.rb +98 -0
  29. data/lib/statsailr/block_to_r/sts_lazy_func_gen.rb +236 -0
  30. data/lib/statsailr/block_to_r/top_stmt/top_stmt_to_r_func.rb +182 -0
  31. data/lib/statsailr/parser/sts_gram_node.rb +9 -0
  32. data/lib/statsailr/parser/sts_parse.output +831 -0
  33. data/lib/statsailr/parser/sts_parse.ry +132 -0
  34. data/lib/statsailr/parser/sts_parse.tab.rb +682 -0
  35. data/lib/statsailr/scanner/sample1.sts +37 -0
  36. data/lib/statsailr/scanner/sts_scanner.rb +433 -0
  37. data/lib/statsailr/scanner/test_sample1.rb +8 -0
  38. data/lib/statsailr/sts_build_exec.rb +304 -0
  39. data/lib/statsailr/sts_controller.rb +66 -0
  40. data/lib/statsailr/sts_output/output_manager.rb +192 -0
  41. data/lib/statsailr/sts_runner.rb +17 -0
  42. data/lib/statsailr/sts_server.rb +85 -0
  43. data/lib/statsailr/version.rb +3 -0
  44. data/statsailr.gemspec +32 -0
  45. metadata +133 -0
@@ -0,0 +1,52 @@
1
# Holds validation rules for the options of a PROC command and applies them
# to the option values held by a parameter manager.
#
# Each rule is stored under its option name as a Hash with the string keys
# "is_a" (expected class name, or an Array of accepted class names),
# "as" (class name the value should be converted to) and "required".
# Class names are compared without their namespace (e.g. "SymbolR").
class ProcOptValidator
  attr_reader :validator_rules

  def initialize
    @validator_rules = {}
  end

  # Registers the rule for +option_name+, replacing any previous rule.
  def rule(option_name, is_a: nil, as: nil, required: false)
    @validator_rules[option_name] = { "is_a" => is_a, "as" => as, "required" => required }
  end

  # Validates and, where a rule asks for it, converts the values in
  # +param_manager.param_hash+ in place.
  #
  # Raises RuntimeError when a required option is missing, when a value's
  # class is not accepted by "is_a", or when "as" names an unsupported target.
  # Options that are absent and not required are skipped entirely, so they
  # are neither type-checked against NilClass nor inserted as converted nils.
  def check_and_modify(param_manager)
    return if @validator_rules.nil? || @validator_rules.empty?

    params = param_manager.param_hash
    @validator_rules.each do |opt_name, validator|
      unless params.has_key?(opt_name)
        raise "#{opt_name} is required for this PROC option" if validator["required"] == true

        next
      end

      # Compare by the unqualified class name (RBridge::SymbolR => "SymbolR").
      assigned = params[opt_name].class.name.split('::').last
      verify_type(opt_name, validator["is_a"], assigned)

      target = validator["as"]
      next if target.nil? || target == assigned

      params[opt_name] = convert_value(params[opt_name], target, assigned)
    end
  end

  private

  # Raises unless +assigned+ is (one of) the class name(s) in +expected+.
  def verify_type(opt_name, expected, assigned)
    return if expected.nil?

    if expected.is_a?(Array)
      return if expected.include?(assigned)

      raise "#{opt_name} needs to be one of #{expected.join("|")}, but #{assigned} is assigned"
    elsif expected != assigned
      raise "#{opt_name} needs to be #{expected}, but #{assigned} is assigned"
    end
  end

  # Converts +value+ to the class named by +target+.
  def convert_value(value, target, assigned)
    case target
    when "SymbolR" then RBridge::SymbolR.new(value)
    when "String"  then value.to_s
    when "Integer" then value.to_i
    when "Float"   then value.to_f
    else
      raise "We need to convert type but do not know how to do it. #{assigned} => #{target}"
    end
  end
end
@@ -0,0 +1,49 @@
1
+ require "pathname"
2
+
3
# Keeps track of PROC setting files (proc_<command>.rb) found in directories
# and whether each of them has been loaded.
class ProcSettingManager
  # Whole-filename match, so that backup or look-alike files such as
  # "proc_print.rb.bak" or "myproc_print.rb" are not registered.
  SETTING_FILE_PATTERN = /\Aproc_([a-zA-Z0-9]+)\.rb\z/

  def initialize
    # "command(downcase)" => { "path" => Pathname(absolute path), "loaded" => boolean }
    @proc_settings = {}
  end

  # Registers every regular file named proc_<command>.rb directly under +dir+.
  # The command name is stored downcased and a later file with the same
  # command name replaces an earlier entry.
  #
  # Raises RuntimeError unless +dir+ is a String holding an absolute path.
  def add_proc_settings_from_dir(dir)
    raise "add_proc_settings_from_directory() requires String" unless dir.is_a? String

    dir_pathname = Pathname.new(dir)
    raise "dir should be specified in absolute path." unless dir_pathname.absolute?

    dir_pathname.each_child do |child|
      next unless child.file?

      matched = SETTING_FILE_PATTERN.match(child.basename.to_s)
      next unless matched

      @proc_settings[matched[1].downcase] = { "path" => child, "loaded" => false }
    end
  end

  # True when the setting for +command+ (case-insensitive) is registered and
  # has already been loaded through #load_setting.
  def is_loaded?(command)
    entry = @proc_settings[command.downcase]
    entry ? entry["loaded"] : false
  end

  # Loads the setting file registered for +command+ (case-insensitive) with
  # Kernel#load and marks it as loaded.
  #
  # Raises RuntimeError when no setting file is registered for the command.
  def load_setting(command)
    command = command.downcase
    entry = @proc_settings[command]
    unless entry
      raise "specified #{command} proc command cannot be found. Loaded proc settings: #{@proc_settings.keys} "
    end

    load(entry["path"].to_s)
    entry["loaded"] = true
  end
end
47
+
48
+
49
+
@@ -0,0 +1,44 @@
1
+ require "pathname"
2
+ require_relative("./proc_opt_validator.rb")
3
+
4
# Mixin for PROC setting modules. Including it gives the including module
# the class-level helpers in ClassMethods and its own ProcOptValidator,
# stored in the @validator instance variable of the including module.
module ProcSettingModule
  def self.included(base)
    base.extend ClassMethods
    base.instance_variable_set(:@validator, ProcOptValidator.new)
    base.send :include, InstanceMethods
  end

  module ClassMethods
    # Hook run when an object is extended with the including module: hands
    # the module's validator to that object and exposes it through a
    # singleton +validator+ accessor before the normal extension happens.
    def extend_object(extender)
      extender.instance_variable_set(:@validator, @validator)
      extender.singleton_class.__send__(:attr_accessor, :validator)
      super
    end

    # Builds an R call source(file = "<abs_path_dir>/<filename>") through
    # RBridge and executes it.
    #
    # Raises RuntimeError when +abs_path_dir+ is not an absolute path.
    def source_r_file(abs_path_dir, filename)
      unless Pathname.new(abs_path_dir).absolute?
        # __method__ is a Symbol, so it is interpolated rather than
        # concatenated with String#+ (which would raise TypeError).
        raise "directory path should be specified in absolute path for #{__method__}"
      end

      r_path = abs_path_dir + "/" + filename
      func = RBridge::create_function_call("source", { "file" => RBridge::create_strvec([r_path]) })
      RBridge::exec_function(func)
    end

    # Requires <abs_path_dir>/<filename> and includes the module it is
    # expected to define: the file's base name camel-cased plus "Setting".
    #
    # Raises RuntimeError when +abs_path_dir+ is not an absolute path.
    def add_setting_from(abs_path_dir, filename)
      unless Pathname.new(abs_path_dir).absolute?
        raise "directory path should be specified in absolute path for #{__method__}"
      end

      require(abs_path_dir + "/" + filename)
      # e.g. dev_copy.rb => DevCopySetting
      klass_name = File.basename(filename, ".rb").split("_").map { |elem| elem.capitalize }.join("") + "Setting"
      self.include(Object.const_get(klass_name))
    end

    # Registers a validation rule for +opt_key+ on this module's validator.
    # Options declared here are required unless stated otherwise.
    def validate_option(opt_key, is_a: nil, as: nil, required: true)
      @validator.rule(opt_key, is_a: is_a, as: as, required: required)
    end

    def validator
      @validator
    end
  end

  module InstanceMethods
  end
end
44
+
@@ -0,0 +1,98 @@
1
+ require "r_bridge"
2
+
3
# Empty marker module included by the block-to-R converter modules in this file.
module BlockToRSupport; end
5
+
6
+ require_relative "top_stmt/top_stmt_to_r_func.rb"
7
# Converts a top-level statement block into an R function object by
# dispatching to a module method named create_r_func_for_<command>.
module TopStmtToR
  include BlockToRSupport

  # Builds the R function for +blk+.
  #
  # +blk.command+ must consist of word characters only; the whole string is
  # matched (\A...\z), so a multi-line value cannot pass on its first line.
  # The downcased command selects create_r_func_for_<command>, which is
  # called with +blk.opts+. Whatever is still left in +blk.opts+ afterwards
  # is reported on stdout as unused.
  #
  # Raises RuntimeError for an invalid command name or when no matching
  # create_r_func_for_<command> method is available.
  def self.create_function(blk)
    r_func =
      case blk.command
      when /\A(\w+)\z/
        method_to_create_r_function = "create_r_func_for_" + Regexp.last_match(1).downcase
        unless respond_to?(method_to_create_r_function)
          raise "#{method_to_create_r_function} cannot be found. Unknown top level command: #{blk.command}"
        end

        send(method_to_create_r_function, blk.opts)
      else
        raise "Invalid TOPLEVEL command name: " + blk.command
      end

    unless blk.opts.empty?
      puts "The following options are not used in " + blk.command + " : " + blk.opts.keys.join(" ")
    end
    r_func
  end
end
26
+
27
+ module DataBlockToR
28
+ include BlockToRSupport
29
+ @datasailr_library_loaded = false
30
+
31
+ def self.load_datasailr_library
32
+ if ! @datasailr_library_loaded
33
+ begin
34
+ lib_func1 = RBridge.create_library_function("datasailr")
35
+ result = RBridge.exec_function(lib_func1)
36
+ @datasailr_library_loaded = true
37
+ rescue => err
38
+ puts "ERROR: 'datasailr' package cannot be found in the following R library paths."
39
+ libpath = RBridge.create_function_call(".libPaths", {})
40
+ RBridge.exec_function_no_return( RBridge.create_function_call("print", {"x" => libpath}))
41
+ puts "Please make sure that the package is installed in one of the libraries."
42
+ err.set_backtrace([])
43
+ raise err, "DATA block evaluation failed"
44
+ end
45
+ end
46
+ end
47
+
48
+ def self.create_function( blk )
49
+ load_datasailr_library()
50
+
51
+ out_ds = blk.out
52
+ if set_ds = blk.opts["set"]
53
+ else
54
+ raise "DATA block requires set= option for input dataset"
55
+ end
56
+
57
+ puts "Processing data [input:#{set_ds.to_s} ouput:#{out_ds.to_s}]"
58
+
59
+ ds_script = blk.script
60
+
61
+ datasailr_func = RBridge.create_function_call( "sail" , {"df" => set_ds.to_r_symbol, "code" => RBridge.create_strvec([ds_script])} )
62
+ r_func = RBridge.create_assign_function( out_ds.to_s , datasailr_func )
63
+ end
64
+ end
65
+
66
+
67
+ require_relative("sts_lazy_func_gen.rb")
68
+ require_relative("proc_setting_support/proc_setting_module.rb")
69
+
70
# Converts a PROC block into a list of lazy R function descriptions, one
# per PROC statement.
module ProcBlockToR
  include BlockToRSupport

  # Loads the setting for the block's command on first use, extends a fresh
  # LazyFuncGenerator with the matching Proc<Command> module, lets that
  # module's validator check/convert the block options, and returns what
  # gen_lazy_func yields for each statement of the block.
  def self.create_lazy_funcs(blk, proc_setting_manager)
    command = blk.command
    params = RBridge::RParamManager.new(blk.opts)
    statements = blk.stmts

    unless proc_setting_manager.is_loaded?(command)
      proc_setting_manager.load_setting(command)
      p "#{command} setting is loaded"
    end

    generator = LazyFuncGenerator.new
    # e.g. command "print" => module ProcPrint
    generator.extend(Object.const_get("Proc" + command.capitalize))

    option_validator = generator.validator
    option_validator.check_and_modify(params) unless option_validator.nil?

    statements.map do |statement|
      generator.gen_lazy_func(command, statement, params)
    end
  end
end
96
+
97
+
98
+
@@ -0,0 +1,236 @@
1
+ require "r_bridge"
2
+
3
# Helper methods for PROC setting code: the read_* methods turn a
# statement's main argument (an Array) into an R object through RBridge,
# and the remaining methods build or test the RBridge name placeholders
# (RResultName, RParamName, ...) used as function arguments.
module LazyFuncGeneratorSettingUtility
  def read_as_formula(ary)
    RBridge.create_formula_from_syms(ary)
  end

  # Every element becomes one string of the resulting string vector.
  def read_as_strvec(ary)
    RBridge.create_strvec(ary.map { |elem| elem.to_s })
  end

  # All elements are joined with single spaces into a one-element string vector.
  def read_as_one_str(ary)
    RBridge.create_strvec([ary.map { |elem| elem.to_s }.join(" ")])
  end

  # Real vector when any element is a Float, integer vector otherwise.
  def read_as_numvec(ary)
    if ary.any? { |elem| elem.is_a?(Float) }
      read_as_realvec(ary)
    else
      read_as_intvec(ary)
    end
  end

  def read_as_intvec(ary)
    RBridge.create_intvec(ary)
  end

  def read_as_realvec(ary)
    RBridge.create_realvec(ary)
  end

  # Expects exactly one RBridge::SymbolR and returns its R symbol.
  def read_as_symbol(ary)
    raise "main argument is expected to be length of 1" unless ary.length == 1
    raise "symbol is expected" unless ary[0].is_a?(RBridge::SymbolR)

    ary[0].to_r_symbol
  end

  # Expects only RBridge::SymbolR elements and returns their names as a
  # string vector.
  def read_symbols_as_strvec(ary)
    raise "symbol is expected as an element" unless ary.all? { |elem| elem.is_a?(RBridge::SymbolR) }

    RBridge.create_strvec(ary.map { |elem| elem.to_s })
  end

  # One name gives an RResultName; several names give an RResultNameArray
  # holding one RResultName per name.
  def result(name, *addl)
    return RBridge::RResultName.new(name) if addl.empty?

    RBridge::RResultNameArray.new(([name] + addl).map { |elem| RBridge::RResultName.new(elem) })
  end

  def param(name)
    RBridge::RParamName.new(name)
  end

  def previous_or(default_obj)
    RBridge::RResultPrevious.new(default_obj)
  end

  def r_obj(val)
    RBridge.convert_to_r_object(val)
  end

  def is_result_name?(val)
    val.is_a? RBridge::RResultName
  end

  def is_result_name_array?(val)
    val.is_a? RBridge::RResultNameArray
  end

  def is_param_name?(val)
    val.is_a? RBridge::RParamName
  end

  def is_r_obj?(val)
    RBridge.is_pointer?(val)
  end

  # With a single argument, returns it unchanged after checking that it is
  # an RResultName, RResultNameArray, RParamName or R object; anything else
  # raises RuntimeError. With several arguments, wraps them (unchecked) in
  # an RBridge::RNameContainer.
  def one_from(name, *addl)
    return RBridge::RNameContainer.new([name] + addl) unless addl.empty?

    # The R-object test goes through is_r_obj?; this module defines no
    # is_pointer? helper of its own.
    unless is_result_name?(name) || is_result_name_array?(name) || is_param_name?(name) || is_r_obj?(name)
      raise "one_from() create RBridge::RNameContainer, which only stores RResultName, RParamName or R object(pointer)."
    end

    name
  end
end
97
+
98
# Settings filled in by a setting_for_<instruction> method for one PROC
# instruction: which function to call, how to read the main argument, and
# the options that accompany the call.
class LazyFuncGeneratorSetting
  include LazyFuncGeneratorSettingUtility

  attr_accessor :libname, :envname, :func_name, :main_arg_and_how_to_treat, :runtime_args, :store_result, :print_opt, :plot_opt

  def initialize
    @libname = nil
    @envname = nil
    @func_name = nil

    # Read as [main_arg_name, how_to_treat, allow_nil] by create_func_arg_hash.
    @main_arg_and_how_to_treat = nil
    @runtime_args = nil

    @store_result = true
    @print_opt = nil
    @plot_opt = nil
  end

  # Builds the argument Hash for the function call from three sources,
  # later ones overriding earlier ones on key clashes:
  # runtime_args, then the converted +opt_args+, then the main argument.
  #
  # Raises RuntimeError when the main argument is configured but missing
  # (and "allow_nil" was not given), when how_to_treat does not start with
  # "read", or when runtime_args is set to something other than a Hash.
  def create_func_arg_hash(main_arg, opt_args)
    main_arg_name, how_to_treat, allow_nil = @main_arg_and_how_to_treat || [nil, nil, nil]

    r_main_arg_hash = build_main_arg_hash(main_arg_name, how_to_treat, allow_nil, main_arg)
    r_opt_arg_hash = opt_args.nil? ? {} : convert_args_to_r_args(opt_args)

    runtime_args = @runtime_args.nil? ? {} : @runtime_args
    raise "rumtime_args needs to be Hash" unless runtime_args.is_a?(Hash)

    # The latter hashes have higher priority
    {}.merge(runtime_args).merge(r_opt_arg_hash).merge(r_main_arg_hash)
  end

  private

  # Returns { main_arg_name => <value read by how_to_treat> }, or {} when no
  # main argument is configured or an absent one is allowed.
  def build_main_arg_hash(main_arg_name, how_to_treat, allow_nil, main_arg)
    return {} if main_arg_name.nil?

    # A nil main argument is treated like an empty one.
    if main_arg.nil? || main_arg.empty?
      return {} if allow_nil.to_s == "allow_nil"

      raise "main_arg needs needs to be specified or setting.main_arg_and_how_to_treat needs to allow nil"
    end

    unless how_to_treat.to_s =~ /^read/
      raise "String element that specifies how_to_treat should start from 'read_'"
    end

    { main_arg_name => send(how_to_treat, main_arg) }
  end

  # Converts optional arguments to R-side values. A Hash value tagged with
  # "type" => :func and "fname" of "param" or "result" becomes the matching
  # name placeholder; every other value goes through
  # RBridge::convert_to_r_object.
  def convert_args_to_r_args(hash)
    hash.each_with_object({}) do |(key, value), r_hash|
      unless value.is_a?(Hash) && value["type"] == :func
        r_hash[key] = RBridge::convert_to_r_object(value)
        next
      end

      fname = value["fname"]
      case fname
      when "param", "result"
        raise "function for " + fname + " should have one argument." if value["fargs"].size > 1

        # fname is "param" or "result", i.e. the helper of the same name.
        r_hash[key] = send(fname, value["fargs"][0].to_s)
      else
        raise "unknown function name in optional argument: " + fname.to_s
      end
    end
  end
end
184
+
185
+
186
# Builds lazy R function descriptions for PROC statements. The
# setting_for_<instruction> methods it dispatches to are not defined here;
# they are expected to be supplied by whatever the generator is extended with.
class LazyFuncGenerator
  include LazyFuncGeneratorSettingUtility
  SETTING_FOR_PREFIX = "setting_for_"

  # Builds the lazy function for one PROC statement.
  #
  # +proc_stmt+ is read as [instruction, main_arg, opt_args]. Dots in the
  # instruction are replaced with underscores to find the
  # setting_for_<instruction> method, which fills in a fresh
  # LazyFuncGeneratorSetting. A non-empty libname selects a namespaced lazy
  # function, otherwise a non-empty envname selects an environment one,
  # otherwise a plain lazy function is created.
  #
  # Returns [lazy_func, print_opt, plot_opt, store_result, result_name],
  # where result_name is the instruction as written.
  # Raises RuntimeError when no setting method exists for the instruction.
  def gen_lazy_func(command, proc_stmt, param_manager)
    inst, main_arg, opt_args = proc_stmt[0], proc_stmt[1], proc_stmt[2]

    setting_method = SETTING_FOR_PREFIX + inst.gsub(/\./, "_")
    unless respond_to?(setting_method)
      raise "method for this instruction(#{inst}) is not defined: " + setting_method
    end

    setting = LazyFuncGeneratorSetting.new
    send(setting_method, setting)

    func_name = setting.func_name
    func_hash = setting.create_func_arg_hash(main_arg, opt_args)

    # Empty strings count as "not specified" for both names.
    libname = setting.libname
    libname = nil if libname == ""
    envname = setting.envname
    envname = nil if envname == ""

    lazy_func =
      if !libname.nil?
        RBridge::create_ns_lazy_function(libname, func_name, func_hash, param_manager)
      elsif !envname.nil?
        RBridge::create_env_lazy_function(envname, func_name, func_hash, param_manager)
      else
        RBridge::create_lazy_function(func_name, func_hash, param_manager)
      end

    [lazy_func, setting.print_opt, setting.plot_opt, setting.store_result, inst]
  end
end
235
+
236
+