statsailr_procs_base 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +10 -0
  3. data/Gemfile +10 -0
  4. data/LICENSE.txt +675 -0
  5. data/README.md +195 -0
  6. data/Rakefile +12 -0
  7. data/bin/console +15 -0
  8. data/bin/setup +8 -0
  9. data/lib/statsailr_procs_base.rb +16 -0
  10. data/lib/statsailr_procs_base/check_statsailr_version.rb +18 -0
  11. data/lib/statsailr_procs_base/path.rb +7 -0
  12. data/lib/statsailr_procs_base/proc_setting/common_utility.R +16 -0
  13. data/lib/statsailr_procs_base/proc_setting/proc_cat.R +65 -0
  14. data/lib/statsailr_procs_base/proc_setting/proc_cat.rb +44 -0
  15. data/lib/statsailr_procs_base/proc_setting/proc_common/dev_copy.R +6 -0
  16. data/lib/statsailr_procs_base/proc_setting/proc_common/dev_copy.rb +15 -0
  17. data/lib/statsailr_procs_base/proc_setting/proc_common/factor.R +7 -0
  18. data/lib/statsailr_procs_base/proc_setting/proc_common/factor.rb +15 -0
  19. data/lib/statsailr_procs_base/proc_setting/proc_common/numeric.R +7 -0
  20. data/lib/statsailr_procs_base/proc_setting/proc_common/numeric.rb +15 -0
  21. data/lib/statsailr_procs_base/proc_setting/proc_mult.R +12 -0
  22. data/lib/statsailr_procs_base/proc_setting/proc_mult.rb +35 -0
  23. data/lib/statsailr_procs_base/proc_setting/proc_plot.R +52 -0
  24. data/lib/statsailr_procs_base/proc_setting/proc_plot.rb +54 -0
  25. data/lib/statsailr_procs_base/proc_setting/proc_print.R +60 -0
  26. data/lib/statsailr_procs_base/proc_setting/proc_print.rb +44 -0
  27. data/lib/statsailr_procs_base/proc_setting/proc_reg.rb +17 -0
  28. data/lib/statsailr_procs_base/proc_setting/proc_two.R +33 -0
  29. data/lib/statsailr_procs_base/proc_setting/proc_two.rb +38 -0
  30. data/lib/statsailr_procs_base/proc_setting/proc_uni.R +105 -0
  31. data/lib/statsailr_procs_base/proc_setting/proc_uni.rb +27 -0
  32. data/lib/statsailr_procs_base/version.rb +7 -0
  33. data/statsailr_procs_base.gemspec +37 -0
  34. metadata +80 -0
@@ -0,0 +1,15 @@
1
+ module FactorSetting
2
+ include ProcSettingModule
3
+ source_r_file(__dir__, File.basename(__FILE__ , ".rb") + ".R")
4
+
5
+ def setting_for_factor( setting )
6
+ setting.libname = nil
7
+ setting.envname = "sts_factor"
8
+ setting.func_name = "convert_to_factor"
9
+ setting.main_arg_and_how_to_treat = ["vars", :read_symbols_as_strvec, :no_nil ]
10
+ setting.runtime_args = {"data" => one_from( result("factor", "numeric"), param("data")) }
11
+ setting.store_result = true
12
+ setting.print_opt = false
13
+ end
14
+ end
15
+
@@ -0,0 +1,7 @@
1
+ sts_numeric = new.env()
2
+ # convert_to_numeric: apply as.numeric to the columns of `data` named in `vars`, print a notice, return `data`.
3
+ sts_numeric$convert_to_numeric = function( data, vars ){
4
+   data[vars] = lapply( data[vars], as.numeric )
5
+   print( paste( "as.numeric is applied to", paste( vars, collapse="," ) ) )
6
+   data
7
+ }
@@ -0,0 +1,15 @@
1
+ module NumericSetting
2
+ include ProcSettingModule
3
+ source_r_file(__dir__, File.basename(__FILE__ , ".rb") + ".R")
4
+
5
+ def setting_for_factor( setting )
6
+ setting.libname = nil
7
+ setting.envname = "sts_numeric"
8
+ setting.func_name = "convert_to_numeric"
9
+ setting.main_arg_and_how_to_treat = ["vars", :read_symbols_as_strvec, :no_nil ]
10
+ setting.runtime_args = {"data" => one_from( result("factor", "numeric"), param("data")) }
11
+ setting.store_result = true
12
+ setting.print_opt = false
13
+ end
14
+ end
15
+
@@ -0,0 +1,12 @@
1
+ sts_mult = new.env()
2
+ # p_adjust: run p.adjust() on the 'Pr(>F)' column of the first table in summary(x) and report the method.
3
+ sts_mult$p_adjust = function( x , method ){
4
+   first_table = summary(x)[[1]]
5
+   adjusted = p.adjust( first_table[["Pr(>F)"]], method )
6
+   # Announce what was done before handing back the adjusted p-values.
7
+   cat( "p adjustment\n" )
8
+   cat( paste( "Method:", method, "\n", collapse=" " ) )
9
+   return( adjusted )
10
+ }
11
+
12
+
@@ -0,0 +1,35 @@
1
+ module ProcMult
2
+ include ProcSettingModule
3
+
4
+ source_r_file(__dir__, File.basename(__FILE__ , ".rb") + ".R")
5
+ validate_option("data", is_a: ["SymbolR", "String"], as: "SymbolR" , required: true)
6
+
7
+ def setting_for_aov( setting )
8
+ setting.libname = "stats"
9
+ setting.func_name = "aov"
10
+ setting.main_arg_and_how_to_treat = [ "formula" , :read_as_formula, :no_nil]
11
+ setting.runtime_args = {"data" => param("data")}
12
+ setting.store_result = true
13
+ setting.print_opt = "summary"
14
+ end
15
+
16
+ def setting_for_tukey( setting )
17
+ setting.libname = "stats"
18
+ setting.func_name = "TukeyHSD"
19
+ setting.main_arg_and_how_to_treat = nil
20
+ setting.runtime_args = {"x" => result("aov")}
21
+ setting.store_result = false
22
+ setting.print_opt = true
23
+ end
24
+
25
+ def setting_for_p_adjust( setting )
26
+ setting.libname = nil
27
+ setting.envname = "sts_mult"
28
+ setting.func_name = "p_adjust"
29
+ setting.main_arg_and_how_to_treat = [ "method" , :read_as_strvec, :no_nil]
30
+ setting.runtime_args = {"x" => result("aov")}
31
+ setting.store_result = true
32
+ setting.print_opt = true
33
+ end
34
+ end
35
+
@@ -0,0 +1,52 @@
1
+ sts_plot = new.env()
2
+ # legend: pass `legend` plus any extra arguments to the function returned by get_pkg_fun("graphics::legend").
3
+ sts_plot$legend = function( legend , ... ){
4
+   # Only character vectors are accepted as legend text.
5
+   if( ! is.character(legend) ) stop("legend argument requires character vector")
6
+
7
+   plot_args = list(...)
8
+   plot_args$legend = legend
9
+   # if_exist_else receives "x", the argument list and the fallback "topleft".
10
+   plot_args$x = if_exist_else( "x", plot_args, "topleft" )
11
+   legend_fun = get_pkg_fun( "graphics::legend" )
12
+   do.call( legend_fun , plot_args )
13
+ }
14
+
15
+ sts_plot$hist = function( data , var , ... ){
16
+   # Plot data[[var]] through "graphics::hist"; main and xlab go through if_exist_else with var-based fallbacks.
17
+   if( !( is.character(var) && length(var) == 1 ) ){
18
+     stop("vars argument requires character vector with size of 1")
19
+   }
20
+   plot_args = list(...)
21
+   plot_args$x = data[[var]]
22
+   plot_args$main = if_exist_else( "main", plot_args, paste( "Frequency of" , var ) )
23
+   plot_args$xlab = if_exist_else( "xlab", plot_args, paste( var ) )
24
+   hist_fun = get_pkg_fun( "graphics::hist" )
25
+   do.call( hist_fun , plot_args )
26
+ }
27
+
28
+ sts_plot$box = function( data , var , ... ){
29
+   # Plot data[[var]] through "graphics::boxplot"; lex.order is always set to TRUE.
30
+   if( !( is.character(var) && length(var) == 1 ) ){
31
+     stop("vars argument requires character vector with size of 1")
32
+   }
33
+   plot_args = list(...)
34
+   plot_args$x = data[[var]]
35
+   plot_args$main = if_exist_else( "main", plot_args, paste( var ) )
36
+   plot_args$lex.order = TRUE
37
+   boxplot_fun = get_pkg_fun( "graphics::boxplot" )
38
+   do.call( boxplot_fun, plot_args )
39
+ }
40
+
41
+ sts_plot$scatter = function( data , vars , ... ){
42
+   # Plot the two columns data[vars] through "graphics::plot"; the fallback title is "<first> vs <second>".
43
+   if( !( is.character(vars) && length(vars) == 2 ) ){
44
+     stop("vars argument requires character vector with size of 2")
45
+   }
46
+   plot_args = list(...)
47
+   plot_args$x = data[vars]
48
+   plot_args$main = if_exist_else( "main", plot_args, paste( vars[1], "vs", vars[2] ) )
49
+   plot_fun = get_pkg_fun( "graphics::plot" )
50
+   do.call( plot_fun, plot_args )
51
+ }
52
+
@@ -0,0 +1,54 @@
1
+ module ProcPlot
2
+ include ProcSettingModule
3
+ add_setting_from( __dir__, "proc_common/dev_copy.rb" )
4
+
5
+ source_r_file( __dir__, "common_utility.R" )
6
+ source_r_file( __dir__, File.basename(__FILE__ , ".rb") + ".R")
7
+ validate_option("data", is_a: ["SymbolR", "String"], as: "SymbolR" , required: true)
8
+
9
+ def setting_for_legend( setting )
10
+ setting.libname = nil
11
+ setting.envname = "sts_plot"
12
+ setting.func_name = "legend"
13
+ setting.main_arg_and_how_to_treat = [ "legend" , :read_as_strvec, :no_nil]
14
+ setting.runtime_args = nil
15
+ setting.store_result = false
16
+ setting.print_opt = false
17
+ setting.plot_opt = false
18
+ end
19
+
20
+ def setting_for_hist( setting )
21
+ setting.libname = nil
22
+ setting.envname = "sts_plot"
23
+ setting.func_name = "hist"
24
+ setting.main_arg_and_how_to_treat = [ "var" , :read_as_strvec, :no_nil]
25
+ setting.runtime_args = {"data" => param("data") }
26
+ setting.store_result = false
27
+ setting.print_opt = false
28
+ setting.plot_opt = true
29
+ end
30
+
31
+ def setting_for_box( setting )
32
+ setting.libname = nil
33
+ setting.envname = "sts_plot"
34
+ setting.func_name = "box"
35
+ setting.main_arg_and_how_to_treat = [ "var" , :read_as_strvec, :no_nil]
36
+ setting.runtime_args = {"data" => param("data")}
37
+ setting.store_result = false
38
+ setting.print_opt = false
39
+ setting.plot_opt = true
40
+ end
41
+
42
+ def setting_for_scatter( setting )
43
+ setting.libname = nil
44
+ setting.envname = "sts_plot"
45
+ setting.func_name = "scatter"
46
+ setting.main_arg_and_how_to_treat = [ "vars" , :read_as_strvec, :no_nil]
47
+ setting.runtime_args = {"data" => param("data")}
48
+ setting.store_result = false
49
+ setting.print_opt = false
50
+ setting.plot_opt = true
51
+ end
52
+
53
+ end
54
+
@@ -0,0 +1,60 @@
1
+ sts_print = new.env()
2
+
3
+ sts_print$nth = function( x , positions ){
4
+   # Return the rows of x selected by positions, as a list with one element per position group.
5
+   #
6
+   # x         : object that supports x[ rows , ] indexing.
7
+   # positions : tokens that were already split on spaces and signs before reaching here,
8
+   #             e.g. c("1", ":", "3", "5") for rows 1 to 3 plus row 5.
9
+   # Returns   : list named by the regrouped position strings ("1:3", "5").
10
+
11
+   shift_vec = function( x, n , fill=NA ){
12
+     # Shift x by n slots (right for n > 0, left for n < 0), padding the gap with fill.
13
+     if( n == 0 ){
14
+       return( x )
15
+     }else if( n > 0 ){
16
+       return( c( rep(fill, n), head(x, -n)) )
17
+     }else{
18
+       return( c( tail(x, length(x) + n), rep(fill, -n)) )
19
+     }
20
+   }
21
+
22
+   # Regroup the tokens: a ":" and the token right after it stay in the group of the
23
+   # token in front of the ":"; every other token opens a new group.
24
+   colon_pos = (positions == ":")
25
+   colon_after_pos = shift_vec( colon_pos , 1, FALSE )
26
+   group_num = cumsum( !( colon_pos | colon_after_pos ) )
27
+
28
+   position_strvec = sapply( split(positions, group_num) , function(elem){ paste( elem, collapse="" ) } )
29
+   # Label every group with its own text. This line used to read from the undefined name
30
+   # position_strvecp, so R stopped with "object not found" on every call.
31
+   names( position_strvec ) = position_strvec
32
+
33
+   # Turn each group into an integer vector of row numbers.
34
+   position_list = lapply( position_strvec, function(elem){
35
+     range_sep = ":"
36
+     if( ! grepl( range_sep, elem, fixed = TRUE ) ){
37
+       return( strtoi(elem) )   # single position
38
+     }
39
+     range = strsplit( elem, range_sep, fixed = TRUE )[[1]]
40
+     if( length(range) != 2 ){
41
+       print("Range should be specified with x:y form (meaning from x to y).")
42
+       return( 0 )   # index 0 selects no rows
43
+     }
44
+     range_int = strtoi(range)
45
+     seq( range_int[1], range_int[2] )
46
+   })
47
+
48
+   # One extraction per group; the list keeps the names set above.
49
+   lapply( position_list, function(nth){
50
+     x[ nth , ]
51
+   })
52
+ }
53
+
54
+ sts_print$random = function( x , n ){
55
+   # Draw n row numbers as ceiling(nrow(x) * runif(n)); the draws are independent, so rows may repeat.
56
+   picked = ceiling( nrow(x) * runif(n) )
57
+   # Rows are returned in ascending row-number order.
58
+   x[ sort(picked) , ]
59
+ }
60
+
@@ -0,0 +1,44 @@
1
+ module ProcPrint
2
+ include ProcSettingModule
3
+
4
+ source_r_file(__dir__, File.basename(__FILE__ , ".rb") + ".R")
5
+ validate_option("data", is_a: ["SymbolR", "String"], as: "SymbolR" , required: true)
6
+
7
+ def setting_for_head( setting )
8
+ setting.libname = "utils"
9
+ setting.func_name = "head"
10
+ setting.main_arg_and_how_to_treat = [ "n", :read_as_intvec, :allow_nil ]
11
+ setting.runtime_args = { "x" => param("data")}
12
+ setting.store_result = true
13
+ setting.print_opt = true
14
+ end
15
+
16
+ def setting_for_tail( setting )
17
+ setting.libname = "utils"
18
+ setting.func_name = "tail"
19
+ setting.main_arg_and_how_to_treat = [ "n", :read_as_intvec, :allow_nil ]
20
+ setting.runtime_args = { "x" => param("data") }
21
+ setting.store_result = true
22
+ setting.print_opt = true
23
+ end
24
+
25
+ def setting_for_nth( setting )
26
+ setting.libname = nil
27
+ setting.envname = "sts_print"
28
+ setting.func_name = "nth"
29
+ setting.main_arg_and_how_to_treat = [ "positions", :read_as_strvec, :allow_nil ]
30
+ setting.runtime_args = { "x" => param("data") }
31
+ setting.store_result = true
32
+ setting.print_opt = true
33
+ end
34
+
35
+ def setting_for_random( setting )
36
+ setting.libname = nil
37
+ setting.envname = "sts_print"
38
+ setting.func_name = "random"
39
+ setting.main_arg_and_how_to_treat = [ "n", :read_as_intvec, :allow_nil ]
40
+ setting.runtime_args = { "x" => param("data") }
41
+ setting.store_result = true
42
+ setting.print_opt = true
43
+ end
44
+ end
@@ -0,0 +1,17 @@
1
+ module ProcReg
2
+ include ProcSettingModule
3
+
4
+ add_setting_from( __dir__, "proc_common/factor.rb" )
5
+ validate_option("data", is_a: ["SymbolR", "String"], as: "SymbolR" , required: true)
6
+
7
+ def setting_for_lm( setting )
8
+ setting.libname = "stats"
9
+ setting.func_name = "lm"
10
+ setting.main_arg_and_how_to_treat = ["formula", :read_as_formula, :no_nil ]
11
+ setting.runtime_args = {"data" => one_from( result("factor", "numeric"), param("data")) }
12
+ setting.store_result = true
13
+ setting.print_opt = "summary"
14
+ end
15
+ end
16
+
17
+
@@ -0,0 +1,33 @@
1
+ sts_two = new.env()
2
+ # t_test: run t.test() on the two columns of `data` named in `vars`; extra arguments pass through.
3
+ sts_two$t_test = function( data , vars , ... ){
4
+   if( !( is.character(vars) && length(vars) == 2 ) ){
5
+     stop("vars argument requires character vector with size of 2")
6
+   }
7
+   # x and y stay as locals: t.test() builds its data.name label from the argument expressions.
8
+   x = vars[1]
9
+   y = vars[2]
10
+   t.test( data[[x]], data[[y]], ... )
11
+ }
12
+
13
+ sts_two$paired = function( data , vars , ... ){
14
+   # Paired variant: t.test() with paired = TRUE on the two columns of `data` named in `vars`.
15
+   if( !( is.character(vars) && length(vars) == 2 ) ){
16
+     stop("vars argument requires character vector with size of 2")
17
+   }
18
+   x = vars[1]
19
+   y = vars[2]
20
+   t.test( data[[x]], data[[y]], paired = TRUE, ... )
21
+ }
22
+
23
+ sts_two$wilcox_test = function( data , vars , ... ){
24
+   # wilcox.test() on the two columns of `data` named in `vars`; extra arguments pass through.
25
+   if( !( is.character(vars) && length(vars) == 2 ) ){
26
+     stop("vars argument requires character vector with size of 2")
27
+   }
28
+   x = vars[1]
29
+   y = vars[2]
30
+   wilcox.test( data[[x]], data[[y]], ... )
31
+ }
32
+
33
+
@@ -0,0 +1,38 @@
1
+ module ProcTwo
2
+ include ProcSettingModule
3
+
4
+ source_r_file(__dir__, File.basename(__FILE__ , ".rb") + ".R")
5
+ validate_option("data", is_a: ["SymbolR", "String"], as: "SymbolR" , required: true)
6
+
7
+ def setting_for_t_test( setting )
8
+ setting.libname = nil
9
+ setting.envname = "sts_two"
10
+ setting.func_name = "t_test"
11
+ setting.main_arg_and_how_to_treat = [ "vars" , :read_as_strvec, :no_nil]
12
+ setting.runtime_args = {"data" => param("data")}
13
+ setting.store_result = true
14
+ setting.print_opt = true
15
+ end
16
+
17
+ def setting_for_paired( setting )
18
+ setting.libname = nil
19
+ setting.envname = "sts_two"
20
+ setting.func_name = "paired"
21
+ setting.main_arg_and_how_to_treat = [ "vars" , :read_as_strvec, :no_nil]
22
+ setting.runtime_args = {"data" => param("data")}
23
+ setting.store_result = true
24
+ setting.print_opt = true
25
+ end
26
+
27
+ def setting_for_wilcox_test( setting )
28
+ setting.libname = nil
29
+ setting.envname = "sts_two"
30
+ setting.func_name = "wilcox_test"
31
+ setting.main_arg_and_how_to_treat = [ "vars" , :read_as_strvec, :no_nil]
32
+ setting.runtime_args = {"data" => param("data")}
33
+ setting.store_result = true
34
+ setting.print_opt = true
35
+ end
36
+
37
+ end
38
+
@@ -0,0 +1,105 @@
1
+ sts_uni = new.env()
2
+ # list2str: render a named list as text, one "key<TAB>value" line per entry; returns a single string.
3
+ sts_uni$list2str = function ( lst, exclude=c() ){
4
+   # Entries whose name is listed in exclude are skipped.
5
+   # An empty list gives "": seq_along() yields no indices, whereas seq(1, 0) would iterate over 1 and 0.
6
+   str = ""
7
+   for( i in seq_along(lst) ){
8
+     key = names(lst)[i]
9
+     if( key %in% exclude ) next
10
+     value = lst[[i]]
11
+     if( length( value ) == 1 ){
12
+       str = paste0( str, key, "\t", value, "\n")
13
+     }else if( is.null( attr( value, "names"))){
14
+       str = paste0( str, key, "\t", paste( value , collapse=" ") , "\n")
15
+     }else{   # named vector: key on its own line, then one "<TAB>name<TAB>value" line per element
16
+       str = paste0( str, key, "\n" )
17
+       str = paste0( str, paste( mapply( paste0, "\t", names(value), "\t" , value), collapse="\n") , "\n")
18
+     }
19
+   }
20
+   return( str )
21
+ }
22
+
23
+
24
+ sts_uni$var = function( data , vars , hist = FALSE , ... ){
25
+   # Summarise each column of data named in vars and print the summary.
26
+   #
27
+   # data : object with colnames() and [[ ]] column access (e.g. a data.frame).
28
+   # vars : character vector of column names of data (at least one).
29
+   # hist : when TRUE, also draw one histogram per variable, stacked in a single column.
30
+   # Returns the list of per-variable statistics, keyed by variable name; the text summary
31
+   # is written with cat().
32
+   if( (! is.character(vars)) || length(vars) == 0 ){
33
+     stop("vars argument requires character vector")
34
+   }
35
+   check_name_existance = vars %in% colnames(data)
36
+   if( ! all(check_name_existance) ){
37
+     # Name the offending entries; the previous diagnostic printed only TRUE/FALSE flags.
38
+     print( paste( "Not found in data:", paste( vars[ ! check_name_existance ], collapse=", " ) ) )
39
+     stop("vars argument should be colnames of data")
40
+   }
41
+
42
+   results = list()
43
+   for( var in vars ){
44
+     vec = data[[ var ]]
45
+     missing_pos = is.na( vec )
46
+     non_missing = vec[ ! missing_pos ]
47
+
48
+     # Probabilities run from 1 down to 0: element 1 is the maximum, element 5 the 75% point,
49
+     # element 7 the 25% point and the last element the minimum.
50
+     quantiles = quantile(non_missing, probs = c(1, 0.99, 0.95, 0.90, 0.75, 0.5, 0.25, 0.1, 0.05, 0.01, 0), na.rm = TRUE)
51
+
52
+     # Element order is kept as before because list2str() prints entries in list order.
53
+     # mean, deviation, median and the quantiles use the non-missing values only.
54
+     results[[var]] = list(
55
+       vec = vec,
56
+       mean = mean(non_missing),
57
+       N = length( vec ),
58
+       missing = sum( missing_pos ),
59
+       n = sum( ! missing_pos ),
60
+       max = head(quantiles, 1),
61
+       min = tail(quantiles, 1),
62
+       deviation = sd(non_missing),
63
+       median = median(non_missing),
64
+       IQR = c( quantiles[7] , quantiles[5] ),
65
+       quantiles = quantiles
66
+     )
67
+     # To be implemented: skewness, kurtosis
68
+   }
69
+
70
+   result_num = length(results)
71
+   result_str = ""
72
+   for( i in seq_len(result_num) ){
73
+     result_str = paste0( result_str, names(results)[i], " ", "statistics\n")
74
+     result_str = paste0( result_str, sts_uni$list2str(results[[i]], exclude=c("vec")), "\n")
75
+   }
76
+
77
+   if( hist ){
78
+     layout( matrix(seq_len(result_num), ncol = 1))
79
+     for( i in seq_len(result_num) ){
80
+       hist( results[[ i ]]$vec , main = paste( "Histogram of" , names(results)[i] ), xlab=names(results)[i] )
81
+     }
82
+     layout( matrix(c(1), ncol = 1))   # back to a single panel
83
+   }
84
+
85
+   cat( result_str ) # output is done using cat()
86
+   return(results) # return value to be used by other instructions is results
87
+ }
88
+
89
+
90
+ sts_uni$qqplot = function( results , var = NULL , qqline = FALSE, ... ){
91
+   # Normal Q-Q plot of the values stored under $vec for one entry of results.
92
+   # var = NULL uses the first entry; qqline = TRUE adds the reference line.
93
+   if( !( is.null(var) || length(var) == 1 ) ){
94
+     stop("main argument needs to be length of 1 character vector")
95
+   }
96
+   # Fall back to position 1 when no variable name was given.
97
+   selected = if( is.null(var) ) results[[1]] else results[[var]]
98
+   vec = selected$vec
99
+
100
+   qqnorm(vec)
101
+   # The logical argument qqline does not hide the function: R skips non-function bindings when resolving a call.
102
+   if( qqline ) qqline(vec)
103
+ }
104
+
105
+