raka 0.3.1 → 0.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +343 -119
- data/VERSION +1 -1
- data/bin/raka +37 -12
- data/lib/{compile.rb → raka/compile.rb} +10 -4
- data/lib/{interface.rbs → raka/interface.rbs} +0 -0
- data/lib/{lang → raka/lang}/psql/impl.rb +0 -0
- data/lib/{lang → raka/lang}/python/impl.rb +0 -0
- data/lib/raka/lang/r/impl.rb +23 -0
- data/lib/{lang → raka/lang}/shell/impl.rb +0 -0
- data/lib/{output_type.rb → raka/output_type.rb} +0 -0
- data/lib/{protocol.rb → raka/protocol.rb} +0 -0
- data/lib/{token.rb → raka/token.rb} +0 -0
- data/lib/raka.rb +9 -4
- metadata +13 -14
- data/lib/lang/r/impl.rb +0 -38
- data/lib/lang/r/io.R +0 -113
- data/lib/temp.json +0 -9167
data/bin/raka
CHANGED
@@ -17,8 +17,6 @@ def detect_main
|
|
17
17
|
return rakas[0] if rakas.length == 1
|
18
18
|
end
|
19
19
|
|
20
|
-
entry = detect_main
|
21
|
-
|
22
20
|
options = { rake: {}, raka_finished: false }
|
23
21
|
def set_option(opts, key, value)
|
24
22
|
if opts[:raka_finished]
|
@@ -28,17 +26,31 @@ def set_option(opts, key, value)
|
|
28
26
|
end
|
29
27
|
end
|
30
28
|
parser = OptionParser.new do |opts|
|
31
|
-
opts.banner = 'Usage: raka [options]
|
29
|
+
opts.banner = 'Usage: raka [options] <output> -- [rake options]'
|
32
30
|
|
33
31
|
opts.on('-v', '--[no-]verbose', 'Run verbosely') do |v|
|
34
32
|
set_option(options, :verbose, v)
|
35
33
|
end
|
34
|
+
|
35
|
+
opts.on('-f', '--file FILE', String, 'Run even when up to date') do |s|
|
36
|
+
set_option(options, :file, s)
|
37
|
+
end
|
38
|
+
|
39
|
+
opts.on('-j', '--jobs JOBS', Integer, 'Run in parallel') do |n|
|
40
|
+
set_option(options, :jobs, n)
|
41
|
+
end
|
36
42
|
end
|
37
43
|
|
44
|
+
if ARGV.empty?
|
45
|
+
puts parser.help
|
46
|
+
exit(1)
|
47
|
+
end
|
38
48
|
both_args = ARGV.join(' ').split(' -- ')
|
39
|
-
extra_args = both_args[1]
|
40
49
|
self_args = both_args[0].split(/\s+/)
|
41
50
|
parser.parse!(self_args)
|
51
|
+
extra_args = (both_args[1] || ' ').lstrip
|
52
|
+
|
53
|
+
entry = options[:file] || detect_main
|
42
54
|
|
43
55
|
env = if options[:verbose]
|
44
56
|
'LOG_LEVEL=0 '
|
@@ -46,13 +58,26 @@ env = if options[:verbose]
|
|
46
58
|
''
|
47
59
|
end
|
48
60
|
targets = self_args.join(' ')
|
49
|
-
cmd =
|
61
|
+
cmd = ''
|
62
|
+
opt_str = "-f #{entry}"
|
63
|
+
opt_str += " -m -j #{options[:jobs]}" if options.key?(:jobs)
|
64
|
+
cmd += "#{env}rake #{opt_str} #{extra_args} #{targets}"
|
50
65
|
puts cmd
|
51
|
-
output
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
puts 'Error: rake returns the following information:'
|
66
|
+
output = []
|
67
|
+
ro, out = IO.pipe
|
68
|
+
re, err = IO.pipe
|
69
|
+
pid = fork do
|
70
|
+
status = system(cmd, out: out, err: err)
|
71
|
+
puts 'Error: rake returns the following information:' unless status
|
72
|
+
exit($CHILD_STATUS.exitstatus)
|
57
73
|
end
|
58
|
-
|
74
|
+
out.close
|
75
|
+
err.close
|
76
|
+
ro.each_line do |l|
|
77
|
+
puts l
|
78
|
+
output << l.chomp
|
79
|
+
end
|
80
|
+
re.each_line { |l| puts l; }
|
81
|
+
|
82
|
+
Process.wait(pid)
|
83
|
+
puts 'All targets are up to date' if output.empty? && $CHILD_STATUS.exitstatus == 0
|
@@ -28,8 +28,7 @@ class DSLCompiler
|
|
28
28
|
# Raka task structure, input task is rake's task pushed into blocks
|
29
29
|
def dsl_task(token, task)
|
30
30
|
name = task.name
|
31
|
-
deps = task.prerequisites
|
32
|
-
|
31
|
+
deps = task.prerequisites
|
33
32
|
output_info = token._parse_output_ name
|
34
33
|
task_info = {
|
35
34
|
name: name,
|
@@ -41,6 +40,10 @@ class DSLCompiler
|
|
41
40
|
OpenStruct.new(output_info.to_h.merge(task_info))
|
42
41
|
end
|
43
42
|
|
43
|
+
def stem(path)
|
44
|
+
File.basename(path, File.extname(path))
|
45
|
+
end
|
46
|
+
|
44
47
|
# resolve auto variables with only output info,
|
45
48
|
# useful when resolve extra deps (task is not available yet)
|
46
49
|
def resolve_by_output(target, output_info)
|
@@ -50,8 +53,10 @@ class DSLCompiler
|
|
50
53
|
.gsub('$(scope)', info.scope.nil? ? '' : info.scope)
|
51
54
|
.gsub('$(target_scope)', info.target_scope.nil? ? '' : info.target_scope)
|
52
55
|
.gsub('$(output)', info.output)
|
53
|
-
.gsub('$(output_stem)', info.stem)
|
56
|
+
.gsub('$(output_stem)', stem(info.stem))
|
54
57
|
.gsub('$(input_stem)', info.input_stem.nil? ? '' : info.input_stem)
|
58
|
+
.gsub('$(func)', info.func.nil? ? '' : info.func)
|
59
|
+
.gsub('$(ext)', info.ext)
|
55
60
|
.gsub('$@', info.name)
|
56
61
|
|
57
62
|
protect_percent_symbol text do |safe_text|
|
@@ -75,7 +80,8 @@ class DSLCompiler
|
|
75
80
|
|
76
81
|
protect_percent_symbol text do |safe_text|
|
77
82
|
# add numbered auto variables like $0, $2 referring to the first and third deps
|
78
|
-
safe_text.gsub(/\$\(dep(\d+)\)/, '%{\1}') % array_to_hash(task.deps)
|
83
|
+
safe_text = safe_text.gsub(/\$\(dep(\d+)\)/, '%{\1}') % array_to_hash(task.deps)
|
84
|
+
safe_text.gsub(/\$\(dep(\d+)_stem\)/, '%{\1}') % array_to_hash(task.deps.map {|d| stem(d)})
|
79
85
|
end
|
80
86
|
end
|
81
87
|
|
File without changes
|
File without changes
|
File without changes
|
@@ -0,0 +1,23 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative '../../protocol'
|
4
|
+
|
5
|
+
# r language protocol
|
6
|
+
class R
|
7
|
+
def initialize(libs = [], **kwargs)
|
8
|
+
@libs = libs
|
9
|
+
super(**kwargs)
|
10
|
+
end
|
11
|
+
|
12
|
+
def build(code, _)
|
13
|
+
libraries = @libs.map { |name| "suppressPackageStartupMessages(library(#{name}))" }
|
14
|
+
|
15
|
+
[libraries, code].join "\n"
|
16
|
+
end
|
17
|
+
|
18
|
+
def run_script(env, fname, _task)
|
19
|
+
env.send :sh, "Rscript #{fname}"
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
creator :r, R
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
data/lib/raka.rb
CHANGED
@@ -2,13 +2,14 @@
|
|
2
2
|
|
3
3
|
require 'logger'
|
4
4
|
|
5
|
-
require_relative './compile'
|
6
|
-
require_relative './protocol'
|
7
|
-
require_relative './token'
|
5
|
+
require_relative './raka/compile'
|
6
|
+
require_relative './raka/protocol'
|
7
|
+
require_relative './raka/token'
|
8
8
|
|
9
9
|
# initialize raka
|
10
10
|
class Raka
|
11
11
|
Pattern = Pattern
|
12
|
+
P = Pattern
|
12
13
|
attr_reader :logger
|
13
14
|
|
14
15
|
def create_logger(level)
|
@@ -53,7 +54,7 @@ class Raka
|
|
53
54
|
@options.input_types |= @options.output_types # any output can be used as intermediate
|
54
55
|
# specify root of scopes in options, scopes will append to each root
|
55
56
|
@scopes = options.scopes.empty? ? [] : [options.scopes]
|
56
|
-
@options.lang.each { |path| load File::join(File::dirname(__FILE__), "#{path}/impl.rb") }
|
57
|
+
@options.lang.each { |path| load File::join(File::dirname(__FILE__), "raka/#{path}/impl.rb") }
|
57
58
|
@options.user_lang.each { |path| load path.to_s + '.rb' }
|
58
59
|
|
59
60
|
# These are where the dsl starts
|
@@ -67,4 +68,8 @@ class Raka
|
|
67
68
|
block.call
|
68
69
|
@scopes.pop
|
69
70
|
end
|
71
|
+
|
72
|
+
def stem(path)
|
73
|
+
File.basename(path, File.extname(path))
|
74
|
+
end
|
70
75
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: raka
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.1
|
4
|
+
version: 0.3.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yarray
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-05-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -111,7 +111,8 @@ dependencies:
|
|
111
111
|
description: An extensible, concise and light weight DSL on Rake to automate data
|
112
112
|
processing tasks
|
113
113
|
email: '08to09@gmail.com'
|
114
|
-
executables:
|
114
|
+
executables:
|
115
|
+
- raka
|
115
116
|
extensions: []
|
116
117
|
extra_rdoc_files:
|
117
118
|
- LICENSE
|
@@ -121,18 +122,16 @@ files:
|
|
121
122
|
- README.md
|
122
123
|
- VERSION
|
123
124
|
- bin/raka
|
124
|
-
- lib/compile.rb
|
125
|
-
- lib/interface.rbs
|
126
|
-
- lib/lang/psql/impl.rb
|
127
|
-
- lib/lang/python/impl.rb
|
128
|
-
- lib/lang/r/impl.rb
|
129
|
-
- lib/lang/r/io.R
|
130
|
-
- lib/lang/shell/impl.rb
|
131
|
-
- lib/output_type.rb
|
132
|
-
- lib/protocol.rb
|
133
125
|
- lib/raka.rb
|
134
|
-
- lib/temp.json
|
135
|
-
- lib/token.rb
|
126
|
+
- lib/raka/compile.rb
|
127
|
+
- lib/raka/interface.rbs
|
128
|
+
- lib/raka/lang/psql/impl.rb
|
129
|
+
- lib/raka/lang/python/impl.rb
|
130
|
+
- lib/raka/lang/r/impl.rb
|
131
|
+
- lib/raka/lang/shell/impl.rb
|
132
|
+
- lib/raka/output_type.rb
|
133
|
+
- lib/raka/protocol.rb
|
134
|
+
- lib/raka/token.rb
|
136
135
|
homepage: http://github.com/yarray/raka
|
137
136
|
licenses:
|
138
137
|
- MIT
|
data/lib/lang/r/impl.rb
DELETED
@@ -1,38 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require_relative '../protocol'
|
4
|
-
|
5
|
-
# r language protocol
|
6
|
-
class R
|
7
|
-
def initialize(src, libs = [], **kwargs)
|
8
|
-
@src = src
|
9
|
-
@libs = libs
|
10
|
-
super(**kwargs)
|
11
|
-
end
|
12
|
-
|
13
|
-
def build(code, _)
|
14
|
-
libraries = ([
|
15
|
-
:pipeR
|
16
|
-
] + @libs).map { |name| "suppressPackageStartupMessages(library(#{name}))" }
|
17
|
-
|
18
|
-
sources = ["source('#{File.dirname(__FILE__)}/io.R')"] +
|
19
|
-
(@src ? [@src] : []).map { |name| "source('#{SRC_DIR}/#{name}.R')" }
|
20
|
-
|
21
|
-
extra = [
|
22
|
-
'`|` <- `%>>%`',
|
23
|
-
"conn_args <- list(host='#{HOST}', user='#{USER}', dbname='#{DB}', port='#{PORT}')",
|
24
|
-
'args <- commandArgs(trailingOnly = T)',
|
25
|
-
'sql_input <- init_sql_input(conn_args, args[1])',
|
26
|
-
'table_input <- init_table_input(conn_args, args[1])',
|
27
|
-
'table_output <- init_table_output(conn_args, args[1])'
|
28
|
-
]
|
29
|
-
|
30
|
-
[libraries, sources, extra, code].join "\n"
|
31
|
-
end
|
32
|
-
|
33
|
-
def run_script(env, fname, task)
|
34
|
-
env.send :sh, "Rscript #{fname} '#{task.scope || 'public'}'"
|
35
|
-
end
|
36
|
-
end
|
37
|
-
|
38
|
-
creator :r, R
|
data/lib/lang/r/io.R
DELETED
@@ -1,113 +0,0 @@
|
|
1
|
-
suppressPackageStartupMessages(library(RPostgreSQL))
|
2
|
-
suppressPackageStartupMessages(library(ggplot2))
|
3
|
-
suppressPackageStartupMessages(library(gridExtra))
|
4
|
-
|
5
|
-
# input
|
6
|
-
# -----
|
7
|
-
file_input <- function(fileName, coltypes = NA) {
|
8
|
-
read.table(fileName, header = T, sep = ',', colClasses = coltypes)
|
9
|
-
}
|
10
|
-
|
11
|
-
join_file_input <- function(namesstr, key) {
|
12
|
-
files <- strsplit(namesstr, split = '\\+')[[1]]
|
13
|
-
dataList <- lapply(files, (function(f) fileInput(f)))
|
14
|
-
do.call(merge, c(dataList, by = key))
|
15
|
-
}
|
16
|
-
|
17
|
-
init_table_input <- function(conn_args, schema) {
|
18
|
-
function(name, cols = '*', where = 'true') {
|
19
|
-
colstr <- paste(cols, collapse = ',')
|
20
|
-
|
21
|
-
sql <- sprintf("SELECT %s FROM %s WHERE %s", colstr, name, where)
|
22
|
-
init_sql_input(conn_args, schema)(sql)
|
23
|
-
}
|
24
|
-
}
|
25
|
-
|
26
|
-
init_sql_input <- function(conn_args, schema) {
|
27
|
-
function(sql) {
|
28
|
-
conn <- do.call(dbConnect, c(dbDriver('PostgreSQL'), conn_args))
|
29
|
-
on.exit(dbDisconnect(conn))
|
30
|
-
dbGetQuery(conn, paste('SET search_path TO', paste(c(schema, 'public'), sep = ',')))
|
31
|
-
buffer <- dbSendQuery(conn, sql)
|
32
|
-
fetch(buffer, n=-1)
|
33
|
-
}
|
34
|
-
}
|
35
|
-
|
36
|
-
create_parents <- function(output) {
|
37
|
-
dir.create(dirname(output), showWarnings = FALSE, recursive = TRUE)
|
38
|
-
}
|
39
|
-
|
40
|
-
# Output
|
41
|
-
# ------
|
42
|
-
# TODO consider deprecating this, perhaps more specific ones like graph_output is still useful
|
43
|
-
auto_output <- function(report, ...) {
|
44
|
-
actual <- switch(class(report)[1],
|
45
|
-
gg = ggplot_output,
|
46
|
-
data.frame = csv_output,
|
47
|
-
print)
|
48
|
-
actual(report, ...)
|
49
|
-
}
|
50
|
-
|
51
|
-
rplot_output <- function(f, output, size = c(8, 8)) {
|
52
|
-
create_parents(output)
|
53
|
-
size <- ceiling(size / 2.54 * 300 / 96) # convert cm to inch
|
54
|
-
par(mar = c(0,0,0,0))
|
55
|
-
# use pdf since png can be very weird, e.g. radarplot
|
56
|
-
pdf(output, width = size[1], height = size[2], pointsize = 12)
|
57
|
-
f()
|
58
|
-
dev.off()
|
59
|
-
}
|
60
|
-
|
61
|
-
# use facet or latex subfigure, do not handle list anymore
|
62
|
-
ggplot_output <- function(report, output, size = c(15, 15), fontScale = 1, compact = F) {
|
63
|
-
create_parents(output)
|
64
|
-
if (compact) {
|
65
|
-
margin <- 1
|
66
|
-
} else {
|
67
|
-
margin <- 5
|
68
|
-
}
|
69
|
-
|
70
|
-
report <- report + theme_bw() +
|
71
|
-
theme(axis.title = element_text(size = 13 * fontScale),
|
72
|
-
axis.text = element_text(size = 11 * fontScale),
|
73
|
-
legend.title = element_text(size = 13 * fontScale),
|
74
|
-
legend.text = element_text(size = 11 * fontScale),
|
75
|
-
legend.key.size = unit(13 * fontScale, 'pt'),
|
76
|
-
legend.margin = unit(0, "cm"),
|
77
|
-
legend.background = element_rect(fill = alpha('white', 0)),
|
78
|
-
plot.margin = unit(c(margin, margin, 0, 0),"mm"))
|
79
|
-
|
80
|
-
width <- size[1]
|
81
|
-
height <- size[2]
|
82
|
-
|
83
|
-
ggsave(report, filename = output,
|
84
|
-
dpi = 300, units = 'cm', width = width, height = height, limitsize = FALSE)
|
85
|
-
}
|
86
|
-
|
87
|
-
csv_output <- function(report, output) {
|
88
|
-
create_parents(output)
|
89
|
-
write.table(format(report, digits = 4), file = toString(output), row.names = FALSE,
|
90
|
-
sep = ',', quote = FALSE)
|
91
|
-
}
|
92
|
-
|
93
|
-
txt_output <- function(report, output) {
|
94
|
-
create_parents(output)
|
95
|
-
write.table(format(report, digits = 4), file = toString(output), row.names = FALSE,
|
96
|
-
sep = ',', quote = FALSE)
|
97
|
-
}
|
98
|
-
|
99
|
-
init_table_output <- function(conn_args, schema) {
|
100
|
-
function(report, output, placeholder = NULL) {
|
101
|
-
conn <- do.call(dbConnect, c(dbDriver('PostgreSQL'), conn_args))
|
102
|
-
on.exit(dbDisconnect(conn))
|
103
|
-
dbGetQuery(conn, paste('SET search_path TO', paste(c(schema, 'public'), sep = ',')))
|
104
|
-
|
105
|
-
if (dbExistsTable(conn, output)) {
|
106
|
-
dbRemoveTable(conn, output)
|
107
|
-
}
|
108
|
-
dbWriteTable(conn, output, report, row.names = F)
|
109
|
-
if (!is.null(placeholder)) {
|
110
|
-
write.table(data.frame(), file = placeholder, col.names = FALSE)
|
111
|
-
}
|
112
|
-
}
|
113
|
-
}
|