raka 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +21 -0
- data/README.md +261 -0
- data/VERSION +1 -0
- data/lib/compile.rb +173 -0
- data/lib/interface.rbs +23 -0
- data/lib/lang/psql/impl.rb +59 -0
- data/lib/lang/python/impl.rb +32 -0
- data/lib/lang/r/impl.rb +38 -0
- data/lib/lang/r/io.R +113 -0
- data/lib/lang/shell/impl.rb +17 -0
- data/lib/output_type.rb +36 -0
- data/lib/protocol.rb +122 -0
- data/lib/raka.rb +70 -0
- data/lib/temp.json +9167 -0
- data/lib/token.rb +165 -0
- metadata +158 -0
data/lib/lang/r/io.R
ADDED
@@ -0,0 +1,113 @@
|
|
1
|
+
suppressPackageStartupMessages(library(RPostgreSQL))
|
2
|
+
suppressPackageStartupMessages(library(ggplot2))
|
3
|
+
suppressPackageStartupMessages(library(gridExtra))
|
4
|
+
|
5
|
+
# input
|
6
|
+
# -----
|
7
|
+
# Read a comma-separated file with a header row into a data frame.
#
# fileName: path of the file to read.
# coltypes: passed to read.table as `colClasses`; the default NA leaves
#           column types to read.table.
file_input <- function(fileName, coltypes = NA) {
  read.table(
    file = fileName,
    header = TRUE,
    sep = ',',
    colClasses = coltypes
  )
}
|
10
|
+
|
11
|
+
# Read several CSV files and merge them on a shared key.
#
# namesstr: file names joined with '+', e.g. "a.csv+b.csv".
# key:      column name(s) used as the `by` argument of merge.
#
# Returns the merged data frame; with a single file, that file's data frame.
join_file_input <- function(namesstr, key) {
  files <- strsplit(namesstr, split = '+', fixed = TRUE)[[1]]
  # file_input is the reader defined in this file (the previous `fileInput`
  # name did not exist).
  dataList <- lapply(files, file_input)
  # merge() only joins two data frames at a time, so fold pairwise; passing
  # `by = key` explicitly also keeps multi-column keys intact.
  Reduce(function(left, right) merge(left, right, by = key), dataList)
}
|
16
|
+
|
17
|
+
# Build a table reader bound to a connection and schema.
#
# Returns function(name, cols, where) that assembles a
# "SELECT <cols> FROM <name> WHERE <where>" statement and runs it through
# init_sql_input(conn_args, schema).
init_table_input <- function(conn_args, schema) {
  function(name, cols = '*', where = 'true') {
    # cols may be a vector of column names; join them with commas
    query <- sprintf("SELECT %s FROM %s WHERE %s",
                     paste(cols, collapse = ','), name, where)
    reader <- init_sql_input(conn_args, schema)
    reader(query)
  }
}
|
25
|
+
|
26
|
+
# Build a SQL reader bound to a connection and schema.
#
# conn_args: list of arguments appended to the PostgreSQL driver in the
#            dbConnect call.
# schema:    schema name(s) placed before 'public' on the search path.
#
# Returns function(sql) that opens a connection, sets the search path, runs
# `sql`, and returns all fetched rows. The connection is closed on exit.
init_sql_input <- function(conn_args, schema) {
  function(sql) {
    conn <- do.call(dbConnect, c(dbDriver('PostgreSQL'), conn_args))
    on.exit(dbDisconnect(conn))
    # `collapse` (not `sep`) joins the elements of one vector into a single
    # comma-separated string, so exactly one statement is sent.
    search_path <- paste(c(schema, 'public'), collapse = ',')
    dbGetQuery(conn, paste('SET search_path TO', search_path))
    buffer <- dbSendQuery(conn, sql)
    # release the result set whether or not fetching succeeds
    tryCatch(fetch(buffer, n = -1), finally = dbClearResult(buffer))
  }
}
|
35
|
+
|
36
|
+
# Create the directory that will contain `output`, including missing
# ancestors. Warnings (e.g. the directory already exists) are suppressed.
create_parents <- function(output) {
  parent <- dirname(output)
  dir.create(parent, showWarnings = FALSE, recursive = TRUE)
}
|
39
|
+
|
40
|
+
# Output
|
41
|
+
# ------
|
42
|
+
# TODO: consider deprecating this; more specific functions such as graph_output are perhaps still useful
|
43
|
+
# Dispatch `report` to an output function chosen by its first class:
# "gg" goes to ggplot_output, "data.frame" to csv_output, anything else to
# print. Extra arguments are forwarded unchanged.
auto_output <- function(report, ...) {
  kind <- class(report)[1]
  writer <- if (kind == 'gg') {
    ggplot_output
  } else if (kind == 'data.frame') {
    csv_output
  } else {
    print
  }
  writer(report, ...)
}
|
50
|
+
|
51
|
+
# Render a base-graphics plot into a PDF file.
#
# f:      zero-argument function that draws the plot.
# output: path of the PDF to write; parent directories are created.
# size:   c(width, height); each value is divided by 2.54, scaled by 300/96
#         and rounded up before being passed to pdf().
#
# Returns the value of dev.off().
rplot_output <- function(f, output, size = c(8, 8)) {
  create_parents(output)
  size <- ceiling(size / 2.54 * 300 / 96) # convert cm to inch
  # NOTE(review): par() runs before pdf() opens its device, so it presumably
  # does not affect the PDF being written -- confirm this is intended.
  par(mar = c(0,0,0,0))
  # use pdf since png can be very weird, e.g. radarplot
  pdf(output, width = size[1], height = size[2], pointsize = 12)
  # make sure the device is closed even when f() fails, so a failed plot
  # does not leave an open device behind
  closed <- FALSE
  on.exit(if (!closed) dev.off(), add = TRUE)
  f()
  closed <- TRUE
  dev.off()
}
|
60
|
+
|
61
|
+
# use facet or latex subfigure, do not handle list anymore
|
62
|
+
# Apply a black-and-white theme with scaled fonts to a ggplot object and
# save it.
#
# report:    plot object that accepts `+ theme_bw()`.
# output:    destination file; parent directories are created.
# size:      c(width, height) handed to ggsave in centimetres.
# fontScale: multiplier applied to the base font sizes (13 and 11).
# compact:   when true the top/right plot margin is 1 mm instead of 5 mm.
ggplot_output <- function(report, output, size = c(15, 15), fontScale = 1, compact = F) {
  create_parents(output)
  margin <- if (compact) 1 else 5
  title_size <- 13 * fontScale
  text_size <- 11 * fontScale

  styling <- theme(
    axis.title = element_text(size = title_size),
    axis.text = element_text(size = text_size),
    legend.title = element_text(size = title_size),
    legend.text = element_text(size = text_size),
    legend.key.size = unit(title_size, 'pt'),
    legend.margin = unit(0, "cm"),
    legend.background = element_rect(fill = alpha('white', 0)),
    plot.margin = unit(c(margin, margin, 0, 0), "mm")
  )
  report <- report + theme_bw() + styling

  ggsave(report, filename = output, dpi = 300, units = 'cm',
         width = size[1], height = size[2], limitsize = FALSE)
}
|
86
|
+
|
87
|
+
# Write `report` as comma-separated text with a header line, no row names
# and no quoting. Values are formatted with 4 significant digits first.
# Parent directories of `output` are created.
csv_output <- function(report, output) {
  create_parents(output)
  formatted <- format(report, digits = 4)
  write.table(
    formatted,
    file = toString(output),
    row.names = FALSE,
    sep = ',',
    quote = FALSE
  )
}
|
92
|
+
|
93
|
+
# Write `report` to a text file. The layout is comma-separated with a header
# line, no row names and no quoting; values are formatted with 4 significant
# digits. Parent directories of `output` are created.
txt_output <- function(report, output) {
  create_parents(output)
  target <- toString(output)
  rendered <- format(report, digits = 4)
  write.table(rendered, file = target, sep = ',',
              quote = FALSE, row.names = FALSE)
}
|
98
|
+
|
99
|
+
# Build a table writer bound to a connection and schema.
#
# Returns function(report, output, placeholder) that replaces table `output`
# with the contents of `report`. When `placeholder` is given, an empty file
# is written at that path afterwards.
init_table_output <- function(conn_args, schema) {
  function(report, output, placeholder = NULL) {
    conn <- do.call(dbConnect, c(dbDriver('PostgreSQL'), conn_args))
    on.exit(dbDisconnect(conn))
    # `collapse` (not `sep`) joins the elements of one vector into a single
    # comma-separated string, so exactly one statement is sent.
    search_path <- paste(c(schema, 'public'), collapse = ',')
    dbGetQuery(conn, paste('SET search_path TO', search_path))

    # drop any previous version of the table before writing the new one
    if (dbExistsTable(conn, output)) {
      dbRemoveTable(conn, output)
    }
    dbWriteTable(conn, output, report, row.names = FALSE)
    if (!is.null(placeholder)) {
      write.table(data.frame(), file = placeholder, col.names = FALSE)
    }
  }
}
|
@@ -0,0 +1,17 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative '../../protocol'
|
4
|
+
|
5
|
+
# shell(bash) protocol
|
6
|
+
class Shell
  # @implements LanguageImpl

  # Returns the script text: a `set -e` line followed by the given code.
  def build(code, _task)
    script_lines = ['set -e', code]
    script_lines.join("\n")
  end

  # Executes the script file with bash through run_cmd.
  def run_script(env, fname, _task)
    command = "bash #{fname}"
    run_cmd(env, command)
  end
end
|
16
|
+
|
17
|
+
# Register `shell` as a DSL entry point backed by the Shell implementation.
creator :shell, Shell
|
data/lib/output_type.rb
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
# Naming strategies for rule outputs.
class OutputType
  # Output type whose output is the file itself.
  class Default
    # File name is for checking the existence of outputs.
    #
    # @param rule_name [String] name of the rule output
    # @param scope [#to_s] optional prefix; joined with '/' when non-empty
    # @return [String]
    def file_name(rule_name, scope = '')
      prefix = scope.to_s
      separator = prefix.empty? ? '' : '/'
      "#{prefix}#{separator}#{rule_name}"
    end

    # Real name is for further processing, like schema.table_name in a
    # database. By default the file is the output, but when the file is
    # merely a placeholder the two will differ.
    #
    # @return [String] same value as #file_name for the same arguments
    def real_name(rule_name, scope = '')
      # forward the arguments; file_name cannot be called without a rule name
      file_name(rule_name, scope)
    end

    # @return [String] the unqualified class name with its first letter
    #   downcased, e.g. "table" for OutputType::Table
    def ext
      res = self.class.to_s.split('::').last
      res[0] = res[0].downcase
      res
    end
  end

  # Output type whose file is a placeholder for a database table.
  class Table < Default
    # Strips a trailing ".table" from the file name and prefixes the scope
    # with a '.' separator when the scope is non-empty.
    def real_name(file_name, scope = '')
      stem = file_name.gsub(/\.table$/, '')
      prefix = scope.to_s
      separator = prefix.empty? ? '' : '.'
      "#{prefix}#{separator}#{stem}"
    end
  end

  # Instantiates the output type named by the symbol (:table -> Table).
  # Falls back to Default when no such type is declared inside OutputType.
  #
  # @param sym [Symbol, String]
  # @return [Default]
  def self.create(sym)
    const_name = sym.to_s.capitalize
    # only look at constants declared directly in OutputType, so that a
    # symbol like :string never resolves to an unrelated top-level class
    known = const_name.match?(/\A[A-Z]\w*\z/) && const_defined?(const_name, false)
    (known ? const_get(const_name, false) : Default).new
  end

  # Instance-level form kept for existing callers; delegates to .create.
  def create(sym)
    self.class.create(sym)
  end

  # Replaces every symbol in the list with the matching output type instance
  # and leaves other objects untouched.
  #
  # @param opt [Array<Symbol, Object>]
  # @return [Array<Object>]
  def self.parse_option(opt)
    opt.map { |obj| obj.is_a?(Symbol) ? create(obj) : obj }
  end
end
|
data/lib/protocol.rb
ADDED
@@ -0,0 +1,122 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'fileutils'
require 'open3'
require 'securerandom'
|
5
|
+
|
6
|
+
# Strips the indentation shared by all non-blank lines of the code.
#
# Lines without any indentation count as indent zero, so code that already
# starts at column 0 keeps its nested indentation. Blank lines are ignored
# when determining the common indent.
#
# @param code [String]
# @return [String] a new string with the common leading whitespace removed
def remove_common_indent(code)
  # `*` (not `+`) so unindented lines contribute an empty indent; pick the
  # shortest one rather than the lexicographically smallest
  common = code.scan(/^[ \t]*(?=\S)/).min_by(&:length)
  code.gsub(/^#{common}/, '')
end
|
9
|
+
|
10
|
+
# Writes the content to a new file under /tmp named by a random UUID.
#
# @param content [String] text to store
# @return [String] path of the file that was written
def create_tmp(content)
  path = "/tmp/#{SecureRandom.uuid}"
  File.write(path, content)
  path
end
|
19
|
+
|
20
|
+
# A protocol conforms to the following interface:
|
21
|
+
#
|
22
|
+
# call(env, task) resolve
|
23
|
+
#
|
24
|
+
# call :: rake's main -> dsl task(see compiler) -> void
|
25
|
+
# resolve :: str -> str
|
26
|
+
|
27
|
+
# There are two methods to provide code to a language protocol, either a string literal
|
28
|
+
# OR a ruby block. Cannot choose both.
|
29
|
+
# Runs code through a language implementation. The code comes either from a
# text set with `*` (passed through the resolver block given to #call) or
# from a Ruby block assigned through #block= (called with the task).
class LanguageProtocol
  attr_writer :block

  # @param language_impl [#build, #run_script] the language implementation
  # @param script_template [String] template in which a line consisting
  #   solely of `<code>` is replaced by the code
  def initialize(language_impl, script_template: '<code>')
    # contextual variables, will be passed later
    @impl = language_impl
    @script_template = script_template
    @block = nil
    @text = nil
  end

  # for syntax sugar like shell* <code text>
  #
  # @return [Array<LanguageProtocol>] one-element array holding self
  def *(text)
    @text = text
    [self]
  end

  # Builds the script and hands it to the implementation.
  #
  # A block::str -> str should be given to resolve the bindings in code text.
  # When a Ruby block was assigned it takes precedence and its result is used
  # unresolved.
  #
  # @raise [ArgumentError] when neither source produced any code
  def call(env, task)
    code = yield @text if @text
    code = @block.call(task) if @block # do not resolve
    raise ArgumentError, 'no code given: provide a code text or a block' if code.nil?

    env.logger.debug code
    script_text = @impl.build(wrap_template(remove_common_indent(code)), task)
    temp_script = create_tmp(script_text)
    @impl.run_script env, temp_script, task
    env.logger.debug script_text
  end

  private

  # Substitutes the code for every `<code>` line of the template.
  def wrap_template(code)
    # block form: the code is inserted verbatim, whereas a replacement string
    # would have sequences such as \1 or \\ interpreted by gsub
    @script_template.gsub(/^\<code\>$/) { code }
  end
end
|
66
|
+
|
67
|
+
# A special language protocol, just a wrapper for action, pass block instead of
|
68
|
+
# string to execute
|
69
|
+
# named RubyP to avoid name collision
|
70
|
+
# Protocol that executes a Ruby block instead of a generated script.
class RubyP
  # @raise [ArgumentError] when no block is given, since there would be
  #   nothing to execute
  def initialize(&block)
    raise ArgumentError, 'RubyP requires a block' unless block

    @block = block
  end

  # Calls the stored block with the task, forwarding the resolver block, then
  # touches the file named by the task.
  #
  # @param _env [Object] unused
  # @param task [#name] task whose name is the file to touch
  def call(_env, task, &resolve)
    @block.call(task, &resolve)
    FileUtils.touch(task.name)
  end
end
|
80
|
+
|
81
|
+
# use run instead of "ruby" to avoid name collision
|
82
|
+
# Wraps the given block in a RubyP and returns it inside a one-element array.
def run(&block)
  action = RubyP.new(&block)
  [action]
end
|
85
|
+
|
86
|
+
# helper functions to implement LanguageImpl
|
87
|
+
# Runs a command and reports its output through env.logger: the command and
# its stdout at debug level, non-empty stderr at info level, and a failing
# exit status at error level.
#
# @param env [#logger] object exposing the logger
# @param cmd [String] command line to execute
# @return [Boolean] whether the command exited successfully
def run_cmd(env, cmd)
  env.logger.debug(cmd)
  # capture3 drains stdout and stderr concurrently; reading one pipe to the
  # end before the other can block forever once the unread pipe fills up
  out, err, status = Open3.capture3(cmd)
  env.logger.debug(out)
  env.logger.info(err) unless err.empty?
  # surface failures instead of dropping the exit status silently
  env.logger.error("command failed (#{status}): #{cmd}") unless status.success?
  status.success?
end
|
95
|
+
|
96
|
+
# Keeps only the entries of kwargs whose keys are keyword parameters
# (optional or required) of klass#initialize.
#
# @param klass [Class]
# @param kwargs [Hash{Symbol => Object}]
# @return [Hash{Symbol => Object}]
def pick_kwargs(klass, kwargs)
  accepted = []
  klass.instance_method(:initialize).parameters.each do |kind, name|
    accepted << name if !name.nil? && %i[key keyreq].include?(kind)
  end
  kwargs.select { |key, _value| accepted.include?(key) }
end
|
104
|
+
|
105
|
+
# Defines a singleton method called `name` that builds a LanguageProtocol
# around a fresh instance of klass.
#
# The generated method merges its keyword arguments over the current global
# configuration and splits them between klass#initialize and
# LanguageProtocol#initialize. With a block it returns [protocol] with the
# block attached. Without one it returns the protocol itself, extended with
# a `config` method that replaces the global configuration by
# global_defaults merged with the given settings.
def creator(name, klass, global_defaults = {})
  global_config = global_defaults
  define_singleton_method(name) do |*args, **kwargs, &block|
    merged = global_config.merge(kwargs)
    impl = klass.new(*args, **pick_kwargs(klass, merged))
    proto = LanguageProtocol.new(impl, **pick_kwargs(LanguageProtocol, merged))

    if block
      proto.block = block
      next [proto]
    end

    # no block: allow configuring, or wait for * to add the code text
    proto.define_singleton_method(:config) do |**config|
      global_config = global_defaults.merge(config)
    end
    proto
  end
end
|
data/lib/raka.rb
ADDED
@@ -0,0 +1,70 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'logger'
|
4
|
+
|
5
|
+
require_relative './compile'
|
6
|
+
require_relative './protocol'
|
7
|
+
require_relative './token'
|
8
|
+
|
9
|
+
# initialize raka
|
10
|
+
# Entry point of the DSL: installs a logger and one token-creating method per
# output type on the given environment object.
class Raka
  Pattern = Pattern
  attr_reader :logger

  # Creates a single STDOUT logger at the given level, keeps it in @logger
  # and exposes it as `logger` on the environment.
  def create_logger(level)
    logger = Logger.new(STDOUT)
    logger.level = level
    @logger = logger
    # close over the one instance instead of building a logger on every call
    @env.define_singleton_method(:logger) { logger }
  end

  # Defines a method on the environment, named after the alias if given and
  # the extension otherwise, that starts a rule by returning a Token.
  def define_token_creator(ext, ext_alias = nil)
    # closures
    env = @env
    options = @options
    scopes = @scopes
    @env.define_singleton_method(ext_alias || ext) do |*args|
      # Here the compiler is bound to @options so that when we change @options
      # using methods like scope in the Rakefile, the rules defined afterwards
      # honor the new settings.
      # The scopes are cloned to fix them at the time the rule is defined.
      inline_scope_pattern = args.first
      Token.new(
        DSLCompiler.new(env, options), Context.new(ext, scopes.clone),
        [], inline_scope_pattern
      )
    end
  end

  # @param env [Object] object that receives the DSL methods
  # @param options [Hash] overrides for the defaults listed below
  #
  # NOTE(review): OpenStruct is used here but 'ostruct' is not among this
  # file's requires -- presumably loaded by the host program; confirm.
  def initialize(env, options)
    @env = env
    defaults = {
      output_types: [:csv], input_types: [],
      type_aliases: {},
      scopes: [],
      lang: ['lang/shell'],
      user_lang: []
    }
    @options = options = OpenStruct.new(defaults.merge(options))

    create_logger options.log_level || (ENV['LOG_LEVEL'] || Logger::INFO).to_i

    @options.input_types |= @options.output_types # any output can be used as intermediate
    # specify root of scopes in options, scopes will append to each root
    @scopes = options.scopes.empty? ? [] : [options.scopes]
    @options.lang.each { |path| load File.join(File.dirname(__FILE__), "#{path}/impl.rb") }
    @options.user_lang.each { |path| load "#{path}.rb" }

    # These are where the dsl starts
    @options.output_types.each do |ext|
      define_token_creator(ext, @options.type_aliases[ext])
    end
  end

  # Runs the block with `names` pushed onto the scope stack.
  #
  # @return [Array] the names that were in effect during the block
  def scope(*names, &block)
    @scopes.push(names)
    begin
      block.call
    ensure
      # restore the stack even when the block raises
      @scopes.pop
    end
    names
  end
end
|