raka 0.3.18 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/VERSION +1 -1
- data/lib/raka/compile.rb +0 -2
- data/lib/raka/lang/duckdb/impl.rb +65 -0
- data/lib/raka/lang/psql/impl.rb +6 -6
- data/lib/raka.rb +1 -0
- metadata +27 -30
- data/lib/raka/interface.rbs +0 -23
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6a91b1fab962b4bd99bd333aacc88b52267750458e9430584668f13389aec117
|
4
|
+
data.tar.gz: a58613eddc635b0440e6cb0e061311abc137e7b893dd87b5c66f8f97c6f952c2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cf28cfc8c9e3c3be6f4a1d12edb525f8353dd225efd64e7e6d5904ed068173f036af496bcf13a3d5febb731249e3e57c1bee87eea1994a00b34334efe8a5a5c2
|
7
|
+
data.tar.gz: bdd4d58a420a46cdb4fd23a714ea60c14b11c7a0fe8b5bb6cfc343f860208e60ae163d28abab6839cf7abb2548c4c7a849d513026d9c4e091167e2d102f99161
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.3.18
|
1
|
+
0.4.1
|
data/lib/raka/compile.rb
CHANGED
@@ -34,7 +34,6 @@ class DSLCompiler
|
|
34
34
|
name: name,
|
35
35
|
deps: deps,
|
36
36
|
deps_str: deps.join(','),
|
37
|
-
deps_stem_str: (deps.map { |d| stem(d) }).join(','),
|
38
37
|
dep_scopes: deps.map { |d| File.dirname(d) },
|
39
38
|
input: deps.first || '',
|
40
39
|
task: task
|
@@ -78,7 +77,6 @@ class DSLCompiler
|
|
78
77
|
.gsub('$^', task.deps_str)
|
79
78
|
.gsub('$<', task.input || '')
|
80
79
|
.gsub('$(deps)', task.deps_str)
|
81
|
-
.gsub('$(deps_stem)', task.deps_stem_str)
|
82
80
|
.gsub('$(input)', task.input || '')
|
83
81
|
|
84
82
|
protect_percent_symbol text do |safe_text|
|
data/lib/raka/lang/duckdb/impl.rb
ADDED
@@ -0,0 +1,65 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative '../../protocol'
|
4
|
+
|
5
|
+
def bash(env, cmd)
|
6
|
+
code = remove_common_indent(
|
7
|
+
%(set -e
|
8
|
+
set -o pipefail
|
9
|
+
|
10
|
+
#{cmd}
|
11
|
+
)
|
12
|
+
)
|
13
|
+
env.send :sh, 'bash ' + create_tmp(code)
|
14
|
+
end
|
15
|
+
|
16
|
+
# DuckDB protocol with two modes:
|
17
|
+
# 1. Persistent mode: operations on .db file with CREATE TABLE
|
18
|
+
# 2. Ad-hoc mode: parquet in/out using COPY operations
|
19
|
+
class Duckdb
|
20
|
+
def initialize(database: nil, params: {})
|
21
|
+
@params = params
|
22
|
+
@database = database
|
23
|
+
@mode = @database ? :persistent : :adhoc
|
24
|
+
end
|
25
|
+
|
26
|
+
def duckdb_cmd
|
27
|
+
case @mode
|
28
|
+
when :persistent
|
29
|
+
"duckdb #{@database}"
|
30
|
+
when :adhoc
|
31
|
+
'duckdb'
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
def build(code, _task)
|
36
|
+
# Replace parameter placeholders
|
37
|
+
processed_code = code
|
38
|
+
(@params || {}).each do |key, value|
|
39
|
+
processed_code = processed_code.gsub("$#{key}", "'#{value}'")
|
40
|
+
end
|
41
|
+
|
42
|
+
case @mode
|
43
|
+
when :persistent
|
44
|
+
"DROP TABLE IF EXISTS :_name_; CREATE TABLE :_name_ AS (#{processed_code});"
|
45
|
+
when :adhoc
|
46
|
+
"COPY (#{processed_code}) TO ':output:' (FORMAT PARQUET);"
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
def run_script(env, fname, task)
|
51
|
+
case @mode
|
52
|
+
when :persistent
|
53
|
+
bash env, %(
|
54
|
+
#{duckdb_cmd} -c "$(cat #{fname} | sed 's|:_name_|#{task.output_stem}|g')" | tee #{fname}.log
|
55
|
+
echo "#{@database}" > #{task.name}
|
56
|
+
)
|
57
|
+
when :adhoc
|
58
|
+
bash env, %(
|
59
|
+
cat #{fname} | sed 's|:output:|#{task.name}|g' | #{duckdb_cmd} | tee #{fname}.log
|
60
|
+
)
|
61
|
+
end
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
creator :duckdb, Duckdb
|
data/lib/raka/lang/psql/impl.rb
CHANGED
@@ -24,9 +24,10 @@ class Psql
|
|
24
24
|
end
|
25
25
|
|
26
26
|
# 1. do not add required argument here, so psql.config will work or we can only use psql(conn: xxx).config
|
27
|
-
def initialize(conn: nil, create: 'mview', params: {})
|
27
|
+
def initialize(conn: nil, create: 'mview', schema: '', params: {})
|
28
28
|
@create = create
|
29
29
|
@params = params
|
30
|
+
@schema = schema
|
30
31
|
@conn = conn
|
31
32
|
end
|
32
33
|
|
@@ -36,10 +37,10 @@ class Psql
|
|
36
37
|
|
37
38
|
if @create.to_s == 'table'
|
38
39
|
'DROP TABLE IF EXISTS :_schema_:_name_;' \
|
39
|
-
'CREATE TABLE :
|
40
|
+
'CREATE TABLE :schema:_name_ AS (' + code + ');'
|
40
41
|
elsif @create.to_s == 'mview'
|
41
42
|
'DROP MATERIALIZED VIEW IF EXISTS :_schema_:_name_;' \
|
42
|
-
'CREATE MATERIALIZED VIEW :
|
43
|
+
'CREATE MATERIALIZED VIEW :schema:_name_ AS (' + code + ');'
|
43
44
|
else
|
44
45
|
code
|
45
46
|
end
|
@@ -47,12 +48,11 @@ class Psql
|
|
47
48
|
|
48
49
|
def run_script(env, fname, task)
|
49
50
|
param_str = (@params || {}).map { |k, v| "-v #{k}=\"#{v}\"" }.join(' ')
|
50
|
-
schema = task.rule_scopes.join('__')
|
51
|
-
out_schema = (task.rule_scopes + (task.target_scope.nil? ? [] : [task.target_scope])).join('__')
|
51
|
+
schema = @schema.empty? ? task.rule_scopes.join('__') : @schema
|
52
52
|
|
53
53
|
bash env, %(
|
54
54
|
#{sh_cmd(schema)} #{param_str} -v _name_=#{task.output_stem} \
|
55
|
-
-v _schema_=#{
|
55
|
+
-v _schema_=#{schema.empty? ? '' : schema + '.'} -f #{fname} | tee #{fname}.log
|
56
56
|
mv #{fname}.log #{task.name}
|
57
57
|
)
|
58
58
|
end
|
data/lib/raka.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: raka
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.18
|
4
|
+
version: 0.4.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yarray
|
8
|
-
autorequire:
|
9
8
|
bindir: bin
|
10
9
|
cert_chain: []
|
11
|
-
date:
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
12
11
|
dependencies:
|
13
12
|
- !ruby/object:Gem::Dependency
|
14
13
|
name: rake
|
@@ -16,98 +15,98 @@ dependencies:
|
|
16
15
|
requirements:
|
17
16
|
- - "~>"
|
18
17
|
- !ruby/object:Gem::Version
|
19
|
-
version: 13.
|
18
|
+
version: 13.3.0
|
20
19
|
type: :runtime
|
21
20
|
prerelease: false
|
22
21
|
version_requirements: !ruby/object:Gem::Requirement
|
23
22
|
requirements:
|
24
23
|
- - "~>"
|
25
24
|
- !ruby/object:Gem::Version
|
26
|
-
version: 13.
|
25
|
+
version: 13.3.0
|
27
26
|
- !ruby/object:Gem::Dependency
|
28
|
-
name:
|
27
|
+
name: bundler
|
29
28
|
requirement: !ruby/object:Gem::Requirement
|
30
29
|
requirements:
|
31
30
|
- - ">="
|
32
31
|
- !ruby/object:Gem::Version
|
33
|
-
version:
|
32
|
+
version: 2.1.0
|
34
33
|
type: :development
|
35
34
|
prerelease: false
|
36
35
|
version_requirements: !ruby/object:Gem::Requirement
|
37
36
|
requirements:
|
38
37
|
- - ">="
|
39
38
|
- !ruby/object:Gem::Version
|
40
|
-
version:
|
39
|
+
version: 2.1.0
|
41
40
|
- !ruby/object:Gem::Dependency
|
42
|
-
name:
|
41
|
+
name: juwelier
|
43
42
|
requirement: !ruby/object:Gem::Requirement
|
44
43
|
requirements:
|
45
|
-
- - "
|
44
|
+
- - "~>"
|
46
45
|
- !ruby/object:Gem::Version
|
47
|
-
version: 2.
|
46
|
+
version: 2.4.0
|
48
47
|
type: :development
|
49
48
|
prerelease: false
|
50
49
|
version_requirements: !ruby/object:Gem::Requirement
|
51
50
|
requirements:
|
52
|
-
- - "
|
51
|
+
- - "~>"
|
53
52
|
- !ruby/object:Gem::Version
|
54
|
-
version: 2.
|
53
|
+
version: 2.4.0
|
55
54
|
- !ruby/object:Gem::Dependency
|
56
|
-
name:
|
55
|
+
name: rdoc
|
57
56
|
requirement: !ruby/object:Gem::Requirement
|
58
57
|
requirements:
|
59
|
-
- - "
|
58
|
+
- - ">="
|
60
59
|
- !ruby/object:Gem::Version
|
61
|
-
version:
|
60
|
+
version: 6.3.1
|
62
61
|
type: :development
|
63
62
|
prerelease: false
|
64
63
|
version_requirements: !ruby/object:Gem::Requirement
|
65
64
|
requirements:
|
66
|
-
- - "
|
65
|
+
- - ">="
|
67
66
|
- !ruby/object:Gem::Version
|
68
|
-
version:
|
67
|
+
version: 6.3.1
|
69
68
|
- !ruby/object:Gem::Dependency
|
70
|
-
name:
|
69
|
+
name: reek
|
71
70
|
requirement: !ruby/object:Gem::Requirement
|
72
71
|
requirements:
|
73
72
|
- - "~>"
|
74
73
|
- !ruby/object:Gem::Version
|
75
|
-
version:
|
74
|
+
version: '6.0'
|
76
75
|
type: :development
|
77
76
|
prerelease: false
|
78
77
|
version_requirements: !ruby/object:Gem::Requirement
|
79
78
|
requirements:
|
80
79
|
- - "~>"
|
81
80
|
- !ruby/object:Gem::Version
|
82
|
-
version:
|
81
|
+
version: '6.0'
|
83
82
|
- !ruby/object:Gem::Dependency
|
84
83
|
name: rubocop
|
85
84
|
requirement: !ruby/object:Gem::Requirement
|
86
85
|
requirements:
|
87
86
|
- - "~>"
|
88
87
|
- !ruby/object:Gem::Version
|
89
|
-
version:
|
88
|
+
version: 1.79.0
|
90
89
|
type: :development
|
91
90
|
prerelease: false
|
92
91
|
version_requirements: !ruby/object:Gem::Requirement
|
93
92
|
requirements:
|
94
93
|
- - "~>"
|
95
94
|
- !ruby/object:Gem::Version
|
96
|
-
version:
|
95
|
+
version: 1.79.0
|
97
96
|
- !ruby/object:Gem::Dependency
|
98
|
-
name:
|
97
|
+
name: test-unit
|
99
98
|
requirement: !ruby/object:Gem::Requirement
|
100
99
|
requirements:
|
101
100
|
- - "~>"
|
102
101
|
- !ruby/object:Gem::Version
|
103
|
-
version:
|
102
|
+
version: 3.7.0
|
104
103
|
type: :development
|
105
104
|
prerelease: false
|
106
105
|
version_requirements: !ruby/object:Gem::Requirement
|
107
106
|
requirements:
|
108
107
|
- - "~>"
|
109
108
|
- !ruby/object:Gem::Version
|
110
|
-
version:
|
109
|
+
version: 3.7.0
|
111
110
|
description: An extensible, concise and light weight DSL on Rake to automate data
|
112
111
|
processing tasks
|
113
112
|
email: '08to09@gmail.com'
|
@@ -124,7 +123,7 @@ files:
|
|
124
123
|
- bin/raka
|
125
124
|
- lib/raka.rb
|
126
125
|
- lib/raka/compile.rb
|
127
|
-
- lib/raka/
|
126
|
+
- lib/raka/lang/duckdb/impl.rb
|
128
127
|
- lib/raka/lang/psql/impl.rb
|
129
128
|
- lib/raka/lang/python/impl.rb
|
130
129
|
- lib/raka/lang/r/impl.rb
|
@@ -136,7 +135,6 @@ homepage: http://github.com/yarray/raka
|
|
136
135
|
licenses:
|
137
136
|
- MIT
|
138
137
|
metadata: {}
|
139
|
-
post_install_message:
|
140
138
|
rdoc_options: []
|
141
139
|
require_paths:
|
142
140
|
- lib
|
@@ -151,8 +149,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
151
149
|
- !ruby/object:Gem::Version
|
152
150
|
version: '0'
|
153
151
|
requirements: []
|
154
|
-
rubygems_version: 3.
|
155
|
-
signing_key:
|
152
|
+
rubygems_version: 3.6.9
|
156
153
|
specification_version: 4
|
157
154
|
summary: Rake for data
|
158
155
|
test_files: []
|
data/lib/raka/interface.rbs
DELETED
@@ -1,23 +0,0 @@
|
|
1
|
-
class RakaTask
|
2
|
-
attr_reader name: String
|
3
|
-
attr_reader stem: String
|
4
|
-
attr_reader func: String?
|
5
|
-
attr_reader input_stem: String?
|
6
|
-
attr_reader scope: String?
|
7
|
-
attr_reader target_scope: String?
|
8
|
-
attr_reader scopes: Array[String]
|
9
|
-
attr_reader target_scope_captures: Array[String]
|
10
|
-
attr_reader captures: Hash[String, String]
|
11
|
-
attr_reader deps: Array[String]
|
12
|
-
attr_reader deps_str: String
|
13
|
-
attr_reader input: String
|
14
|
-
attr_reader task: Object # RakeTask
|
15
|
-
end
|
16
|
-
|
17
|
-
class RakaEnv
|
18
|
-
end
|
19
|
-
|
20
|
-
class LanguageImpl
|
21
|
-
def build: (String code, RakaTask task) -> String
|
22
|
-
def run_script: (RakaEnv env, String fname, RakaTask task) -> nil
|
23
|
-
end
|