raka 0.3.18 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3f74a12c7ba0a07e2dd9b3f07fe4496b3829295151bfe29f5bdf6a88ae8a0ce9
4
- data.tar.gz: 9f0d169ba48237872b1564059abb2906c8d2960209d9f6972ce7985d0c9f2e8e
3
+ metadata.gz: 788df7b5953f68a917e57a8508b51aa89e1ca404b91480bcbd132b3cd7666905
4
+ data.tar.gz: 00123c1ab2294b5e274942e65ec507561834b259b45e4ad636e3ffe4f269e996
5
5
  SHA512:
6
- metadata.gz: 7d301a5e179ff2ca7f5182fe8b94a971c897b7893ea1fb3c12b77cfc81846ce310e5c9dd80b6407517b72a756a18ddbb409c73619c7358b9c71166655364cbba
7
- data.tar.gz: 95b61083e26783b07a63f3cd3b61e8088379cabf338e00ddeb8287d13fa54964bf7faa5fae8b3010e622b138f13bfc1a06f9fa93c69e417dc2614abe2f81ce2a
6
+ metadata.gz: 68cac774b3aa673896b68201e6e3667075cf9fc53d943c9f48aac130c5bb801903ce15f5e68523d46c53dc98c2d9df1b23210f84a0e7c628653c40861418d2cb
7
+ data.tar.gz: 3216c9889ca95b84814ca69c85f42c1f972c0e3093baae37338b09c9fce325064df5ed60d2709334e0250e0eabdd357cbf2cf23088c2dadeaed50fe2ea416369
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.3.18
1
+ 0.4.0
data/lib/raka/compile.rb CHANGED
@@ -34,7 +34,6 @@ class DSLCompiler
34
34
  name: name,
35
35
  deps: deps,
36
36
  deps_str: deps.join(','),
37
- deps_stem_str: (deps.map { |d| stem(d) }).join(','),
38
37
  dep_scopes: deps.map { |d| File.dirname(d) },
39
38
  input: deps.first || '',
40
39
  task: task
@@ -78,7 +77,6 @@ class DSLCompiler
78
77
  .gsub('$^', task.deps_str)
79
78
  .gsub('$<', task.input || '')
80
79
  .gsub('$(deps)', task.deps_str)
81
- .gsub('$(deps_stem)', task.deps_stem_str)
82
80
  .gsub('$(input)', task.input || '')
83
81
 
84
82
  protect_percent_symbol text do |safe_text|
@@ -0,0 +1,65 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../../protocol'
4
+
5
+ def bash(env, cmd)
6
+ code = remove_common_indent(
7
+ %(set -e
8
+ set -o pipefail
9
+
10
+ #{cmd}
11
+ )
12
+ )
13
+ env.send :sh, 'bash ' + create_tmp(code)
14
+ end
15
+
16
+ # DuckDB protocol with two modes:
17
+ # 1. Persistent mode: operations on .db file with CREATE TABLE
18
+ # 2. Ad-hoc mode: parquet in/out using COPY operations
19
+ class Duckdb
20
+ def initialize(database: nil, params: {})
21
+ @params = params
22
+ @database = database
23
+ @mode = @database ? :persistent : :adhoc
24
+ end
25
+
26
+ def duckdb_cmd
27
+ case @mode
28
+ when :persistent
29
+ "duckdb #{@database}"
30
+ when :adhoc
31
+ 'duckdb'
32
+ end
33
+ end
34
+
35
+ def build(code, _task)
36
+ # Replace parameter placeholders
37
+ processed_code = code
38
+ (@params || {}).each do |key, value|
39
+ processed_code = processed_code.gsub("$#{key}", "'#{value}'")
40
+ end
41
+
42
+ case @mode
43
+ when :persistent
44
+ "DROP TABLE IF EXISTS :_name_; CREATE TABLE :_name_ AS (#{processed_code});"
45
+ when :adhoc
46
+ "COPY (#{processed_code}) TO ':output:' (FORMAT PARQUET);"
47
+ end
48
+ end
49
+
50
+ def run_script(env, fname, task)
51
+ case @mode
52
+ when :persistent
53
+ bash env, %(
54
+ #{duckdb_cmd} -c "$(cat #{fname} | sed 's|:_name_|#{task.output_stem}|g')" | tee #{fname}.log
55
+ echo "#{@database}" > #{task.name}
56
+ )
57
+ when :adhoc
58
+ bash env, %(
59
+ cat #{fname} | sed 's|:output:|#{task.name}|g' | #{duckdb_cmd} | tee #{fname}.log
60
+ )
61
+ end
62
+ end
63
+ end
64
+
65
+ creator :duckdb, Duckdb
@@ -24,9 +24,10 @@ class Psql
24
24
  end
25
25
 
26
26
  # 1. do not add required argument here, so psql.config will work or we can only use psql(conn: xxx).config
27
- def initialize(conn: nil, create: 'mview', params: {})
27
+ def initialize(conn: nil, create: 'mview', schema: '', params: {})
28
28
  @create = create
29
29
  @params = params
30
+ @schema = schema
30
31
  @conn = conn
31
32
  end
32
33
 
@@ -36,10 +37,10 @@ class Psql
36
37
 
37
38
  if @create.to_s == 'table'
38
39
  'DROP TABLE IF EXISTS :_schema_:_name_;' \
39
- 'CREATE TABLE :_schema_:_name_ AS (' + code + ');'
40
+ 'CREATE TABLE :schema:_name_ AS (' + code + ');'
40
41
  elsif @create.to_s == 'mview'
41
42
  'DROP MATERIALIZED VIEW IF EXISTS :_schema_:_name_;' \
42
- 'CREATE MATERIALIZED VIEW :_schema_:_name_ AS (' + code + ');'
43
+ 'CREATE MATERIALIZED VIEW :schema:_name_ AS (' + code + ');'
43
44
  else
44
45
  code
45
46
  end
@@ -47,12 +48,11 @@ class Psql
47
48
 
48
49
  def run_script(env, fname, task)
49
50
  param_str = (@params || {}).map { |k, v| "-v #{k}=\"#{v}\"" }.join(' ')
50
- schema = task.rule_scopes.join('__')
51
- out_schema = (task.rule_scopes + (task.target_scope.nil? ? [] : [task.target_scope])).join('__')
51
+ schema = @schema.empty? ? task.rule_scopes.join('__') : @schema
52
52
 
53
53
  bash env, %(
54
54
  #{sh_cmd(schema)} #{param_str} -v _name_=#{task.output_stem} \
55
- -v _schema_=#{out_schema.empty? ? '' : out_schema + '.'} -f #{fname} | tee #{fname}.log
55
+ -v _schema_=#{schema.empty? ? '' : schema + '.'} -f #{fname} | tee #{fname}.log
56
56
  mv #{fname}.log #{task.name}
57
57
  )
58
58
  end
data/lib/raka.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'logger'
4
+ require 'ostruct'
4
5
 
5
6
  require_relative './raka/compile'
6
7
  require_relative './raka/protocol'
metadata CHANGED
@@ -1,14 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: raka
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.18
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - yarray
8
- autorequire:
9
8
  bindir: bin
10
9
  cert_chain: []
11
- date: 2023-03-21 00:00:00.000000000 Z
10
+ date: 1980-01-02 00:00:00.000000000 Z
12
11
  dependencies:
13
12
  - !ruby/object:Gem::Dependency
14
13
  name: rake
@@ -16,98 +15,98 @@ dependencies:
16
15
  requirements:
17
16
  - - "~>"
18
17
  - !ruby/object:Gem::Version
19
- version: 13.0.0
18
+ version: 13.3.0
20
19
  type: :runtime
21
20
  prerelease: false
22
21
  version_requirements: !ruby/object:Gem::Requirement
23
22
  requirements:
24
23
  - - "~>"
25
24
  - !ruby/object:Gem::Version
26
- version: 13.0.0
25
+ version: 13.3.0
27
26
  - !ruby/object:Gem::Dependency
28
- name: rdoc
27
+ name: bundler
29
28
  requirement: !ruby/object:Gem::Requirement
30
29
  requirements:
31
30
  - - ">="
32
31
  - !ruby/object:Gem::Version
33
- version: 6.3.1
32
+ version: 2.1.0
34
33
  type: :development
35
34
  prerelease: false
36
35
  version_requirements: !ruby/object:Gem::Requirement
37
36
  requirements:
38
37
  - - ">="
39
38
  - !ruby/object:Gem::Version
40
- version: 6.3.1
39
+ version: 2.1.0
41
40
  - !ruby/object:Gem::Dependency
42
- name: bundler
41
+ name: juwelier
43
42
  requirement: !ruby/object:Gem::Requirement
44
43
  requirements:
45
- - - ">="
44
+ - - "~>"
46
45
  - !ruby/object:Gem::Version
47
- version: 2.1.0
46
+ version: 2.4.0
48
47
  type: :development
49
48
  prerelease: false
50
49
  version_requirements: !ruby/object:Gem::Requirement
51
50
  requirements:
52
- - - ">="
51
+ - - "~>"
53
52
  - !ruby/object:Gem::Version
54
- version: 2.1.0
53
+ version: 2.4.0
55
54
  - !ruby/object:Gem::Dependency
56
- name: juwelier
55
+ name: rdoc
57
56
  requirement: !ruby/object:Gem::Requirement
58
57
  requirements:
59
- - - "~>"
58
+ - - ">="
60
59
  - !ruby/object:Gem::Version
61
- version: 2.1.0
60
+ version: 6.3.1
62
61
  type: :development
63
62
  prerelease: false
64
63
  version_requirements: !ruby/object:Gem::Requirement
65
64
  requirements:
66
- - - "~>"
65
+ - - ">="
67
66
  - !ruby/object:Gem::Version
68
- version: 2.1.0
67
+ version: 6.3.1
69
68
  - !ruby/object:Gem::Dependency
70
- name: test-unit
69
+ name: reek
71
70
  requirement: !ruby/object:Gem::Requirement
72
71
  requirements:
73
72
  - - "~>"
74
73
  - !ruby/object:Gem::Version
75
- version: 3.3.0
74
+ version: '6.0'
76
75
  type: :development
77
76
  prerelease: false
78
77
  version_requirements: !ruby/object:Gem::Requirement
79
78
  requirements:
80
79
  - - "~>"
81
80
  - !ruby/object:Gem::Version
82
- version: 3.3.0
81
+ version: '6.0'
83
82
  - !ruby/object:Gem::Dependency
84
83
  name: rubocop
85
84
  requirement: !ruby/object:Gem::Requirement
86
85
  requirements:
87
86
  - - "~>"
88
87
  - !ruby/object:Gem::Version
89
- version: 0.80.0
88
+ version: 1.79.0
90
89
  type: :development
91
90
  prerelease: false
92
91
  version_requirements: !ruby/object:Gem::Requirement
93
92
  requirements:
94
93
  - - "~>"
95
94
  - !ruby/object:Gem::Version
96
- version: 0.80.0
95
+ version: 1.79.0
97
96
  - !ruby/object:Gem::Dependency
98
- name: reek
97
+ name: test-unit
99
98
  requirement: !ruby/object:Gem::Requirement
100
99
  requirements:
101
100
  - - "~>"
102
101
  - !ruby/object:Gem::Version
103
- version: '6.0'
102
+ version: 3.7.0
104
103
  type: :development
105
104
  prerelease: false
106
105
  version_requirements: !ruby/object:Gem::Requirement
107
106
  requirements:
108
107
  - - "~>"
109
108
  - !ruby/object:Gem::Version
110
- version: '6.0'
109
+ version: 3.7.0
111
110
  description: An extensible, concise and light weight DSL on Rake to automate data
112
111
  processing tasks
113
112
  email: '08to09@gmail.com'
@@ -125,6 +124,7 @@ files:
125
124
  - lib/raka.rb
126
125
  - lib/raka/compile.rb
127
126
  - lib/raka/interface.rbs
127
+ - lib/raka/lang/duckdb/impl.rb
128
128
  - lib/raka/lang/psql/impl.rb
129
129
  - lib/raka/lang/python/impl.rb
130
130
  - lib/raka/lang/r/impl.rb
@@ -136,7 +136,6 @@ homepage: http://github.com/yarray/raka
136
136
  licenses:
137
137
  - MIT
138
138
  metadata: {}
139
- post_install_message:
140
139
  rdoc_options: []
141
140
  require_paths:
142
141
  - lib
@@ -151,8 +150,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
151
150
  - !ruby/object:Gem::Version
152
151
  version: '0'
153
152
  requirements: []
154
- rubygems_version: 3.1.2
155
- signing_key:
153
+ rubygems_version: 3.6.9
156
154
  specification_version: 4
157
155
  summary: Rake for data
158
156
  test_files: []