raka 0.4.1 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/VERSION +1 -1
- data/lib/raka/lang/duckdb/impl.rb +32 -6
- data/lib/raka/token.rb +2 -2
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c9181e13ec5517b0c40faba8c2e121a9b44c9da29de439bd5303d8fa5ee50870
|
4
|
+
data.tar.gz: a4c9f8d7e03f067f8266cf77cef52b2d153111c3b24c13a37f9927fe5b543fe5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 03cfe492010d2621bd84aa0c03f452be71de1d26d1d1e354a608893dca47d512ad666a42fb1c0e1d7b89522ad60acbcdd77a9466da388917a6992c9c9afbc8cd
|
7
|
+
data.tar.gz: f5f1d930083bf7342cac0285ba625219f6eb24fea7e1d61241e81bceda711a49496fa58b899de89759573143671adc74f142c730cfe7343b3663855ca91fcc1a
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.4.1
|
1
|
+
0.6.0
|
data/lib/raka/lang/duckdb/impl.rb
CHANGED
@@ -17,10 +17,12 @@ end
|
|
17
17
|
# 1. Persistent mode: operations on .db file with CREATE TABLE
|
18
18
|
# 2. Ad-hoc mode: parquet in/out using COPY operations
|
19
19
|
class Duckdb
|
20
|
-
def initialize(database: nil, params: {})
|
20
|
+
def initialize(database: nil, params: {}, before: nil, after: nil)
|
21
21
|
@params = params
|
22
22
|
@database = database
|
23
23
|
@mode = @database ? :persistent : :adhoc
|
24
|
+
@before = before
|
25
|
+
@after = after
|
24
26
|
end
|
25
27
|
|
26
28
|
def duckdb_cmd
|
@@ -32,26 +34,50 @@ class Duckdb
|
|
32
34
|
end
|
33
35
|
end
|
34
36
|
|
35
|
-
def
|
36
|
-
|
37
|
+
def process_params(code)
|
38
|
+
return code if code.nil?
|
39
|
+
|
37
40
|
processed_code = code
|
38
41
|
(@params || {}).each do |key, value|
|
39
42
|
processed_code = processed_code.gsub("$#{key}", "'#{value}'")
|
40
43
|
end
|
44
|
+
processed_code
|
45
|
+
end
|
46
|
+
|
47
|
+
def build(code, _task)
|
48
|
+
# Process parameter placeholders for all parts
|
49
|
+
main_sql = process_params(code)
|
50
|
+
before_sql = process_params(@before)
|
51
|
+
after_sql = process_params(@after)
|
41
52
|
|
53
|
+
# Build SQL parts as separate statements
|
54
|
+
sql_parts = []
|
55
|
+
|
56
|
+
# Add before hook if present
|
57
|
+
sql_parts << before_sql if before_sql
|
58
|
+
|
59
|
+
# Add main query based on mode
|
42
60
|
case @mode
|
43
61
|
when :persistent
|
44
|
-
"DROP TABLE IF EXISTS :_name_;
|
62
|
+
sql_parts << "DROP TABLE IF EXISTS :_name_;"
|
63
|
+
sql_parts << "CREATE TABLE :_name_ AS (#{main_sql});"
|
45
64
|
when :adhoc
|
46
|
-
"COPY (#{
|
65
|
+
sql_parts << "COPY (#{main_sql}) TO ':output:' (FORMAT PARQUET);"
|
47
66
|
end
|
67
|
+
|
68
|
+
# Add after hook if present
|
69
|
+
sql_parts << after_sql if after_sql
|
70
|
+
|
71
|
+
sql_parts.join("\n")
|
48
72
|
end
|
49
73
|
|
50
74
|
def run_script(env, fname, task)
|
51
75
|
case @mode
|
52
76
|
when :persistent
|
77
|
+
# Split the SQL into separate statements and execute them individually
|
53
78
|
bash env, %(
|
54
|
-
#
|
79
|
+
# Execute the combined SQL script with proper variable replacement
|
80
|
+
cat #{fname} | sed 's|:_name_|#{task.output_stem}|g' | #{duckdb_cmd} | tee #{fname}.log
|
55
81
|
echo "#{@database}" > #{task.name}
|
56
82
|
)
|
57
83
|
when :adhoc
|
data/lib/raka/token.rb
CHANGED
@@ -157,12 +157,12 @@ class Token
|
|
157
157
|
symbol = @chain.pop.to_s
|
158
158
|
# if the pattern contains child pattern like percent_(\d+), we change the capture to
|
159
159
|
# named capture so that it can be captured later. The name is symbol with the index, like func0
|
160
|
-
pattern = pattern.gsub(/\(\S+?\)/).with_index { |m, i| "(?<#{symbol}#{i}>#{m})" }
|
160
|
+
pattern = pattern.to_s.gsub(/\(\S+?\)/).with_index { |m, i| "(?<#{symbol}#{i}>#{m})" }
|
161
161
|
|
162
162
|
# if the symbol is _, \S+ will be put in chain, it indicates not to capture,
|
163
163
|
# so just replace it with the refined pattern
|
164
164
|
if symbol == Pattern::ANY # match-everything and not bound
|
165
|
-
@chain.push pattern
|
165
|
+
@chain.push "#{pattern}\\w*"
|
166
166
|
else
|
167
167
|
@chain.push "(?<#{symbol}>(#{pattern}\\w*))"
|
168
168
|
end
|