raka 0.8.2 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/VERSION +1 -1
- data/lib/raka/lang/duckdb/impl.rb +19 -11
- data/lib/raka/lang/psql/impl.rb +1 -1
- data/lib/raka/protocol.rb +3 -3
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8b04ff22d6ec2fd1a4c986b4a7969596ea9fbe155669558eb69dd469b947aebc
|
4
|
+
data.tar.gz: 680f48420d9d1f3bb0e758b21df43b97f7de9af489385b19fa465cc76bb3f816
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 41de921265fc6b0cd178405054dfed5b8c65acb99819f9fe1dfd1808d1a45e08dc19ef871e01566047f4014d1d0cae1235a9c2f0b71e8d67646b7a69d8e58826
|
7
|
+
data.tar.gz: f5c274b5a85941fd347822ce67e9aff14969c5e9816a7472c77005bc38c655e9ccb7e4d865319646e04b140fa14c3b2d72117d668066ef727084c2818a5c94f4
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.8.2
|
1
|
+
0.9.0
|
data/lib/raka/lang/duckdb/impl.rb
CHANGED
@@ -17,12 +17,13 @@ end
|
|
17
17
|
# 1. Persistent mode: operations on .db file with CREATE TABLE
|
18
18
|
# 2. Ad-hoc mode: parquet in/out using COPY operations
|
19
19
|
class Duckdb
|
20
|
-
def initialize(database: nil, params: {}, before: nil, after: nil)
|
20
|
+
def initialize(database: nil, params: {}, before: nil, after: nil, format: nil)
|
21
21
|
@params = params
|
22
22
|
@database = database
|
23
23
|
@mode = @database ? :persistent : :adhoc
|
24
24
|
@before = before
|
25
25
|
@after = after
|
26
|
+
@format = format&.upcase
|
26
27
|
end
|
27
28
|
|
28
29
|
def duckdb_cmd
|
@@ -36,7 +37,7 @@ class Duckdb
|
|
36
37
|
|
37
38
|
def process_params(code)
|
38
39
|
return code if code.nil?
|
39
|
-
|
40
|
+
|
40
41
|
processed_code = code
|
41
42
|
(@params || {}).each do |key, value|
|
42
43
|
processed_code = processed_code.gsub("$#{key}", "'#{value}'")
|
@@ -44,7 +45,12 @@ class Duckdb
|
|
44
45
|
processed_code
|
45
46
|
end
|
46
47
|
|
47
|
-
def
|
48
|
+
def detect_format_from_extension(filename)
|
49
|
+
ext = File.extname(filename)[1..]&.upcase # Remove dot and convert to uppercase
|
50
|
+
ext || 'PARQUET' # Default fallback
|
51
|
+
end
|
52
|
+
|
53
|
+
def build(code, task)
|
48
54
|
# Process parameter placeholders for all parts
|
49
55
|
main_sql = process_params(code)
|
50
56
|
before_sql = process_params(@before)
|
@@ -52,22 +58,24 @@ class Duckdb
|
|
52
58
|
|
53
59
|
# Build SQL parts as separate statements
|
54
60
|
sql_parts = []
|
55
|
-
|
61
|
+
|
56
62
|
# Add before hook if present
|
57
63
|
sql_parts << before_sql if before_sql
|
58
|
-
|
64
|
+
|
59
65
|
# Add main query based on mode
|
60
66
|
case @mode
|
61
67
|
when :persistent
|
62
|
-
sql_parts <<
|
68
|
+
sql_parts << 'DROP TABLE IF EXISTS :_name_;'
|
63
69
|
sql_parts << "CREATE TABLE :_name_ AS (#{main_sql});"
|
64
70
|
when :adhoc
|
65
|
-
|
71
|
+
# Determine format: use explicit format if provided, otherwise detect from output filename
|
72
|
+
format = @format || detect_format_from_extension(task.name)
|
73
|
+
sql_parts << "COPY (#{main_sql}) TO ':output:' (FORMAT #{format});"
|
66
74
|
end
|
67
|
-
|
75
|
+
|
68
76
|
# Add after hook if present
|
69
77
|
sql_parts << after_sql if after_sql
|
70
|
-
|
78
|
+
|
71
79
|
sql_parts.join("\n")
|
72
80
|
end
|
73
81
|
|
@@ -77,12 +85,12 @@ class Duckdb
|
|
77
85
|
# Split the SQL into separate statements and execute them individually
|
78
86
|
bash env, %(
|
79
87
|
# Execute the combined SQL script with proper variable replacement
|
80
|
-
cat #{fname} | sed 's|:_name_|#{task.output_stem}|g' | #{duckdb_cmd} | sed -z '$ s
|
88
|
+
cat #{fname} | sed 's|:_name_|#{task.output_stem}|g' | #{duckdb_cmd} | sed -z '$ s/\\n$//' | tee #{fname}.log
|
81
89
|
echo "#{@database}" > #{task.name}
|
82
90
|
)
|
83
91
|
when :adhoc
|
84
92
|
bash env, %(
|
85
|
-
cat #{fname} | sed 's|:output:|#{task.name}|g' | #{duckdb_cmd} | sed -z '$ s
|
93
|
+
cat #{fname} | sed 's|:output:|#{task.name}|g' | #{duckdb_cmd} | sed -z '$ s/\\n$//' | tee #{fname}.log
|
86
94
|
)
|
87
95
|
end
|
88
96
|
end
|
data/lib/raka/lang/psql/impl.rb
CHANGED
@@ -52,7 +52,7 @@ class Psql
|
|
52
52
|
|
53
53
|
bash env, %(
|
54
54
|
#{sh_cmd(schema)} #{param_str} -v _name_=#{task.output_stem} \
|
55
|
-
-v _schema_=#{schema.empty? ? '' : schema + '.'} -f #{fname} | sed -z '$ s
|
55
|
+
-v _schema_=#{schema.empty? ? '' : schema + '.'} -f #{fname} | sed -z '$ s/\\n$//' | tee #{fname}.log
|
56
56
|
mv #{fname}.log #{task.name}
|
57
57
|
)
|
58
58
|
end
|
data/lib/raka/protocol.rb
CHANGED
@@ -56,11 +56,11 @@ class LanguageProtocol
|
|
56
56
|
code = yield @text if @text
|
57
57
|
code = @block.call(task) if @block # do not resolve
|
58
58
|
|
59
|
-
env.logger.debug code
|
59
|
+
env.logger.debug code.chomp
|
60
60
|
script_text = @impl.build(wrap_template(remove_common_indent(code)), task)
|
61
61
|
temp_script = create_tmp(script_text)
|
62
62
|
@impl.run_script env, temp_script, task
|
63
|
-
env.logger.debug script_text
|
63
|
+
env.logger.debug script_text.chomp
|
64
64
|
end
|
65
65
|
end
|
66
66
|
|
@@ -91,7 +91,7 @@ def run_cmd(env, cmd)
|
|
91
91
|
if env.logger.level <= 0
|
92
92
|
pid = spawn(cmd, out: out_w)
|
93
93
|
Thread.new do
|
94
|
-
env.logger.debug(out_r.gets) until out_r.eof
|
94
|
+
env.logger.debug(out_r.gets.chomp) until out_r.eof
|
95
95
|
end
|
96
96
|
elsif env.logger.level == 1
|
97
97
|
pid = spawn(cmd, out: out_w)
|