chdb-ruby 0.1.0.rc.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,196 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'fileutils'
4
+ require 'mkmf'
5
+ require 'yaml'
6
+ require 'open-uri'
7
+
8
module ChDB
  # Build-time helpers used by extconf.rb: download the prebuilt libchdb
  # distribution for the target platform, expose its header and shared
  # library to the extension sources, and generate the Makefile.
  module ExtConf
    class << self
      # Runs the whole configuration: compiler setup, dependency download,
      # header lookup and Makefile generation for `chdb/chdb_native`.
      def configure
        configure_cross_compiler
        download_and_extract
        configure_extension
        create_makefile('chdb/chdb_native')
      end

      # Returns true when a built extension for the running Ruby's
      # MAJOR.MINOR version already exists under lib/chdb/<version>/.
      # Always false for cross builds.
      def compiled?
        return false if cross_build?

        ruby_minor = RUBY_VERSION[/\d+\.\d+/]
        extension = determine_target_platform.include?('darwin') ? 'bundle' : 'so'

        File.exist?(File.join(package_root_dir, 'lib', 'chdb', ruby_minor, "#{libname}.#{extension}"))
      end

      # Lets ENV['CC'] override the compiler recorded in RbConfig, then
      # exports the effective compiler back into ENV['CC'].
      def configure_cross_compiler
        RbConfig::CONFIG['CC'] = RbConfig::MAKEFILE_CONFIG['CC'] = ENV['CC'] if ENV['CC']
        ENV['CC'] = RbConfig::CONFIG['CC']
      end

      # Result of mkmf's --enable-cross-build / --disable-cross-build switch.
      def cross_build?
        enable_config('cross-build')
      end

      # Base name of the native extension library.
      def libname
        'chdb_native'
      end

      # Adds ext/chdb/include to the preprocessor flags and aborts the build
      # when chdb.h cannot be found there.
      def configure_extension
        include_path = File.expand_path('ext/chdb/include', package_root_dir)
        append_cppflags("-I#{include_path}")
        abort_could_not_find('chdb.h') unless find_header('chdb.h', include_path)
      end

      # Aborts the build with a pointer to the installation instructions.
      #
      # @param missing [String] name of the file that could not be found
      def abort_could_not_find(missing)
        message = <<~MSG
          Could not find #{missing}.
          Please visit https://github.com/chdb-io/chdb-ruby for installation instructions.
        MSG
        abort("\n#{message}\n")
      end

      # Ensures the libchdb header and shared library for the target platform
      # are present under deps/<version>/<platform>, downloading and unpacking
      # the release tarball when they are not, then links them into the
      # extension tree.
      def download_and_extract
        target_platform = determine_target_platform
        version = fetch_chdb_version
        download_dir = determine_download_directory(target_platform, version)

        if download_needed?(download_dir)
          file_name = get_file_name(target_platform)
          download_tarball(build_download_url(version, file_name), download_dir, file_name)
          extract_tarball(download_dir, file_name)
        end

        copy_files(download_dir, version)
      end

      private

      # Decides whether the tarball has to be fetched. Creates the download
      # directory when it does not exist, and empties it when it exists but
      # lacks chdb.h or libchdb.so.
      def download_needed?(download_dir)
        unless Dir.exist?(download_dir)
          FileUtils.mkdir_p(download_dir)
          return true
        end

        required = %w[chdb.h libchdb.so].map { |name| File.join(download_dir, name) }
        return false if required.all? { |path| File.exist?(path) }

        puts 'Missing required files, cleaning download directory...'
        FileUtils.rm_rf(Dir.glob("#{download_dir}/*"))
        true
      end

      # Target platform identifier: ENV['TARGET'] (stripped) when set and
      # non-blank, otherwise derived from RUBY_PLATFORM.
      #
      # @raise [ArgumentError] when RUBY_PLATFORM is not a supported platform
      def determine_target_platform
        return ENV['TARGET'].strip if ENV['TARGET'] && !ENV['TARGET'].strip.empty?

        case RUBY_PLATFORM
        when /aarch64-linux/ then 'aarch64-linux'
        when /x86_64-linux/ then 'x86_64-linux'
        when /arm64-darwin/ then 'arm64-darwin'
        when /x86_64-darwin/ then 'x86_64-darwin'
        else
          raise ArgumentError, "Unsupported platform: #{RUBY_PLATFORM}."
        end
      end

      # Reads the pinned chdb version from dependencies.yml in the package root.
      def fetch_chdb_version
        dependencies = YAML.load_file(File.join(package_root_dir, 'dependencies.yml'), symbolize_names: true)
        dependencies[:chdb][:version]
      end

      # Directory holding the unpacked distribution for one version/platform.
      def determine_download_directory(target_platform, version)
        File.join(package_root_dir, 'deps', version, target_platform)
      end

      # Release asset name for the given platform identifier.
      #
      # @raise [RuntimeError] when the platform has no known asset
      def get_file_name(target_platform)
        case target_platform
        when 'aarch64-linux' then 'linux-aarch64-libchdb.tar.gz'
        when 'x86_64-linux' then 'linux-x86_64-libchdb.tar.gz'
        when 'arm64-darwin' then 'macos-arm64-libchdb.tar.gz'
        when 'x86_64-darwin' then 'macos-x86_64-libchdb.tar.gz'
        else raise "Unsupported platform: #{target_platform}"
        end
      end

      # GitHub release download URL for the given version and asset.
      def build_download_url(version, file_name)
        "https://github.com/chdb-io/chdb/releases/download/v#{version}/#{file_name}"
      end

      # Streams the tarball at +url+ into +download_dir+/+file_name+.
      # After the first failure the download is retried up to three times
      # before giving up with a RuntimeError.
      def download_tarball(url, download_dir, file_name)
        tarball = File.join(download_dir, file_name)
        puts "Downloading chdb library for #{determine_target_platform}..."

        max_retries = 3
        retries = 0

        begin
          URI.open(url) { |remote| IO.copy_stream(remote, tarball) } # rubocop:disable Security/Open
        rescue StandardError => e
          raise "Failed to download after #{max_retries} attempts: #{e.message}" unless retries < max_retries

          retries += 1
          puts "Download failed: #{e.message}. Retrying (attempt #{retries}/#{max_retries})..."
          retry
        end
      end

      # Unpacks +file_name+ inside +download_dir+.
      #
      # tar is invoked with an argument vector rather than a shell string, so
      # paths containing spaces or shell metacharacters are passed through
      # verbatim, and a failed or missing tar is reported instead of ignored.
      #
      # @return [true]
      # @raise [RuntimeError] when tar cannot be run or exits unsuccessfully
      def extract_tarball(download_dir, file_name)
        tarball = File.join(download_dir, file_name)
        extracted = system('tar', 'xzf', tarball, '-C', download_dir)
        raise "Failed to extract #{tarball} into #{download_dir}" unless extracted

        true
      end

      # Symlinks the downloaded headers into ext/chdb/include and the shared
      # libraries into both ext/chdb/lib and lib/chdb/lib, replacing any
      # existing links.
      def copy_files(download_dir, _version)
        destinations = {
          '*.h' => %w[ext/chdb/include],
          '*.so' => %w[ext/chdb/lib lib/chdb/lib]
        }

        destinations.each do |pattern, subdirs|
          sources = Dir.glob(File.join(download_dir, pattern)).map { |path| File.expand_path(path) }

          subdirs.each do |subdir|
            dest = File.join(package_root_dir, subdir)
            FileUtils.mkdir_p(dest)
            sources.each { |src| FileUtils.ln_s(src, File.join(dest, File.basename(src)), force: true) }
          end
        end
      end

      # Directory two levels above this file.
      def package_root_dir
        File.expand_path(File.join(File.dirname(__FILE__), '..', '..'))
      end
    end
  end
end
190
+
191
# Entry point of extconf.rb.
# With --download-dependencies only the libchdb distribution is fetched and
# the process exits immediately (exit! skips at_exit handlers); otherwise the
# full extension configuration runs.
download_only = arg_config('--download-dependencies')

if download_only
  ChDB::ExtConf.download_and_extract
  exit!(0)
end

ChDB::ExtConf.configure
@@ -0,0 +1,81 @@
1
+ #include "local_result.h"
2
+
3
+ #include "constants.h"
4
+ #include "include/chdb.h"
5
+
6
+ VALUE cLocalResult;
7
+
8
+ static void local_result_free(void *ptr)
9
+ {
10
+ LocalResult *result = (LocalResult *)ptr;
11
+ DEBUG_PRINT("Freeing LocalResult: %p", (void*)result);
12
+ if (result->c_result)
13
+ {
14
+ free_result_v2(result->c_result);
15
+ }
16
+ free(result);
17
+ }
18
+
19
+ const rb_data_type_t LocalResultType =
20
+ {
21
+ "LocalResult",
22
+ {NULL, local_result_free, NULL},
23
+ };
24
+
25
+ void init_local_result()
26
+ {
27
+ VALUE mChDB = rb_define_module("ChDB");
28
+ cLocalResult = rb_define_class_under(mChDB, "LocalResult", rb_cObject);
29
+ rb_define_alloc_func(cLocalResult, local_result_alloc);
30
+ rb_define_method(cLocalResult, "buf", local_result_buf, 0);
31
+ rb_define_method(cLocalResult, "elapsed", local_result_elapsed, 0);
32
+ rb_define_method(cLocalResult, "rows_read", local_result_rows_read, 0);
33
+ rb_define_method(cLocalResult, "bytes_read", local_result_bytes_read, 0);
34
+ }
35
+
36
+ VALUE local_result_alloc(VALUE klass)
37
+ {
38
+ LocalResult *result = ALLOC(LocalResult);
39
+ DEBUG_PRINT("Allocating LocalResult: %p", (void*)result);
40
+ result->c_result = NULL;
41
+ return rb_data_typed_object_wrap(klass, result, &LocalResultType);
42
+ }
43
+
44
+ VALUE local_result_buf(VALUE self)
45
+ {
46
+ LocalResult *result;
47
+ TypedData_Get_Struct(self, LocalResult, &LocalResultType, result);
48
+
49
+ if (!result->c_result || !result->c_result->buf)
50
+ {
51
+ DEBUG_PRINT("Buffer access attempted on empty result");
52
+ return Qnil;
53
+ }
54
+
55
+ DEBUG_PRINT("Returning buffer of length %zu", result->c_result->len);
56
+ return rb_str_new(result->c_result->buf, result->c_result->len);
57
+ }
58
+
59
+ VALUE local_result_elapsed(VALUE self)
60
+ {
61
+ LocalResult *result;
62
+ TypedData_Get_Struct(self, LocalResult, &LocalResultType, result);
63
+ DEBUG_PRINT("Query elapsed time: %f", result->c_result->elapsed);
64
+ return DBL2NUM(result->c_result->elapsed);
65
+ }
66
+
67
+ VALUE local_result_rows_read(VALUE self)
68
+ {
69
+ LocalResult *result;
70
+ TypedData_Get_Struct(self, LocalResult, &LocalResultType, result);
71
+ DEBUG_PRINT("Rows read: %" PRIu64, result->c_result->rows_read);
72
+ return ULONG2NUM(result->c_result->rows_read);
73
+ }
74
+
75
+ VALUE local_result_bytes_read(VALUE self)
76
+ {
77
+ LocalResult *result;
78
+ TypedData_Get_Struct(self, LocalResult, &LocalResultType, result);
79
+ DEBUG_PRINT("Bytes read: %" PRIu64, result->c_result->bytes_read);
80
+ return ULONG2NUM(result->c_result->bytes_read);
81
+ }
@@ -0,0 +1,26 @@
1
+ #ifndef CHDB_LOCAL_RESULT_H
2
+ #define CHDB_LOCAL_RESULT_H
3
+
4
+ #include <ruby.h>
5
+
6
+ typedef struct
7
+ {
8
+ struct local_result_v2 *c_result;
9
+ } LocalResult;
10
+
11
+ extern VALUE cLocalResult;
12
+ extern const rb_data_type_t LocalResultType;
13
+
14
+ void init_local_result();
15
+
16
+ VALUE local_result_alloc(VALUE klass);
17
+
18
+ VALUE local_result_buf(VALUE self);
19
+
20
+ VALUE local_result_elapsed(VALUE self);
21
+
22
+ VALUE local_result_rows_read(VALUE self);
23
+
24
+ VALUE local_result_bytes_read(VALUE self);
25
+
26
+ #endif
@@ -0,0 +1,6 @@
1
+ # frozen_string_literal: true
2
+
3
module ChDB
  # Namespace for ChDB constants. No constants are defined in this file.
  # NOTE(review): other code in the package reads Constants::Open::READONLY,
  # READWRITE and CREATE; presumably those are defined by the native
  # extension - confirm.
  module Constants
  end
end
@@ -0,0 +1,121 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'cgi'
4
+ require 'fileutils'
5
+ require 'tempfile'
6
+ require 'chdb/constants'
7
+ require 'chdb/errors'
8
+
9
module ChDB
  # Represents the data path configuration for ChDB.
  #
  # Parses the database URI (optional "file:" prefix and "?key=value" query
  # string), merges it with the caller's options, derives the open mode,
  # resolves or creates the data directory, and builds the argument vector
  # handed to the engine.
  class DataPath
    # Option keys that are handled by the Ruby layer and never forwarded
    # to the engine as command-line arguments.
    RUBY_ONLY_KEYS = %i[results_as_hash readonly readwrite flags].freeze
    private_constant :RUBY_ONLY_KEYS

    # dir_path     - absolute path of the data directory
    # is_tmp       - true when dir_path is a temporary directory created here
    # query_params - URI query parameters merged with the options hash
    # mode         - bit mask built from the Constants::Open flags
    attr_reader :dir_path, :is_tmp, :query_params, :mode

    # @param uri [String, nil] path or "file:" URI, optionally followed by a
    #   query string; nil, "" and ":memory:" select a temporary directory
    # @param options [Hash] additional parameters; override URI parameters
    #   with the same key
    # @raise [InvalidArgumentException] on conflicting mode options
    # @raise [DirectoryNotFoundException] when the mode lacks CREATE and the
    #   directory does not exist
    def initialize(uri, options)
      initialize_instance_variables
      path = parse_uri(uri)
      merge_options(options)
      check_params
      directory_path(path)
    end

    # Builds the argument vector for the engine: the program name, the data
    # path, one argument per forwarded parameter (in insertion order) and
    # "--readonly=1" when the mode has the READONLY bit set.
    #
    # @return [Array<String>]
    def generate_arguments
      args = ['clickhouse', "--path=#{@dir_path}"]

      @query_params.each do |key, value|
        next if RUBY_ONLY_KEYS.include?(key)

        args.concat(arguments_for(key.to_s, value))
      end

      args << '--readonly=1' if @mode.anybits?(Constants::Open::READONLY)
      args
    end

    # Removes the data directory, but only when it is a temporary directory
    # created by this object.
    def close
      FileUtils.remove_entry(@dir_path, true) if @is_tmp && Dir.exist?(@dir_path)
    end

    private

    def initialize_instance_variables
      @dir_path = nil
      @is_tmp = false
      @query_params = {}
      @mode = 0
    end

    # Command-line arguments produced by a single parameter.
    def arguments_for(name, value)
      case name
      when 'udf_path'
        ['--', "--user_scripts_path=#{value}", "--user_defined_executable_functions_config=#{value}/*.xml"]
      when '--'
        ['--']
      else
        [value.nil? ? "--#{name}" : "--#{name}=#{value}"]
      end
    end

    # Splits +uri+ into path and query string, stores the query parameters
    # (last value wins for repeated keys) and returns the path without its
    # "file:" prefix. A nil or empty +uri+ yields a nil path, which
    # directory_path turns into a temporary directory.
    #
    # NOTE(review): keys parsed from the URI are Strings, whereas
    # RUBY_ONLY_KEYS and check_params use Symbols. A URI parameter such as
    # "readonly=1" is therefore forwarded to the engine as "--readonly=1"
    # instead of changing @mode - confirm this is intended.
    def parse_uri(uri)
      path, query_str = uri.to_s.split('?', 2)
      @query_params = CGI.parse(query_str).transform_values(&:last) unless query_str.nil?
      remove_file_prefix(path)
    end

    # Options override URI parameters that use the same key; keys are kept
    # exactly as given.
    def merge_options(options)
      @query_params = @query_params.merge(options)
    end

    # Chooses a fresh temporary directory for nil/empty/":memory:" paths,
    # otherwise expands the path and makes sure the directory is usable.
    def directory_path(path)
      if path.nil? || path.empty? || path == ':memory:'
        @is_tmp = true
        @dir_path = Dir.mktmpdir('chdb_')
      else
        @dir_path = File.expand_path(path)
        ensure_directory_exists
      end
    end

    # Creates the directory when the mode includes CREATE; otherwise the
    # directory must already exist.
    def ensure_directory_exists
      if @mode.nobits?(Constants::Open::CREATE)
        raise DirectoryNotFoundException, "Directory #{@dir_path} required" unless Dir.exist?(@dir_path)

        return
      end

      FileUtils.mkdir_p(@dir_path, mode: 0o755)
    end

    # Derives @mode from the :readonly, :readwrite and :flags parameters.
    # Default is READWRITE | CREATE; :flags replaces the mode entirely and
    # cannot be combined with :readonly or :readwrite.
    def check_params # rubocop:disable Metrics/MethodLength
      @mode = Constants::Open::READWRITE | Constants::Open::CREATE
      @mode = Constants::Open::READONLY if @query_params[:readonly]

      if @query_params[:readwrite]
        raise InvalidArgumentException, 'conflicting options: readonly and readwrite' if @query_params[:readonly]

        @mode = Constants::Open::READWRITE
      end

      return unless @query_params[:flags]
      if @query_params[:readonly] || @query_params[:readwrite]
        raise InvalidArgumentException, 'conflicting options: flags with readonly and/or readwrite'
      end

      @mode = @query_params[:flags]
    end

    # Strips a leading "file:" scheme; nil is passed through unchanged.
    def remove_file_prefix(str)
      str&.sub(/\Afile:/, '')
    end
  end
end
@@ -0,0 +1,153 @@
1
+ # frozen_string_literal: true
2
+
3
+ begin
4
+ RUBY_VERSION =~ /(\d+\.\d+)/
5
+ require "chdb/#{Regexp.last_match(1)}/chdb_native"
6
+ rescue LoadError
7
+ require 'chdb/chdb_native'
8
+ end
9
+ require 'chdb/data_path'
10
+ require 'chdb/statement'
11
+
12
module ChDB
  # Represents a database connection and provides methods to interact with the database.
  class Database # rubocop:disable Metrics/ClassLength
    class << self
      # Without block works exactly as new.
      # With block, like new closes the database at the end, but unlike new
      # returns the result of the block instead of the database instance.
      def open(*args)
        database = new(*args)
        return database unless block_given?

        begin
          yield database
        ensure
          database.close
        end
      end
    end

    # results_as_hash - whether rows in result sets are returned as hashes;
    #                   by default rows are returned as arrays.
    # conn            - the underlying ChDB::Connection.
    attr_accessor :results_as_hash, :conn

    # Opens the database stored at +file+.
    #
    # @param file [String, #to_path, nil] path or URI understood by DataPath
    # @param options [Hash] options forwarded to DataPath
    # @yield [Database] when a block is given, the database is yielded and
    #   closed once the block returns
    def initialize(file, options = {}) # rubocop:disable Metrics/MethodLength
      file = file.to_path if file.respond_to? :to_path

      @data_path = DataPath.new(file, options)
      @results_as_hash = @data_path.query_params[:results_as_hash]
      @readonly = @data_path.mode & Constants::Open::READONLY != 0

      argv = @data_path.generate_arguments
      begin
        @conn = ChDB::Connection.new(argv.size, argv)
      rescue StandardError
        # Do not leave a temporary data directory behind when the
        # connection could not be established.
        @data_path.close
        raise
      end
      @closed = false

      return unless block_given?

      begin
        yield self
      ensure
        close
      end
    end

    # Closes the connection and then releases the data path. The connection
    # is closed first so that a temporary data directory is only removed
    # once nothing is using it any more. Calling close again is a no-op.
    def close
      return if closed?

      @conn.close if @conn.respond_to?(:close)
      @data_path.close if @data_path.respond_to?(:close)
      @closed = true
    end

    # Truthy once close has completed.
    def closed?
      defined?(@closed) && @closed
    end

    # Creates a statement for +sql+. Without a block the statement is
    # returned; with a block it is yielded and the block's value returned.
    def prepare(sql)
      stmt = ChDB::Statement.new(self, sql)
      return stmt unless block_given?

      yield stmt
    end

    # Executes +sql+ with +bind_vars+. With a block each row is yielded;
    # otherwise the rows are returned as a frozen array.
    def execute(sql, bind_vars = [], &block)
      prepare(sql) do |stmt|
        result = stmt.execute(bind_vars)

        if block
          result.each(&block)
        else
          result.to_a.freeze
        end
      end
    end

    # Like execute, but the column names come first: they are yielded before
    # the rows when a block is given, or are the first element of the
    # returned array otherwise.
    def execute2(sql, *bind_vars, &) # rubocop:disable Metrics/MethodLength
      prepare(sql) do |stmt|
        result = stmt.execute(*bind_vars)
        stmt.parse

        if block_given?
          yield stmt.columns
          result.each(&)
        else
          return result.each_with_object([stmt.columns]) do |row, arr|
            arr << row
          end
        end
      end
    end

    # Executes +sql+ and returns the statement's result, or yields it and
    # returns the block's value.
    def query(sql, bind_vars = [])
      result = prepare(sql).execute(bind_vars)
      return result unless block_given?

      yield result
    end

    # Same as query, but asks the statement for output in +format+.
    def query_with_format(sql, bind_vars = [], format = 'CSV')
      result = prepare(sql).execute_with_format(bind_vars, format)
      return result unless block_given?

      yield result
    end

    # Returns the first row of the result, or nil when there is none.
    # Bind variables may be given as a list or as a single array; they are
    # passed to execute as one collection, the same way get_first_value
    # passes them to query.
    def get_first_row(sql, *bind_vars)
      execute(sql, bind_vars).first
    end

    # Returns the first column of the first row, or nil when there is no row.
    def get_first_value(sql, *bind_vars)
      query(sql, bind_vars) do |rs|
        if (row = rs.next)
          return @results_as_hash ? row[rs.columns[0]] : row[0]
        end
      end
      nil
    end

    # Returns +true+ if the database has been open in readonly mode
    # A helper to check before performing any operation
    def readonly?
      @readonly
    end

    # Given a statement, return a result set.
    # This is not intended for general consumption
    # :nodoc:
    def build_result_set(stmt)
      if results_as_hash
        HashResultSet.new(self, stmt)
      else
        ResultSet.new(self, stmt)
      end
    end
  end
end
@@ -0,0 +1,22 @@
1
+ # frozen_string_literal: true
2
+
3
module ChDB
  # Root of the ChDB error hierarchy. Derives from StandardError so that a
  # plain `rescue` catches every ChDB-specific error.
  class Exception < ::StandardError
    # code - error code associated with this exception.
    # sql  - the SQL query the error is associated with, if any.
    # Only readers are declared here; this file does not assign either value.
    attr_reader :code, :sql
  end

  # Error related to SQL.
  class SQLException < ChDB::Exception; end

  # Internal error.
  class InternalException < ChDB::Exception; end

  # A required data directory does not exist.
  class DirectoryNotFoundException < ChDB::Exception; end

  # Invalid or conflicting arguments were supplied.
  class InvalidArgumentException < ChDB::Exception; end
end
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
module ChDB
  # Represents the local result of a ChDB operation.
  class LocalResult
    # Output format associated with this result.
    attr_accessor :output_format

    # String form of the result buffer.
    #
    # +buf+ may be nil when the result holds no data; to_s must always
    # return a String (otherwise string interpolation falls back to the
    # default object representation), so nil becomes the empty string.
    #
    # @return [String]
    def to_s
      buf.to_s
    end
  end
end
@@ -0,0 +1,25 @@
1
+ # frozen_string_literal: true
2
+
3
module ChDB
  # Positional parameter binding for objects that keep their bound values
  # in an @bind_vars array.
  module ParameterBinding
    # Stores +value+ for the parameter at the 1-based position +index+.
    def bind_param(index, value)
      @bind_vars[index - 1] = value
    end

    # Binds several values at once. Arguments are flattened; plain values are
    # bound to consecutive positions starting at 1. A Hash binds each value
    # at the position given by its key and does not advance the running
    # position.
    #
    # TODO: Hash-style (named) parameter binding is not yet implemented;
    # only numeric positions are accepted as Hash keys.
    #
    # @raise [ArgumentError] when a Hash key is not a number
    def bind_params(*bind_vars)
      position = 1
      bind_vars.flatten.each do |var|
        if var.is_a?(Hash)
          var.each do |key, val|
            # Fail with a clear message instead of a NoMethodError from
            # the `key - 1` arithmetic in bind_param.
            unless key.is_a?(Numeric)
              raise ArgumentError,
                    "named bind parameter #{key.inspect} is not supported; use a 1-based position"
            end

            bind_param key, val
          end
        else
          bind_param position, var
          position += 1
        end
      end
    end
  end
end
@@ -0,0 +1,28 @@
1
+ # frozen_string_literal: true
2
+
3
require 'csv'

module ChDB
  # This module provides a set of methods for handling the results of a ChDB query.
  # It parses the output content into a CSV table and provides methods to iterate over the rows.
  #
  # step and done? operate on the @parsed_data (array of rows) and @row_idx
  # (index of the next row) instance variables of the including object.
  module ResultHandler
    module_function

    # Parses CSV text whose first line holds the column names.
    # nil is treated as empty output.
    #
    # @param output_content [String, nil]
    # @return [Array(Array, Array<Array>)] column names and row values
    def parse_output(output_content)
      csv_table = CSV.parse(output_content.to_s, headers: true)
      [csv_table.headers, csv_table.map(&:fields)]
    end

    # Returns the next row and advances the cursor, or nil when all rows
    # have been consumed.
    def step
      return nil if @row_idx >= @parsed_data.size

      current_row = @parsed_data[@row_idx]
      @row_idx += 1
      current_row
    end

    public

    # True when every parsed row has been consumed.
    def done?
      @row_idx >= @parsed_data.size
    end
  end
end
+ end