clickhouse-ruby 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +80 -0
  3. data/LICENSE +21 -0
  4. data/README.md +251 -0
  5. data/lib/clickhouse_ruby/active_record/arel_visitor.rb +468 -0
  6. data/lib/clickhouse_ruby/active_record/connection_adapter.rb +723 -0
  7. data/lib/clickhouse_ruby/active_record/railtie.rb +192 -0
  8. data/lib/clickhouse_ruby/active_record/schema_statements.rb +693 -0
  9. data/lib/clickhouse_ruby/active_record.rb +121 -0
  10. data/lib/clickhouse_ruby/client.rb +471 -0
  11. data/lib/clickhouse_ruby/configuration.rb +145 -0
  12. data/lib/clickhouse_ruby/connection.rb +328 -0
  13. data/lib/clickhouse_ruby/connection_pool.rb +301 -0
  14. data/lib/clickhouse_ruby/errors.rb +144 -0
  15. data/lib/clickhouse_ruby/result.rb +189 -0
  16. data/lib/clickhouse_ruby/types/array.rb +183 -0
  17. data/lib/clickhouse_ruby/types/base.rb +77 -0
  18. data/lib/clickhouse_ruby/types/boolean.rb +68 -0
  19. data/lib/clickhouse_ruby/types/date_time.rb +163 -0
  20. data/lib/clickhouse_ruby/types/float.rb +115 -0
  21. data/lib/clickhouse_ruby/types/integer.rb +157 -0
  22. data/lib/clickhouse_ruby/types/low_cardinality.rb +58 -0
  23. data/lib/clickhouse_ruby/types/map.rb +249 -0
  24. data/lib/clickhouse_ruby/types/nullable.rb +73 -0
  25. data/lib/clickhouse_ruby/types/parser.rb +244 -0
  26. data/lib/clickhouse_ruby/types/registry.rb +148 -0
  27. data/lib/clickhouse_ruby/types/string.rb +83 -0
  28. data/lib/clickhouse_ruby/types/tuple.rb +206 -0
  29. data/lib/clickhouse_ruby/types/uuid.rb +84 -0
  30. data/lib/clickhouse_ruby/types.rb +69 -0
  31. data/lib/clickhouse_ruby/version.rb +5 -0
  32. data/lib/clickhouse_ruby.rb +101 -0
  33. metadata +150 -0
@@ -0,0 +1,244 @@
1
+ # frozen_string_literal: true
2
+
3
+ module ClickhouseRuby
4
+ module Types
5
+ # AST-based parser for ClickHouse type strings
6
+ #
7
+ # This parser correctly handles nested types like:
8
+ # - Array(Tuple(String, UInt64))
9
+ # - Map(String, Array(Nullable(Int32)))
10
+ # - Nullable(LowCardinality(String))
11
+ #
12
+ # The parser does NOT validate types - it only handles syntax.
13
+ # Type validation is delegated to ClickHouse.
14
+ #
15
+ # Grammar:
16
+ # type := simple_type | parameterized_type
17
+ # parameterized_type := identifier "(" type_list ")"
18
+ # type_list := type ("," type)*
19
+ # simple_type := identifier
20
+ # identifier := [a-zA-Z_][a-zA-Z0-9_]*
21
+ #
22
+ # @example
23
+ # parser = Parser.new
24
+ # parser.parse('String')
25
+ # # => { type: 'String' }
26
+ #
27
+ # parser.parse('Array(UInt64)')
28
+ # # => { type: 'Array', args: [{ type: 'UInt64' }] }
29
+ #
30
+ # parser.parse('Map(String, Array(Tuple(String, UInt64)))')
31
+ # # => { type: 'Map', args: [
32
+ # # { type: 'String' },
33
+ # # { type: 'Array', args: [
34
+ # # { type: 'Tuple', args: [
35
+ # # { type: 'String' },
36
+ # # { type: 'UInt64' }
37
+ # # ]}
38
+ # # ]}
39
+ # # ]}
40
+ #
41
+ class Parser
42
# Raised when a type string is syntactically invalid.
#
# The final message is the base message, followed by the failing position
# (when given) and the offending input (when given).
class ParseError < ClickhouseRuby::Error
  attr_reader :position, :input

  # @param message [String] description of the failure
  # @param position [Integer, nil] offset at which parsing failed
  # @param input [String, nil] the text that was being parsed
  def initialize(message, position: nil, input: nil)
    @position = position
    @input = input

    text = message
    text = "#{text} at position #{position}" if position
    text += ": '#{input}'" if input
    super(text)
  end
end
54
+
55
# Parses a ClickHouse type string into an AST
#
# Surrounding whitespace is stripped before parsing. Error positions are
# offsets into that stripped text, so the "unexpected character" error
# carries the stripped text as its input to keep position and input aligned.
#
# @param type_string [String] the type string to parse
# @return [Hash] the parsed AST with :type and optional :args keys
# @raise [ParseError] if the type string is nil, blank, or has trailing
#   characters after a complete type
def parse(type_string)
  raise ParseError.new('Type string cannot be nil') if type_string.nil?

  @input = type_string.strip
  @pos = 0

  raise ParseError.new('Type string cannot be empty', input: type_string) if @input.empty?

  result = parse_type
  skip_whitespace

  # Anything left over means the input was not a single complete type
  if @pos < @input.length
    raise ParseError.new("Unexpected character '#{@input[@pos]}'", position: @pos, input: @input)
  end

  result
end
78
+
79
+ private
80
+
81
# Parses one type expression or literal argument at the current position
#
# Three shapes are recognised:
# - a numeric literal such as the 3 in DateTime64(3)
# - a single-quoted string literal such as 'UTC'
# - an identifier, optionally followed by a parenthesised argument list
#
# Literals are returned in the same { type: ... } shape as type names.
#
# @return [Hash] node with :type and, for parameterized types, :args
def parse_type
  skip_whitespace

  upcoming = peek
  return { type: parse_numeric } if numeric_char?(upcoming)
  return { type: parse_string_literal } if upcoming == "'"

  node = { type: parse_identifier }

  skip_whitespace
  return node unless peek == '('

  consume('(')
  node[:args] = parse_type_list
  consume(')')
  node
end
116
+
117
# Parses the comma-separated arguments inside a pair of parentheses
#
# Stops before the closing parenthesis without consuming it.
#
# @return [Array<Hash>] the parsed arguments (empty for "()")
def parse_type_list
  skip_whitespace

  # "()" - nothing to collect
  return [] if peek == ')'

  items = [parse_type]
  until peek != ','
    consume(',')
    items << parse_type
  end
  items
end
136
+
137
# Reads an identifier (type name) starting at the current position
#
# An identifier is a letter or underscore followed by any number of
# letters, digits, or underscores. Leading whitespace is skipped first.
#
# @return [String] the identifier
# @raise [ParseError] if the current position does not start an identifier
def parse_identifier
  skip_whitespace

  # \G anchors the match at @pos so nothing before it is considered
  token = @input.match(/\G[a-zA-Z_][a-zA-Z0-9_]*/, @pos)
  raise ParseError.new('Expected type name', position: @pos, input: @input) unless token

  @pos = token.end(0)
  token[0]
end
159
+
160
# Checks if a character can start an identifier (ASCII letter or underscore)
#
# Returns a real boolean and tolerates nil (end of input).
#
# @param char [String, nil] the character to check
# @return [Boolean] true if valid
def identifier_start_char?(char)
  !char.nil? && char.match?(/[a-zA-Z_]/)
end
167
+
168
# Checks if a character can appear inside an identifier
# (ASCII letter, digit, or underscore)
#
# Returns a real boolean and tolerates nil (end of input).
#
# @param char [String, nil] the character to check
# @return [Boolean] true if valid
def identifier_char?(char)
  !char.nil? && char.match?(/[a-zA-Z0-9_]/)
end
175
+
176
# Checks if a character is an ASCII digit
#
# Returns a real boolean and tolerates nil (end of input).
#
# @param char [String, nil] the character to check
# @return [Boolean] true if numeric
def numeric_char?(char)
  !char.nil? && char.match?(/[0-9]/)
end
183
+
184
# Reads a run of ASCII digits starting at the current position
#
# The digits are returned as text, not converted to a number. If the
# current character is not a digit, an empty string is returned and the
# position is left unchanged.
#
# @return [String] the digits that were read
def parse_numeric
  digits = @input.match(/\G[0-9]*/, @pos)[0]
  @pos += digits.length
  digits
end
196
+
197
# Reads a single-quoted string literal starting at the current position
#
# A backslash protects the character after it, so an escaped quote does
# not end the literal. Escape sequences are returned verbatim (they are
# not decoded).
#
# @return [String] the text between the quotes
# @raise [ParseError] if the opening or closing quote is missing
def parse_string_literal
  consume("'")
  opening = @pos
  limit = @input.length

  until @pos >= limit || @input[@pos] == "'"
    # Step over a backslash together with the character it escapes
    escaped = @input[@pos] == '\\' && @pos + 1 < limit
    @pos += escaped ? 2 : 1
  end

  literal = @input[opening...@pos]
  consume("'")
  literal
end
214
+
215
# Looks at the next significant character without consuming it
#
# Whitespace in front of that character is skipped (and stays skipped).
#
# @return [String, nil] the character, or nil once the input is exhausted
def peek
  skip_whitespace
  return nil if @pos >= @input.length

  @input[@pos]
end
222
+
223
# Advances past one specific character
#
# Leading whitespace is skipped before the comparison.
#
# @param expected [String] the character that must come next
# @raise [ParseError] if a different character, or the end of the input,
#   is found instead
def consume(expected)
  skip_whitespace

  found = @input[@pos] if @pos < @input.length
  found ||= 'end of input'

  if found != expected
    raise ParseError.new("Expected '#{expected}', got '#{found}'", position: @pos, input: @input)
  end

  @pos += 1
end
237
+
238
# Moves the position forward past any run of whitespace
#
# Does nothing when the current character is not whitespace or the input
# is exhausted.
def skip_whitespace
  until @pos >= @input.length || @input[@pos] !~ /\s/
    @pos += 1
  end
end
242
+ end
243
+ end
244
+ end
@@ -0,0 +1,148 @@
1
+ # frozen_string_literal: true
2
+
3
+ module ClickhouseRuby
4
+ module Types
5
+ # Registry for ClickHouse type mappings
6
+ #
7
+ # Manages the mapping between ClickHouse type strings and Ruby type classes.
8
+ # Supports both simple types (String, UInt64) and parameterized types
9
+ # (Array, Map, Nullable).
10
+ #
11
+ # @example Register a custom type
12
+ # registry = Registry.new
13
+ # registry.register('MyType', MyTypeClass)
14
+ #
15
+ # @example Look up a type
16
+ # type = registry.lookup('Array(String)')
17
+ #
18
+ class Registry
19
# Parameterized types that wrap exactly one inner type
WRAPPER_TYPES = %w[
  Array
  Nullable
  LowCardinality
].freeze

# Parameterized types built from several inner types
MULTI_ARG_TYPES = %w[
  Map
  Tuple
].freeze

# Date/time types whose arguments are precision/timezone literals
DATETIME_TYPES = %w[
  DateTime
  DateTime64
].freeze
27
+
28
# Creates an empty registry: no registered type classes and an empty
# lookup cache.
def initialize
  @cache = {}
  @types = {}
end
32
+
33
# Associates a type name with the class that handles it
#
# Any cached lookups are discarded, since they may have been built from
# the previous registrations.
#
# @param name [String] the type name (e.g., 'String', 'UInt64')
# @param type_class [Class] the type class
def register(name, type_class)
  @types.store(name, type_class)
  @cache.clear
end
41
+
42
# Resolves a ClickHouse type string to a type instance
#
# The first lookup of a given string parses it and builds the instance;
# later lookups of the same string return the cached instance.
#
# @param type_string [String] the full type string (e.g., 'Array(String)')
# @return [Base] the type instance
def lookup(type_string)
  @cache.fetch(type_string) do
    @cache[type_string] = build_type(Parser.new.parse(type_string))
  end
end
61
+
62
# Registers the built-in ClickHouse type names
#
# Each handler class is listed once together with every type name it
# serves; names are registered in the order they appear below.
def register_defaults
  defaults = [
    # Integer types
    [Integer, %w[Int8 Int16 Int32 Int64 Int128 Int256 UInt8 UInt16 UInt32 UInt64 UInt128 UInt256]],
    # Float types
    [Float, %w[Float32 Float64]],
    # String types
    [String, %w[String FixedString]],
    # Date/Time types
    [DateTime, %w[Date Date32 DateTime DateTime64]],
    # Other basic types
    [UUID, %w[UUID]],
    [Boolean, %w[Bool]],
    # Complex/wrapper types
    [Array, %w[Array]],
    [Map, %w[Map]],
    [Tuple, %w[Tuple]],
    [Nullable, %w[Nullable]],
    [LowCardinality, %w[LowCardinality]]
  ]

  defaults.flat_map do |type_class, names|
    names.map { |type_name| register(type_name, type_class) }
  end.last
end
103
+
104
+ private
105
+
106
# Builds a type instance from a parsed AST
#
# Dispatch rules:
# - unregistered names produce a generic Base that passes values through
# - names without arguments are instantiated with just the name
# - DateTime/DateTime64 arguments are literals: a digits-only literal is
#   the precision, any other literal is the timezone. DateTime('UTC')
#   therefore yields a timezone rather than a precision of 0.
# - FixedString(N) passes N as length:, the only keyword its handler accepts
# - Array/Nullable/LowCardinality receive their single inner type
# - everything else (Map, Tuple, ...) receives all inner types
#
# @param ast [Hash] the parsed type AST (:type and optional :args)
# @return [Base] the type instance
def build_type(ast)
  type_name = ast[:type]
  args = ast[:args] || []
  type_class = @types[type_name]

  # For unknown types, return a generic type that passes through values
  return Base.new(type_name) unless type_class
  return type_class.new(type_name) if args.empty?

  if DATETIME_TYPES.include?(type_name)
    build_datetime_type(type_class, type_name, args)
  elsif type_name == 'FixedString'
    # The argument is a byte-count literal, not a nested type
    type_class.new(type_name, length: args.first[:type].to_i)
  elsif WRAPPER_TYPES.include?(type_name)
    type_class.new(type_name, element_type: build_type(args.first))
  else
    type_class.new(type_name, arg_types: args.map { |arg| build_type(arg) })
  end
end

# Instantiates a DateTime-family type from its literal arguments
#
# timezone: is always passed (nil when absent); precision: is passed only
# when a numeric literal is present.
#
# @param type_class [Class] the registered handler class
# @param type_name [String] 'DateTime' or 'DateTime64'
# @param args [Array<Hash>] the parsed literal arguments
# @return [Base] the type instance
def build_datetime_type(type_class, type_name, args)
  literals = args.map { |arg| arg[:type] }
  precision = literals.find { |literal| literal.match?(/\A\d+\z/) }
  timezone = literals.find { |literal| !literal.match?(/\A\d+\z/) }

  options = { timezone: timezone }
  options[:precision] = precision.to_i if precision
  type_class.new(type_name, **options)
end
145
+
146
+ end
147
+ end
148
+ end
@@ -0,0 +1,83 @@
1
+ # frozen_string_literal: true
2
+
3
+ module ClickhouseRuby
4
+ module Types
5
+ # Type handler for ClickHouse string types
6
+ #
7
+ # Handles: String, FixedString(N)
8
+ #
9
+ # ClickHouse strings are byte sequences, not necessarily valid UTF-8.
10
+ # However, most usage is with UTF-8 text.
11
+ #
12
+ class String < Base
13
+ # The fixed length for FixedString types
14
+ # @return [Integer, nil] the fixed length or nil for String type
15
+ attr_reader :length
16
+
17
# @param name [String] the type name handed to the base class
# @param length [Integer, nil] fixed length for FixedString, nil for String
def initialize(name, length: nil)
  super(name)

  @length = length
end
21
+
22
# Converts a Ruby value to a string
#
# When a fixed length is set (FixedString), the result is exactly that many
# BYTES: shorter values are padded with NUL bytes and longer values are cut
# at the byte limit. Sizes are measured in bytes because ClickHouse defines
# FixedString(N) as N bytes; cutting may split a multibyte character.
#
# @param value [Object] the value to convert
# @return [String, nil] the string value, or nil when value is nil
def cast(value)
  return nil if value.nil?

  str = value.to_s
  return str unless @length

  missing = @length - str.bytesize
  missing.positive? ? str + ("\0" * missing) : str.byteslice(0, @length)
end
38
+
39
# Turns a value received from ClickHouse into a Ruby String
#
# For fixed-length types the NUL bytes at the end of the value are
# stripped; NUL bytes elsewhere are kept.
#
# @param value [Object] the value from ClickHouse
# @return [String, nil] the string value, or nil when value is nil
def deserialize(value)
  return nil if value.nil?

  text = value.to_s
  @length ? text.sub(/\0+\z/, '') : text
end
55
+
56
# Renders a value as a single-quoted SQL string literal
#
# The value is converted with to_s and escaped before being quoted.
#
# @param value [String, nil] the value to serialize
# @return [String] the SQL literal, or the bare word NULL for nil
def serialize(value)
  value.nil? ? 'NULL' : "'#{escape_string(value.to_s)}'"
end
66
+
67
+ private
68
+
69
# Escapes a string for use inside a single-quoted ClickHouse SQL literal
#
# Backslash, single quote, newline, carriage return, tab and NUL are each
# replaced by their backslash escape sequence.
#
# The block form of gsub is used on purpose: a replacement STRING is
# scanned for backslash sequences (\\ collapses to one backslash, \' means
# "text after the match", \0 means "the whole match"), which would leave
# backslashes and NULs unescaped and corrupt values containing quotes.
# A block's return value is inserted literally.
#
# @param value [String] the string to escape
# @return [String] the escaped string
def escape_string(value)
  # Backslashes first, so the escapes added below are not doubled
  value.gsub('\\') { '\\\\' }
       .gsub("'") { "\\'" }
       .gsub("\n") { '\\n' }
       .gsub("\r") { '\\r' }
       .gsub("\t") { '\\t' }
       .gsub("\0") { '\\0' }
end
81
+ end
82
+ end
83
+ end
@@ -0,0 +1,206 @@
1
+ # frozen_string_literal: true
2
+
3
+ module ClickhouseRuby
4
+ module Types
5
+ # Type handler for ClickHouse Tuple type
6
+ #
7
+ # Tuples are fixed-size collections where each position has its own type.
8
+ # Similar to Ruby arrays but heterogeneous and fixed-size.
9
+ #
10
+ # @example
11
+ # type = Tuple.new('Tuple', arg_types: [String.new('String'), Integer.new('UInt64')])
12
+ # type.cast(['hello', 42])
13
+ # type.serialize(['hello', 42]) # => "('hello', 42)"
14
+ #
15
+ class Tuple < Base
16
+ # @return [Array<Base>] the types of each tuple element
17
+ attr_reader :element_types
18
+
19
# @param name [String] the type name handed to the base class
# @param arg_types [Array<Base>, nil] one type per tuple position;
#   defaults to an empty list when omitted
def initialize(name, arg_types: nil)
  super(name)

  @element_types = arg_types
  @element_types ||= []
end
25
+
26
# Converts a Ruby value to a tuple (Array)
#
# Arrays are used as they are and strings are parsed from ClickHouse tuple
# notation; in both cases each element is then cast by its positional type.
#
# @param value [Object] the value to convert
# @return [Array, nil] the tuple value, or nil when value is nil
# @raise [TypeCastError] if value is neither nil, an Array, nor a String
def cast(value)
  return nil if value.nil?
  return cast_elements(value) if value.is_a?(::Array)
  return cast_elements(parse_tuple_string(value)) if value.is_a?(::String)

  raise TypeCastError.new(
    "Cannot cast #{value.class} to Tuple",
    from_type: value.class.name,
    to_type: to_s,
    value: value
  )
end
50
+
51
# Turns a value received from ClickHouse into a Ruby Array
#
# Arrays are used as they are, strings are parsed from tuple notation, and
# any other value is treated as a one-element tuple. Each element is then
# deserialized by its positional type.
#
# @param value [Object] the value from ClickHouse
# @return [Array, nil] the tuple value, or nil when value is nil
def deserialize(value)
  return nil if value.nil?

  items =
    if value.is_a?(::Array) then value
    elsif value.is_a?(::String) then parse_tuple_string(value)
    else [value]
    end

  deserialize_elements(items)
end
69
+
70
# Renders a tuple as a parenthesised SQL literal
#
# Each element is serialized by the type at its position; positions without
# a declared type fall back to a generic Base type.
#
# @param value [Array, nil] the value to serialize
# @return [String] the SQL literal, or the bare word NULL for nil
def serialize(value)
  return 'NULL' if value.nil?

  rendered = value.map.with_index do |item, idx|
    (@element_types[idx] || Base.new('String')).serialize(item)
  end

  "(#{rendered.join(', ')})"
end
84
+
85
+ # Returns the full type string including element types
86
+ #
87
+ # @return [String] the type string
88
+ def to_s
89
+ type_strs = @element_types.map(&:to_s).join(', ')
90
+ "Tuple(#{type_strs})"
91
+ end
92
+
93
+ private
94
+
95
# Casts every element with the type declared for its position
#
# Positions beyond the declared element types fall back to a generic
# Base type.
#
# @param arr [Array] the raw elements
# @return [Array] the cast elements, in the same order
def cast_elements(arr)
  arr.map.with_index do |item, idx|
    (@element_types[idx] || Base.new('String')).cast(item)
  end
end
105
+
106
# Deserializes every element with the type declared for its position
#
# Positions beyond the declared element types fall back to a generic
# Base type.
#
# @param arr [Array] the raw elements
# @return [Array] the deserialized elements, in the same order
def deserialize_elements(arr)
  arr.map.with_index do |item, idx|
    (@element_types[idx] || Base.new('String')).deserialize(item)
  end
end
116
+
117
# Parses ClickHouse tuple notation such as "('a', 1)" into its elements
#
# Surrounding whitespace is ignored. The text must be wrapped in
# parentheses; an empty or blank interior yields an empty tuple.
#
# @param value [String] the string to parse
# @return [Array] the parsed tuple
# @raise [TypeCastError] if the text is not wrapped in parentheses
def parse_tuple_string(value)
  text = value.strip
  return [] if text == '()'

  if !text.start_with?('(') || !text.end_with?(')')
    raise TypeCastError.new(
      "Invalid tuple format: '#{value}'",
      from_type: 'String',
      to_type: to_s,
      value: value
    )
  end

  # Drop the outer parentheses and split what is left
  body = text[1...-1]
  body.strip.empty? ? [] : parse_elements(body)
end
143
+
144
# Splits the interior of a tuple into its top-level elements
#
# A comma separates elements only when it is outside quotes and outside
# any (), [] or {} nesting. A backslash protects the character after it,
# so an escaped quote does not open or close a string. Each piece is
# stripped and handed to parse_element; a blank final piece is dropped.
#
# @param str [String] the inner tuple string
# @return [Array] the parsed elements
def parse_elements(str)
  parsed = []
  buffer = +''
  nesting = 0
  quoted = false
  escaped = false

  str.each_char do |ch|
    if escaped
      # Character protected by a preceding backslash: copy it untouched
      buffer << ch
      escaped = false
    elsif ch == '\\'
      buffer << ch
      escaped = true
    elsif ch == ',' && nesting.zero? && !quoted
      parsed << parse_element(buffer.strip)
      buffer = +''
    else
      if ch == "'"
        quoted = !quoted
      elsif !quoted
        nesting += 1 if '([{'.include?(ch)
        nesting -= 1 if ')]}'.include?(ch)
      end
      buffer << ch
    end
  end

  # Whatever follows the last separator is the final element
  tail = buffer.strip
  parsed << parse_element(tail) unless tail.empty?

  parsed
end
192
+
193
# Parses a single element, removing quotes if necessary
#
# Unquoted text is returned unchanged. For quoted text the surrounding
# quotes are removed and backslash escapes are decoded in one left-to-right
# pass, so an escaped backslash followed by an escaped quote decodes
# correctly. The decoded sequences are \\ \' \n \r \t \0 \b \f; any other
# backslash sequence is kept verbatim.
#
# @param str [String] the element string
# @return [Object] the parsed element
def parse_element(str)
  return str unless str.start_with?("'") && str.end_with?("'")

  decoded = {
    '\\' => '\\', "'" => "'", 'n' => "\n", 'r' => "\r",
    't' => "\t", '0' => "\0", 'b' => "\b", 'f' => "\f"
  }

  # /m lets "." match a newline that directly follows a backslash
  str[1...-1].gsub(/\\./m) { |sequence| decoded.fetch(sequence[1], sequence) }
end
204
+ end
205
+ end
206
+ end