click_house-client 0.2.3 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,48 @@
1
+ # frozen_string_literal: true
2
+
3
+ module ClickHouse
4
+ module Client
5
# Connection settings for a single ClickHouse database: target URL,
# credentials and the query-string variables sent with every request.
class Database
  attr_reader :database

  # @param database [String] database name, also sent as the `database` variable
  # @param url [String] base URL of the ClickHouse HTTP endpoint
  # @param username [String] value of the X-ClickHouse-User header
  # @param password [String] value of the X-ClickHouse-Key header
  # @param variables [Hash] extra query-string variables; they win over the defaults
  def initialize(database:, url:, username:, password:, variables: {})
    @database = database
    @url = url
    @username = username
    @password = password

    # HTTP compression is switched on by default; callers may override it.
    default_variables = { database: database, enable_http_compression: 1 }
    @variables = default_variables.merge(variables).freeze
  end

  # Memoized URI carrying only the configured variables.
  def uri
    @uri ||= build_custom_uri
  end

  # Builds a fresh URI whose query string is the configured variables
  # merged with +extra_variables+ (the extras take precedence).
  def build_custom_uri(extra_variables: {})
    Addressable::URI.parse(@url).tap do |target|
      target.query_values = @variables.merge(extra_variables)
    end
  end

  # Memoized, frozen request headers (credentials + JSON output format).
  def headers
    @headers ||= {
      'X-ClickHouse-User' => @username,
      'X-ClickHouse-Key' => @password,
      'X-ClickHouse-Format' => 'JSON' # always return JSON data
    }.freeze
  end

  # Returns a copy of this configuration that targets the 'default' database
  # while keeping the URL, credentials and remaining variables.
  def with_default_database
    overridden_variables = @variables.merge(database: 'default')

    self.class.new(
      database: 'default',
      url: @url,
      username: @username,
      password: @password,
      variables: overridden_variables
    )
  end
end
47
+ end
48
+ end
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
+ module ClickHouse
4
+ module Client
5
# Casts the values of a parsed ClickHouse JSON response into Ruby objects,
# based on the column types listed in the response's `meta` section.
class Formatter
  # Fallback caster: returns the value untouched.
  DEFAULT = ->(value) { value }

  # Casters for the non-nullable column types that get special treatment.
  BASIC_TYPE_CASTERS = {
    'UInt64' => ->(value) { Integer(value) },
    "DateTime64(6, 'UTC')" => ->(value) { ActiveSupport::TimeZone['UTC'].parse(value) },
    "IntervalSecond" => ->(value) { ActiveSupport::Duration.build(value.to_i) },
    # The value is divided by 1000.0 before the Duration is built.
    "IntervalMillisecond" => ->(value) { ActiveSupport::Duration.build(value.to_i / 1000.0) }
  }.freeze

  # BASIC_TYPE_CASTERS plus a `Nullable(<type>)` variant of each caster that
  # passes nil through. Frozen like BASIC_TYPE_CASTERS so the shared lookup
  # table cannot be modified by accident at runtime.
  TYPE_CASTERS = BASIC_TYPE_CASTERS.merge(
    BASIC_TYPE_CASTERS.to_h do |type, caster|
      ["Nullable(#{type})", ->(value) { value.nil? ? nil : caster.call(value) }]
    end
  ).freeze

  # @param result [Hash] parsed response with a 'meta' list of
  #   `{ 'name' => ..., 'type' => ... }` entries and a 'data' list of rows
  # @return [Array<Hash>] one hash per row; values whose column type has no
  #   registered caster are returned unchanged
  def self.format(result)
    name_type_mapping = result['meta'].to_h { |column| [column['name'], column['type']] }

    result['data'].map do |row|
      row.each_with_object({}) do |(column, value), casted_row|
        caster = TYPE_CASTERS.fetch(name_type_mapping[column], DEFAULT)

        casted_row[column] = caster.call(value)
      end
    end
  end
end
34
+ end
35
+ end
@@ -0,0 +1,82 @@
1
+ # frozen_string_literal: true
2
+
3
+ module ClickHouse
4
+ module Client
5
# Raw SQL query with optional placeholders. Regular placeholders are sent to
# ClickHouse for substitution; placeholders whose value is another QueryLike
# are inlined as subqueries.
class Query < QueryLike
  SUBQUERY_PLACEHOLDER_REGEX = /{\w+:Subquery}/ # example: {var:Subquery}, special "internal" type for subqueries
  PLACEHOLDER_REGEX = /{\w+:(\w|[()])+}/ # example: {var:UInt8} or {var:Array(UInt8)}
  PLACEHOLDER_NAME_REGEX = /{(\w+):/ # example: {var:UInt8} => var

  # Wraps a raw SQL string into a Query; QueryLike objects are returned as-is.
  def self.build(query)
    query.is_a?(ClickHouse::Client::QueryLike) ? query : new(raw_query: query)
  end

  # @param raw_query [String] SQL text, must not be blank
  # @param placeholders [Hash, nil] placeholder name (Symbol) => value or QueryLike
  # @raise [QueryError] when the query string is blank
  def initialize(raw_query:, placeholders: {})
    raise QueryError, 'Empty query string given' if raw_query.blank?

    @raw_query = raw_query
    @placeholders = placeholders || {}
  end

  # Placeholders that ClickHouse has to substitute, including the ones
  # declared by nested subqueries.
  #
  # @raise [QueryError] when a subquery declares a placeholder name that is
  #   already present
  def prepared_placeholders
    own_values = placeholders
      .reject { |_name, value| value.is_a?(QueryLike) }
      .transform_values { |value| prepared_placeholder_value(value) }

    placeholders.values.grep(QueryLike).each do |subquery|
      own_values.merge!(subquery.prepared_placeholders) do |key, a, b|
        raise QueryError, "mismatching values for the '#{key}' placeholder: #{a} vs #{b}"
      end
    end

    own_values
  end

  # Only {name:Subquery} placeholders are expanded here; every other
  # placeholder is left in place for ClickHouse to replace.
  def to_sql
    raw_query.gsub(SUBQUERY_PLACEHOLDER_REGEX) do |match|
      candidate = placeholder_value(match)
      candidate.is_a?(QueryLike) ? candidate.to_sql : match
    end
  end

  # SQL with every placeholder replaced by a bind marker from
  # +bind_index_manager+; subqueries are redacted recursively with the same
  # manager.
  def to_redacted_sql(bind_index_manager = BindIndexManager.new)
    raw_query.gsub(PLACEHOLDER_REGEX) do |match|
      candidate = placeholder_value(match)
      next candidate.to_redacted_sql(bind_index_manager) if candidate.is_a?(QueryLike)

      bind_index_manager.next_bind_str
    end
  end

  private

  attr_reader :raw_query, :placeholders

  # Looks up the value registered for a placeholder such as "{var:UInt8}".
  # Uses Hash#fetch, so an unregistered name raises KeyError.
  def placeholder_value(placeholder_in_query)
    name = placeholder_in_query.slice(PLACEHOLDER_NAME_REGEX, 1)
    placeholders.fetch(name.to_sym)
  end

  # Arrays are serialized via Quoting.quote; any other value is passed through.
  def prepared_placeholder_value(value)
    value.is_a?(Array) ? Quoting.quote(value) : value
  end
end
81
+ end
82
+ end
@@ -0,0 +1,156 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'active_record'
4
+
5
+ module ClickHouse
6
+ module Client
7
# Builder for simple ClickHouse SELECT statements on top of Arel.
#
# Every chainable method (+where+, +select+, +order+, +group+, +limit+,
# +offset+) returns a new builder and leaves the receiver untouched, so a
# base query can safely be shared and specialised.
class QueryBuilder < QueryLike
  attr_reader :table
  attr_accessor :conditions, :manager

  # Arel node classes accepted by #where when an Arel node is passed.
  VALID_NODES = [
    Arel::Nodes::In,
    Arel::Nodes::Equality,
    Arel::Nodes::LessThan,
    Arel::Nodes::LessThanOrEqual,
    Arel::Nodes::GreaterThan,
    Arel::Nodes::GreaterThanOrEqual,
    Arel::Nodes::NamedFunction
  ].freeze

  # @param table_name [String, Symbol] table name handed to Arel::Table.new
  def initialize(table_name)
    @table = Arel::Table.new(table_name)
    @manager = Arel::SelectManager.new(Arel::Table.engine).from(@table).project(Arel.star)
    @conditions = []
  end

  # The `where` method currently only supports IN and equal to queries along
  # with above listed VALID_NODES.
  # For example, using a range (start_date..end_date) will result in incorrect SQL.
  # If you need to query a range, use greater than and less than conditions with Arel.
  #
  # Correct usage:
  #   query.where(query.table[:created_at].lteq(Date.today)).to_sql
  #   "SELECT * FROM \"table\" WHERE \"table\".\"created_at\" <= '2023-08-01'"
  #
  # This also supports array conditions which will result in an IN query.
  #   query.where(entity_id: [1,2,3]).to_sql
  #   "SELECT * FROM \"table\" WHERE \"table\".\"entity_id\" IN (1, 2, 3)"
  #
  # Range support and more `Arel::Nodes` could be considered for future iterations.
  # @param conditions [Arel::Nodes::Node, Hash] a supported Arel node, or a
  #   column => value mapping (Array values become IN conditions)
  # @raise [ArgumentError] when an Arel node outside VALID_NODES is given
  # @return [ClickHouse::QueryBuilder] New instance of query builder.
  def where(conditions)
    validate_condition_type!(conditions)

    deep_clone.tap do |new_instance|
      if conditions.is_a?(Arel::Nodes::Node)
        new_instance.conditions << conditions
      else
        add_conditions_to(new_instance, conditions)
      end
    end
  end

  # Adds columns/expressions to the projection, replacing the initial `*`.
  #
  # Previously selected attributes are carried over by name. Existing
  # projections that are neither an attribute nor `*` are not carried over
  # (the filter_map below yields nil for them).
  #
  # @param fields [Array<Symbol, String, Arel::Expressions>]
  # @return [QueryBuilder] new builder instance
  def select(*fields)
    deep_clone.tap do |new_instance|
      existing_fields = new_instance.manager.projections.filter_map do |projection|
        if projection.is_a?(Arel::Attributes::Attribute)
          projection.name.to_s
        elsif projection.to_s == '*'
          nil
        end
      end

      # Symbols are normalised to strings so that :id and 'id' deduplicate.
      new_projections = (existing_fields + fields).map do |field|
        field.is_a?(Symbol) ? field.to_s : field
      end

      new_instance.manager.projections = new_projections.uniq.map do |field|
        field.is_a?(Arel::Expressions) ? field : new_instance.table[field.to_s]
      end
    end
  end

  # @param field [Symbol, String] column of this builder's table
  # @param direction [Symbol, String] :asc or :desc (case-insensitive)
  # @raise [ArgumentError] for any other direction
  # @return [QueryBuilder] new builder instance
  def order(field, direction = :asc)
    validate_order_direction!(direction)

    deep_clone.tap do |new_instance|
      table_field = new_instance.table[field]
      new_order = direction.to_s.casecmp('desc').zero? ? table_field.desc : table_field.asc
      new_instance.manager.order(new_order)
    end
  end

  # @param columns [Array] columns forwarded to Arel's `group`
  # @return [QueryBuilder] new builder instance
  def group(*columns)
    deep_clone.tap do |new_instance|
      new_instance.manager.group(*columns)
    end
  end

  # Applies LIMIT on a copy of the builder. Like the other chainable methods
  # this no longer modifies the receiver, so queries derived from a shared
  # base builder do not leak limits into each other.
  #
  # @param count [Integer]
  # @return [QueryBuilder] new builder instance
  def limit(count)
    deep_clone.tap do |new_instance|
      new_instance.manager.take(count)
    end
  end

  # Applies OFFSET on a copy of the builder (see #limit).
  #
  # @param count [Integer]
  # @return [QueryBuilder] new builder instance
  def offset(count)
    deep_clone.tap do |new_instance|
      new_instance.manager.skip(count)
    end
  end

  # Renders the SQL string. The collected conditions are (re)applied to the
  # manager first, so the manager's WHERE constraints are rebuilt on each call.
  def to_sql
    apply_conditions!

    visitor = Arel::Visitors::ToSql.new(ClickHouse::Client::ArelEngine.new)
    visitor.accept(manager.ast, Arel::Collectors::SQLString.new).value
  end

  # SQL with condition values replaced by bind markers; delegates to Redactor.
  def to_redacted_sql(bind_index_manager = ClickHouse::Client::BindIndexManager.new)
    ClickHouse::Client::Redactor.redact(self, bind_index_manager)
  end

  private

  # Only Arel nodes are checked here; any other argument (e.g. a Hash) passes.
  def validate_condition_type!(condition)
    return unless condition.is_a?(Arel::Nodes::Node) && VALID_NODES.exclude?(condition.class)

    raise ArgumentError, "Unsupported Arel node type for QueryBuilder: #{condition.class.name}"
  end

  # Turns a column => value mapping into IN (Array values) or equality nodes.
  def add_conditions_to(instance, conditions)
    conditions.each do |key, value|
      instance.conditions << if value.is_a?(Array)
                               instance.table[key].in(value)
                             else
                               instance.table[key].eq(value)
                             end
    end
  end

  # New builder for the same table with its own manager and condition copies.
  def deep_clone
    self.class.new(table.name).tap do |new_instance|
      new_instance.manager = manager.clone
      new_instance.conditions = conditions.map(&:clone)
    end
  end

  # Drops the manager's current WHERE constraints and re-adds every condition.
  def apply_conditions!
    manager.constraints.clear
    conditions.each { |condition| manager.where(condition) }
  end

  def validate_order_direction!(direction)
    return if %w[asc desc].include?(direction.to_s.downcase)

    raise ArgumentError, "Invalid order direction '#{direction}'. Must be :asc or :desc"
  end
end
155
+ end
156
+ end
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+
3
+ module ClickHouse
4
+ module Client
5
# Abstract base for anything that can be executed as a ClickHouse query.
# Subclasses must implement #to_sql and #to_redacted_sql.
class QueryLike
  # SQL string that can be executed on a ClickHouse database.
  def to_sql = raise(NotImplementedError)

  # Variant of #to_sql with the placeholders stripped, intended for export
  # to external log aggregation systems.
  def to_redacted_sql(bind_index_manager = BindIndexManager.new)
    raise(NotImplementedError)
  end

  # No placeholders by default; override when placeholders are supported.
  def prepared_placeholders = {}

  private

  # No placeholders by default; override when placeholders are supported.
  def placeholders = {}
end
30
+ end
31
+ end
@@ -0,0 +1,25 @@
1
+ # frozen_string_literal: true
2
+
3
+ module ClickHouse
4
+ module Client
5
# Serializes Ruby values into ClickHouse literals.
module Quoting
  class << self
    # @param value [Numeric, String, Symbol, Array, nil, #to_s]
    # @return [String] the literal:
    #   - Numeric => its #to_s, unquoted
    #   - Array   => "[a,b,...]" with every element quoted recursively
    #   - nil     => NULL
    #   - anything else (String, Symbol, other objects) => a single-quoted,
    #     escaped string built from #to_s
    def quote(value)
      case value
      when Numeric then value.to_s
      when Array then "[#{value.map { |v| quote(v) }.join(',')}]"
      when nil then "NULL"
      # Symbols have no #gsub, so they are stringified first like every
      # other non-numeric scalar.
      else quote_str(value.to_s)
      end
    end

    private

    # Wraps +value+ in single quotes. Backslashes are doubled first and
    # single quotes are doubled afterwards, so the value cannot terminate
    # the literal early. All string-like input goes through this one
    # routine to keep the escaping consistent.
    def quote_str(value)
      "'#{value.gsub('\\', '\&\&').gsub("'", "''")}'"
    end
  end
end
24
+ end
25
+ end
@@ -0,0 +1,69 @@
1
+ # frozen_string_literal: true
2
+
3
+ module ClickHouse
4
+ module Client
5
# Produces log-safe SQL for a QueryBuilder by swapping condition values for
# bind markers.
module Redactor
  # Redacts the SQL query represented by the query builder.
  #
  # The builder passed in is not modified: redaction works on a copy that
  # owns its own Arel manager.
  #
  # @param query_builder [::ClickHouse::Querybuilder] The query builder object to be redacted.
  # @param bind_manager [#next_bind_str] source of the bind markers
  # @return [String] The redacted SQL query as a string.
  # @raise [ArgumentError] when the condition in the query is of an unsupported type.
  #
  # Example:
  #   query_builder = ClickHouse::QueryBuilder.new('users').where(name: 'John Doe')
  #   redacted_query = ClickHouse::Redactor.redact(query_builder)
  #   # The redacted_query will contain the SQL query with values replaced by placeholders.
  #   output: "SELECT * FROM \"users\" WHERE \"users\".\"name\" = $1"
  def self.redact(query_builder, bind_manager = ClickHouse::Client::BindIndexManager.new)
    cloned_query_builder = query_builder.clone
    # Object#clone is shallow, so the copy would otherwise share the original's
    # manager and the constraint rewrite below would change the caller's builder.
    cloned_query_builder.manager = query_builder.manager.clone

    cloned_query_builder.conditions = cloned_query_builder.conditions.map do |condition|
      redact_condition(condition, bind_manager)
    end

    cloned_query_builder.manager.constraints.clear
    cloned_query_builder.conditions.each do |condition|
      cloned_query_builder.manager.where(condition)
    end

    visitor = Arel::Visitors::ToSql.new(ClickHouse::Client::ArelEngine.new)
    visitor.accept(cloned_query_builder.manager.ast, Arel::Collectors::SQLString.new).value
  end

  # Rebuilds a single condition with its right-hand side replaced by bind
  # markers (one marker per element for IN conditions).
  #
  # @raise [ArgumentError] for node types that are not handled below
  def self.redact_condition(condition, bind_manager)
    case condition
    when Arel::Nodes::In
      condition.left.in(Array.new(condition.right.size) { Arel.sql(bind_manager.next_bind_str) })
    when Arel::Nodes::Equality
      condition.left.eq(Arel.sql(bind_manager.next_bind_str))
    when Arel::Nodes::LessThan
      condition.left.lt(Arel.sql(bind_manager.next_bind_str))
    when Arel::Nodes::LessThanOrEqual
      condition.left.lteq(Arel.sql(bind_manager.next_bind_str))
    when Arel::Nodes::GreaterThan
      condition.left.gt(Arel.sql(bind_manager.next_bind_str))
    when Arel::Nodes::GreaterThanOrEqual
      condition.left.gteq(Arel.sql(bind_manager.next_bind_str))
    when Arel::Nodes::NamedFunction
      redact_named_function(condition, bind_manager)
    else
      raise ArgumentError, "Unsupported Arel node type for Redactor: #{condition.class}"
    end
  end

  # Returns a copy of the function node with its arguments redacted.
  # For `startsWith` only the second argument is replaced; for any other
  # function every argument is replaced by a bind marker.
  def self.redact_named_function(condition, bind_manager)
    redacted_condition =
      Arel::Nodes::NamedFunction.new(condition.name, condition.expressions.dup)

    case redacted_condition.name
    when 'startsWith'
      redacted_condition.expressions[1] = Arel.sql(bind_manager.next_bind_str)
    else
      redacted_condition.expressions = redacted_condition.expressions.map { Arel.sql(bind_manager.next_bind_str) }
    end

    redacted_condition
  end
end
68
+ end
69
+ end
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+
3
+ module ClickHouse
4
+ module Client
5
# Value object wrapping the outcome of an HTTP call to ClickHouse.
class Response
  attr_reader :headers, :body

  # @param body [String] raw response body
  # @param http_status_code [Integer] HTTP status of the response
  # @param headers [Hash] response headers
  def initialize(body, http_status_code, headers = {})
    @body, @http_status_code, @headers = body, http_status_code, headers
  end

  # True only when the HTTP status code equals 200.
  def success?
    @http_status_code == 200
  end
end
18
+ end
19
+ end
@@ -0,0 +1,7 @@
1
+ # frozen_string_literal: true
2
+
3
+ module ClickHouse
4
+ module Client
5
+ VERSION = "0.3.0" # gem version string; matches the 0.3.0 release named in the diff header
6
+ end
7
+ end
@@ -0,0 +1,137 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'addressable'
4
+ require 'json'
5
+ require 'active_support/deprecation'
6
+ require 'active_support/time'
7
+ require 'active_support/notifications'
8
+ require 'active_support/core_ext/string'
9
+ require 'active_support/core_ext/object/blank'
10
+ require_relative "client/version"
11
+ require_relative "client/database"
12
+ require_relative "client/configuration"
13
+ require_relative "client/bind_index_manager"
14
+ require_relative "client/quoting"
15
+ require_relative "client/arel_engine"
16
+ require_relative "client/query_like"
17
+ require_relative "client/query"
18
+ require_relative "client/redactor"
19
+ require_relative "client/query_builder"
20
+ require_relative "client/formatter"
21
+ require_relative "client/response"
22
+
23
+ module ClickHouse
24
+ module Client
25
+ class << self
26
+ def configuration
27
+ @configuration ||= Configuration.new
28
+ end
29
+
30
+ def configure
31
+ yield(configuration)
32
+ configuration.validate!
33
+ end
34
+ end
35
+
36
+ Error = Class.new(StandardError)
37
+ ConfigurationError = Class.new(Error)
38
+ DatabaseError = Class.new(Error)
39
+ QueryError = Class.new(Error)
40
+
41
+ def self.database_configured?(database, configuration = self.configuration)
42
+ !!configuration.databases[database]
43
+ end
44
+
45
+ # Executes a SELECT database query
46
+ def self.select(query, database, configuration = self.configuration)
47
+ instrumented_execute(query, database, configuration) do |response, instrument|
48
+ parsed_response = configuration.json_parser.parse(response.body)
49
+
50
+ instrument[:statistics] = parsed_response['statistics']&.symbolize_keys
51
+
52
+ Formatter.format(parsed_response)
53
+ end
54
+ end
55
+
56
+ # Executes any kinds of database query without returning any data (INSERT, DELETE)
57
+ def self.execute(query, database, configuration = self.configuration)
58
+ instrumented_execute(query, database, configuration) do |response, instrument|
59
+ expose_summary(response.headers, instrument)
60
+ end
61
+
62
+ true
63
+ end
64
+
65
+ # Inserts a gzip-compressed CSV to ClickHouse
66
+ #
67
+ # Usage:
68
+ #
69
+ # Create a compressed CSV file:
70
+ # > File.binwrite("my_csv.csv", ActiveSupport::Gzip.compress("id\n10\n20"))
71
+ #
72
+ # Invoke the INSERT query:
73
+ # > ClickHouse::Client.insert_csv('INSERT INTO events (id) FORMAT CSV', File.open("my_csv.csv"), :main)
74
+ def self.insert_csv(query, io, database, configuration = self.configuration)
75
+ db = lookup_database(configuration, database)
76
+
77
+ headers = db.headers.merge(
78
+ 'Transfer-Encoding' => 'chunked',
79
+ 'Content-Length' => File.size(io).to_s,
80
+ 'Content-Encoding' => 'gzip'
81
+ )
82
+
83
+ query = ClickHouse::Client::Query.build(query)
84
+ ActiveSupport::Notifications.instrument('sql.click_house', { query:, database: }) do |instrument|
85
+ response = configuration.http_post_proc.call(
86
+ db.build_custom_uri(extra_variables: { query: query.to_sql }).to_s,
87
+ headers,
88
+ io
89
+ )
90
+ raise DatabaseError, response.body unless response.success?
91
+
92
+ expose_summary(response.headers, instrument)
93
+ end
94
+
95
+ true
96
+ end
97
+
98
+ private_class_method def self.expose_summary(headers, instrument)
99
+ return unless headers['x-clickhouse-summary']
100
+
101
+ instrument[:statistics] =
102
+ configuration.json_parser.parse(headers['x-clickhouse-summary']).symbolize_keys
103
+ end
104
+
105
+ private_class_method def self.lookup_database(configuration, database)
106
+ configuration.databases[database].tap do |db|
107
+ raise ConfigurationError, "The database '#{database}' is not configured" unless db
108
+ end
109
+ end
110
+
111
+ private_class_method def self.instrumented_execute(query, database, configuration)
112
+ db = lookup_database(configuration, database)
113
+
114
+ query = ClickHouse::Client::Query.build(query)
115
+
116
+ log_contents = configuration.log_proc.call(query)
117
+ configuration.logger.info(log_contents)
118
+
119
+ ActiveSupport::Notifications.instrument('sql.click_house', { query:, database: }) do |instrument|
120
+ # Use a multipart POST request where the placeholders are sent with the param_ prefix
121
+ # See: https://github.com/ClickHouse/ClickHouse/issues/8842
122
+ query_with_params = query.prepared_placeholders.transform_keys { |key| "param_#{key}" }
123
+ query_with_params['query'] = query.to_sql
124
+
125
+ response = configuration.http_post_proc.call(
126
+ db.uri.to_s,
127
+ db.headers,
128
+ query_with_params
129
+ )
130
+
131
+ raise DatabaseError, response.body unless response.success?
132
+
133
+ yield response, instrument
134
+ end
135
+ end
136
+ end
137
+ end