clickhouse 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,18 @@
1
module Clickhouse
  class Connection
    module Query
      # A single result row: an Array of column values that can also be
      # viewed as a Hash keyed by column name.
      class ResultRow < Array

        # values - the column values of the row.
        # keys   - the hash keys, one per value. When omitted, the keys
        #          "column0", "column1", ... are generated.
        def initialize(values = [], keys = nil)
          super(values)
          @keys = keys || Array.new(values.size) { |index| "column#{index}" }
        end

        # Returns the row as a Hash of key => value pairs. The Hash is built
        # on the first call and the same object is returned afterwards.
        def to_hash
          @hash ||= @keys.zip(self).to_h
        end

      end
    end
  end
end
@@ -0,0 +1,101 @@
1
module Clickhouse
  class Connection
    module Query
      # Enumerable collection of query result rows.
      #
      # Rows are handed in as plain Arrays of raw values. A row is converted
      # to a ResultRow (with values cast according to the column types) the
      # first time it is accessed, and the converted row replaces the raw one
      # so that later accesses return the very same object.
      class ResultSet
        include Enumerable
        extend Forwardable

        def_delegators :@rows, :size, :empty?
        def_delegators :to_a, :first, :last, :flatten

        # rows  - Array of rows, each an Array of raw values.
        # names - column names, used as keys by ResultRow#to_hash (optional).
        # types - column type names, one per column (optional). Without
        #         types the raw values are kept untouched.
        def initialize(rows = [], names = nil, types = nil)
          @rows = rows
          @names = names
          @types = types
        end

        # Yields every (parsed) row in order and returns an Array of all rows.
        # Without a block an Enumerator is returned instead of raising
        # LocalJumpError.
        def each
          return to_enum(:each) { size } unless block_given?

          Array.new(size) do |index|
            row = self[index]
            yield row
            row
          end
        end

        # Returns the row at +index+, parsing and caching it on first access.
        def [](index)
          row = @rows[index]
          # Only a plain Array is still raw; a ResultRow (an Array subclass)
          # has already been parsed and must not be parsed again.
          row = @rows[index] = parse_row(row) if row.instance_of?(Array)
          row
        end

        # Returns true when the result set contains at least one row.
        def present?
          !empty?
        end

        # Returns an Array with every row converted to a Hash.
        def to_hashes
          map(&:to_hash)
        end

      private

        # Converts a raw row into a ResultRow. Values are cast by column type
        # when types are known; otherwise they are passed through unchanged
        # (previously every value was silently replaced by nil in that case).
        def parse_row(array)
          values = array.each_with_index.map do |value, i|
            @types ? parse_value(@types[i], value) : value
          end
          ResultRow.new values, @names
        end

        # Casts +value+ according to the column +type+. nil (and false) values
        # yield nil. Raises NotImplementedError for an unsupported type.
        def parse_value(type, value)
          return unless value

          case type
          when "UInt8", "UInt16", "UInt32", "UInt64", "Int8", "Int16", "Int32", "Int64"
            parse_int_value value
          when "Float32", "Float64"
            parse_float_value value
          when "String", "Enum8", "Enum16"
            parse_string_value value
          # NOTE(review): this pattern is unanchored and precedes the Array
          # branch, so a type such as "Array(FixedString(16))" is handled as a
          # fixed string - confirm whether that is intended.
          when /FixedString\(\d+\)/
            parse_fixed_string_value value
          when "Date"
            parse_date_value value
          when "DateTime"
            parse_date_time_value value
          when /Array\(/
            parse_array_value value
          else
            raise NotImplementedError, "Cannot parse value of type #{type.inspect}"
          end
        end

        def parse_int_value(value)
          value.to_i
        end

        def parse_float_value(value)
          value.to_f
        end

        # Tags the string as UTF-8 in place (the receiver itself is modified).
        def parse_string_value(value)
          value.force_encoding("UTF-8")
        end

        # Removes all NUL bytes and tags the result as UTF-8.
        def parse_fixed_string_value(value)
          value.delete("\000").force_encoding("UTF-8")
        end

        def parse_date_value(value)
          Date.parse(value)
        end

        def parse_date_time_value(value)
          Time.parse(value)
        end

        # Array values are parsed as JSON.
        def parse_array_value(value)
          JSON.parse(value)
        end

      end
    end
  end
end
@@ -0,0 +1,50 @@
1
module Clickhouse
  class Connection
    module Query
      # Small DSL that collects column definitions plus an engine and renders
      # them as a CREATE TABLE statement.
      #
      # Columns are declared by calling a method named after the column type
      # in snake_case with the column name (and an optional type argument),
      # e.g. `t.fixed_string :id, 16` or `t.uint16 :year`.
      class Table

        # name - the table name. The new instance is yielded to the block so
        # that columns and the engine can be declared.
        def initialize(name)
          @name = name
          @columns = []
          yield self
        end

        # Sets the table engine clause, e.g. "MergeTree(date, (year, date), 8192)".
        def engine(value)
          @engine = value
        end

        # Returns the CREATE TABLE statement, one column per line with the
        # column names padded to equal width.
        #
        # Raises Clickhouse::InvalidQueryError when no engine has been set.
        def to_sql
          raise Clickhouse::InvalidQueryError, "Missing table engine" unless @engine

          width = @columns.map { |column_name, _type| column_name.size }.max
          last_index = @columns.size - 1

          lines = ["CREATE TABLE #{@name} ("]
          @columns.each_with_index do |(column_name, type), index|
            lines << " #{column_name.ljust(width, " ")} #{type}#{"," unless index == last_index}"
          end
          lines << ")"
          lines << "ENGINE = #{@engine}"

          lines.join("\n")
        end

      private

        # Turns `t.<type> name[, argument]` into a column definition. The
        # method name is camelized ("date_time" => "DateTime", "uint16" =>
        # "UInt16") and the optional argument is appended in parentheses.
        #
        # Calls without a column name are passed on to super. This keeps
        # implicit conversion probes such as to_ary / to_str (sent by e.g.
        # Array#flatten or Kernel#puts) from silently adding bogus columns.
        def method_missing(name, *args)
          return super if args.empty?

          type = name.to_s
                     .gsub(/(^.|_\w)/) { |match| match.upcase }
                     .gsub("Uint", "UInt")
                     .delete("_")

          type << "(#{args[1]})" if args[1]
          @columns << [args[0].to_s, type]
        end

      end
    end
  end
end
@@ -0,0 +1,18 @@
1
module Clickhouse

  # Common ancestor of all error classes below, so that callers can rescue
  # Clickhouse::Error to catch any of them.
  class Error < StandardError; end

  class InvalidConnectionError < Error; end

  class ConnectionError < Error; end

  class InvalidQueryError < Error; end

  class QueryError < Error; end

end
@@ -0,0 +1,7 @@
1
module Clickhouse
  # Individual version components.
  MAJOR = 0
  MINOR = 1
  TINY = 0

  # Dotted version string assembled from the components above.
  VERSION = "#{MAJOR}.#{MINOR}.#{TINY}"
end
data/script/console ADDED
@@ -0,0 +1,58 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "logger"
4
+ require "bundler"
5
+ Bundler.require :default, :development
6
+
7
# Installs a logger that writes to STDOUT and establishes a connection using
# the given config. Returns whatever Clickhouse.establish_connection returns.
def connect!(config = {})
  stdout_logger = Logger.new(STDOUT)
  Clickhouse.logger = stdout_logger
  Clickhouse.establish_connection(config)
end
11
+
12
# Returns the current connection, connecting first (with the given config)
# when there is none yet.
def conn(config = {})
  Clickhouse.connection || connect!(config)
  Clickhouse.connection
end
16
+
17
# Name of the table used by the console helpers create_table and insert_rows.
def events
  'events'
end
20
+
21
# Creates the events table through the current connection.
def create_table
  conn.create_table(events) do |table|
    table.fixed_string(:id, 16)
    table.uint16(:year)
    table.date(:date)
    table.date_time(:time)
    table.string(:event)
    table.uint32(:user_id)
    table.float32(:revenue)
    table.engine("MergeTree(date, (year, date), 8192)")
  end
end
33
+
34
# Inserts two sample click events into the events table.
def insert_rows
  column_names = %w[id year date time event user_id revenue]

  conn.insert_rows(events, names: column_names) do |rows|
    rows << ["d91d1c90", 2016, "2016-10-17", "2016-10-17 23:14:28", "click", 1982, 0.18]
    rows << ["d91d2294", 2016, "2016-10-17", "2016-10-17 23:14:41", "click", 1947, 0.203]
  end
end
56
+
57
# Print a banner with the library version, then open the Pry session.
banner = "Loading Clickhouse development environment (#{Clickhouse::VERSION})"
puts banner
Pry.start
@@ -0,0 +1,15 @@
1
+ require_relative "test_helper/coverage"
2
+
3
+ require "minitest"
4
+ require "minitest/autorun"
5
+ require "mocha/setup"
6
+
7
# Expands +path+ relative to the directory one level above the directory
# that contains this file and returns the absolute path.
def path(path)
  relative = "../../#{path}"
  File.expand_path(relative, __FILE__)
end
10
+
11
+ require "bundler"
12
+ Bundler.require :default, :development, :test
13
+
14
+ require_relative "test_helper/minitest"
15
+ require_relative "test_helper/simple_connection"
@@ -0,0 +1,16 @@
1
# Coverage is only set up when the current working directory is the directory
# two levels above this file's directory (presumably the project root).
expected_working_dir = File.expand_path("../../..", __FILE__)

if Dir.pwd == expected_working_dir
  # REPORT=1 additionally loads the dotenv environment and starts the
  # Code Climate test reporter.
  if ENV["REPORT"].to_i == 1
    require "dotenv"
    Dotenv.load

    require "codeclimate-test-reporter"
    CodeClimate::TestReporter.start
  end

  require "simplecov"
  SimpleCov.coverage_dir "test/coverage"
  SimpleCov.start do
    add_group "Clickhouse", "lib"
    add_group "Test suite", "test"
  end
end
@@ -0,0 +1,13 @@
1
class MiniTest::Test
  # Sets every instance variable currently defined on the Clickhouse module
  # to nil after each test.
  def teardown
    Clickhouse.instance_variables.each { |ivar| Clickhouse.instance_variable_set(ivar, nil) }
  end
end
8
+
9
class MiniTest::Spec
  # Asserts that +actual+ equals +expected+ after +expected+ has been stripped
  # and had the leading whitespace of each of its lines removed. +actual+ is
  # compared as is.
  def assert_query(expected, actual)
    normalized = expected.strip.gsub(/^\s+/, "")
    assert_equal(normalized, actual)
  end
end
@@ -0,0 +1,12 @@
1
# Minimal object that only exposes a connection config hash
# (http://localhost:8123).
class SimpleConnection
  attr_reader :config

  def initialize
    @config = {scheme: "http", host: "localhost", port: 8123}
  end

end
@@ -0,0 +1,36 @@
1
+ require_relative "../../../test_helper"
2
+
3
module Unit
  module Connection
    module Query
      # Specs for Clickhouse::Connection::Query::ResultRow#to_hash.
      class TestResultRow < MiniTest::Test

        describe Clickhouse::Connection::Query::ResultRow do
          describe "#to_hash" do
            describe "when passing names" do
              it "uses the names as hash keys" do
                values = [1, 2, 3]
                names = [:a, :b, :c]
                result_row = Clickhouse::Connection::Query::ResultRow.new(values, names)
                assert_equal({:a => 1, :b => 2, :c => 3}, result_row.to_hash)
              end
            end

            describe "when not passing names" do
              it "uses 'column<i>' as hash keys" do
                result_row = Clickhouse::Connection::Query::ResultRow.new([1, 2, 3])
                expected = {"column0" => 1, "column1" => 2, "column2" => 3}
                assert_equal(expected, result_row.to_hash)
              end
            end

            describe "memoization" do
              it "memoizes the resulting hash" do
                result_row = Clickhouse::Connection::Query::ResultRow.new([1, 2, 3])
                first_id = result_row.to_hash.object_id
                second_id = result_row.to_hash.object_id
                assert_equal first_id, second_id
              end
            end
          end
        end

      end
    end
  end
end
@@ -0,0 +1,196 @@
1
+ require_relative "../../../test_helper"
2
+
3
module Unit
  module Connection
    module Query
      # Specs for Clickhouse::Connection::Query::ResultSet: delegated
      # collection methods, type casting of raw values, conversion to hashes,
      # memoization of parsed rows and the error for unsupported types.
      class TestResultSet < MiniTest::Test

        describe Clickhouse::Connection::Query::ResultSet do
          before do
            @empty_set = Clickhouse::Connection::Query::ResultSet.new
            # Three raw rows (all values as strings), the column names and
            # the column types used for casting.
            @result_set = Clickhouse::Connection::Query::ResultSet.new(
              [
                ["1072649", "142.94", "badrequest.io", "d91d1c90\u0000\u0000\u0000", "2016-03-20", "2016-03-20 23:49:11", "[4,2,5,7]"],
                ["12948140", "9320.11", "engel.pm", "d91d217c\u0000\u0000", "2016-03-20", "2016-03-20 23:58:34", "[6,2,9,8,1]"],
                ["319384", "101.02", "archan937.com", "d91d2294\u0000\u0000\u0000", "2016-03-20", "2016-03-20 22:55:39", "[3,1,2]"]
              ],
              %w[SUM(clicks) AVG(price) domain id date MAX(time) groupUniqArray(code)],
              %w[UInt32 Float32 String FixedString(16) Date DateTime Array(8)]
            )
          end

          describe "#size" do
            it "returns the size of the result set" do
              assert_equal 3, @result_set.size
            end
          end

          describe "#empty?" do
            it "returns whether the result set is empty or not" do
              assert_equal true, @empty_set.empty?
              assert_equal false, @result_set.empty?
            end
          end

          describe "#present?" do
            it "returns whether the result set contains rows or not" do
              assert_equal false, @empty_set.present?
              assert_equal true, @result_set.present?
            end
          end

          describe "#first" do
            it "returns the first row of the result set" do
              assert_equal [
                1072649, 142.94, "badrequest.io", "d91d1c90",
                Date.new(2016, 3, 20), Time.new(2016, 3, 20, 23, 49, 11), [4, 2, 5, 7]
              ], @result_set.first
            end
          end

          describe "#last" do
            it "returns the last row of the result set" do
              assert_equal [
                319384, 101.02, "archan937.com", "d91d2294",
                Date.new(2016, 3, 20), Time.new(2016, 3, 20, 22, 55, 39), [3, 1, 2]
              ], @result_set.last
            end
          end

          describe "#flatten" do
            it "returns the values of all rows as one flat array" do
              assert_equal [
                1072649, 142.94, "badrequest.io", "d91d1c90",
                Date.new(2016, 3, 20), Time.new(2016, 3, 20, 23, 49, 11), 4, 2, 5, 7,
                12948140, 9320.11, "engel.pm", "d91d217c",
                Date.new(2016, 3, 20), Time.new(2016, 3, 20, 23, 58, 34), 6, 2, 9, 8, 1,
                319384, 101.02, "archan937.com", "d91d2294",
                Date.new(2016, 3, 20), Time.new(2016, 3, 20, 22, 55, 39), 3, 1, 2
              ], @result_set.flatten
            end
          end

          describe "#to_hashes" do
            it "returns an array containing the rows as hashes" do
              assert_equal [
                {
                  "SUM(clicks)" => 1072649,
                  "AVG(price)" => 142.94,
                  "domain" => "badrequest.io",
                  "id" => "d91d1c90",
                  "date" => Date.new(2016, 3, 20),
                  "MAX(time)" => Time.new(2016, 3, 20, 23, 49, 11),
                  "groupUniqArray(code)" => [4, 2, 5, 7]
                }, {
                  "SUM(clicks)" => 12948140,
                  "AVG(price)" => 9320.11,
                  "domain" => "engel.pm",
                  "id" => "d91d217c",
                  "date" => Date.new(2016, 3, 20),
                  "MAX(time)" => Time.new(2016, 3, 20, 23, 58, 34),
                  "groupUniqArray(code)" => [6, 2, 9, 8, 1]
                }, {
                  "SUM(clicks)" => 319384,
                  "AVG(price)" => 101.02,
                  "domain" => "archan937.com",
                  "id" => "d91d2294",
                  "date" => Date.new(2016, 3, 20),
                  "MAX(time)" => Time.new(2016, 3, 20, 22, 55, 39),
                  "groupUniqArray(code)" => [3, 1, 2]
                }
              ], @result_set.to_hashes
            end
          end

          describe "memoization" do
            it "memoizes the parsed rows" do
              assert_equal @result_set.to_a[-1].object_id, @result_set.each{}[-1].object_id
              assert_equal @result_set.first.object_id, @result_set[0].object_id
            end
          end

          describe "non-supported data types" do
            it "raises a NotImplementedError error" do
              assert_raises NotImplementedError do
                Clickhouse::Connection::Query::ResultSet.new([[1]], ["Foo"], ["Bar"])[0]
              end
            end
          end
        end

      end
    end
  end
end