umbrellio-utils 1.9.0 → 1.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7aa36379cad4a44799273f1c8d85d305a1cbad842cdf6a54d41fc747e2166212
4
- data.tar.gz: 98b110e358259114978e5f5507e0a2b781563e570a0185e8af9ed73e8ac706ce
3
+ metadata.gz: 2a7a306b7e789f1b3b24ce3ca8a94094f6d09a671f478df61b7e2cad5f66d402
4
+ data.tar.gz: 1f301027fd3863925d541841c35cd5bffa514c18cce59abfdb20576c0bba1fdd
5
5
  SHA512:
6
- metadata.gz: 4bae7e5de101aa61d9261163699379f0fffdb5242c789dd80b1c55c46508cd836a8ce568584e00e2a36e3d954a11f6a349e5caa1909cfff426cfca07ae77f10a
7
- data.tar.gz: 2e4432dc497061588cff9f4daf759f45fe330fdf866809295b14c6368bd50b4f010ec92a2588f7d6c44369ed7e30347466e06496f78f9aa1b24b1315515b027c
6
+ metadata.gz: 76b7400d1927214aca02810314e072ddcde0fd15c47b8becc0b0cae7ffd11f71ec3495ce07cf0d3ae346829faa87fc3f1d8fdb31cc9635a636ba49ede1312bac
7
+ data.tar.gz: 023dbebdfdaef806d5dbb952f24d2dad321d5c6d42abc05965811ba9f0d7499457626858099626f10b924f97a5918c3d14f43d4760f1e64fcebc9a774188ec8d
@@ -0,0 +1,42 @@
1
+ <yandex>
2
+ <remote_servers>
3
+ <click_cluster>
4
+ <shard>
5
+ <internal_replication>true</internal_replication>
6
+ <replica>
7
+ <host>localhost</host>
8
+ <port>9000</port>
9
+ </replica>
10
+ </shard>
11
+ </click_cluster>
12
+ </remote_servers>
13
+
14
+ <keeper_server>
15
+ <tcp_port>9181</tcp_port>
16
+ <tcp_port_secure>0</tcp_port_secure>
17
+ <server_id>1</server_id>
18
+ <log_storage_path>/var/lib/clickhouse/coordination/log</log_storage_path>
19
+ <snapshot_storage_path>/var/lib/clickhouse/coordination/snapshots</snapshot_storage_path>
20
+
21
+ <coordination_settings>
22
+ <operation_timeout_ms>10000</operation_timeout_ms>
23
+ <session_timeout_ms>30000</session_timeout_ms>
24
+ <raft_logs_level>trace</raft_logs_level>
25
+ </coordination_settings>
26
+
27
+ <raft_configuration>
28
+ <server>
29
+ <id>1</id>
30
+ <hostname>localhost</hostname>
31
+ <port>9234</port>
32
+ </server>
33
+ </raft_configuration>
34
+ </keeper_server>
35
+
36
+ <zookeeper>
37
+ <node>
38
+ <host>localhost</host>
39
+ <port>9181</port>
40
+ </node>
41
+ </zookeeper>
42
+ </yandex>
@@ -17,25 +17,52 @@ jobs:
17
17
  matrix:
18
18
  ruby: ["3.1", "3.2", "3.3", "3.4"]
19
19
 
20
- services:
21
- postgres:
22
- image: postgres
23
- env:
24
- POSTGRES_USER: root
25
- POSTGRES_HOST_AUTH_METHOD: trust
26
- options: >-
27
- --health-cmd pg_isready
28
- --health-interval 10s
29
- --health-timeout 5s
30
- --health-retries 5
31
- ports:
32
- - 5432:5432
33
20
  env:
34
- PGHOST: localhost
35
- PGUSER: root
36
-
21
+ PGHOST: pg
22
+ PGUSER: user
23
+ PGPASSWORD: pass
37
24
  steps:
38
- - uses: actions/checkout@v2
25
+ - uses: actions/checkout@v4
26
+
27
+ - name: Create docker network
28
+ run: docker network create dbnet
29
+
30
+ - name: Start PostgreSQL
31
+ run: |
32
+ docker run -d \
33
+ --name pg \
34
+ --network dbnet \
35
+ -e POSTGRES_PASSWORD=pass \
36
+ -e POSTGRES_USER=user \
37
+ -e POSTGRES_DB=umbrellio_utils_test \
38
+ -p 5432:5432 \
39
+ postgres:14
40
+
41
+ - name: Start ClickHouse
42
+ run: |
43
+ docker run -d \
44
+ --name ch \
45
+ --network dbnet \
46
+ -e CLICKHOUSE_SKIP_USER_SETUP=1 -e CLICKHOUSE_DB=umbrellio_utils_test \
47
+ -p 9000:9000 -p 8123:8123 \
48
+ -v ${{ github.workspace }}/.github/clickhouse/clickhouse_keeper.xml:/etc/clickhouse-server/config.d/keeper.xml \
49
+ clickhouse/clickhouse-server:25.3.6.56-alpine
50
+
51
+ - name: Wait for Postgres
52
+ run: |
53
+ for i in {1..30}; do
54
+ if docker exec pg pg_isready -U user; then exit 0; fi
55
+ sleep 1
56
+ done
57
+ exit 1
58
+
59
+ - name: Wait for ClickHouse
60
+ run: |
61
+ for i in {1..30}; do
62
+ if docker exec ch clickhouse-client --query "SELECT 1"; then exit 0; fi
63
+ sleep 1
64
+ done
65
+ exit 1
39
66
 
40
67
  - uses: ruby/setup-ruby@v1
41
68
  with:
@@ -43,8 +70,6 @@ jobs:
43
70
  rubygems: latest
44
71
  bundler-cache: true
45
72
 
46
- - run: psql -c 'CREATE DATABASE umbrellio_utils_test'
47
-
48
73
  - name: Run Linter
49
74
  run: bundle exec ci-helper RubocopLint
50
75
 
data/.rubocop.yml CHANGED
@@ -12,6 +12,9 @@ Naming/MethodParameterName:
12
12
  RSpec/EmptyLineAfterHook:
13
13
  Enabled: false
14
14
 
15
+ Metrics/ModuleLength:
16
+ Enabled: false
17
+
15
18
  Naming/FileName:
16
19
  Exclude:
17
20
  - lib/umbrellio-utils.rb
data/Gemfile CHANGED
@@ -8,6 +8,8 @@ gemspec
8
8
  gem "activesupport"
9
9
  gem "bundler"
10
10
  gem "ci-helper"
11
+ gem "click_house", github: "umbrellio/click_house", branch: "master"
12
+ gem "csv"
11
13
  gem "http"
12
14
  gem "net-pop"
13
15
  gem "nokogiri"
data/Gemfile.lock CHANGED
@@ -1,7 +1,16 @@
1
+ GIT
2
+ remote: https://github.com/umbrellio/click_house.git
3
+ revision: 1bbf8cb909a248b401d0ba9a9f6f1de2e2c068db
4
+ branch: master
5
+ specs:
6
+ click_house (2.1.2)
7
+ activesupport
8
+ faraday (>= 1.7, < 3)
9
+
1
10
  PATH
2
11
  remote: .
3
12
  specs:
4
- umbrellio-utils (1.9.0)
13
+ umbrellio-utils (1.10.0)
5
14
  memery (~> 1)
6
15
 
7
16
  GEM
@@ -100,6 +109,7 @@ GEM
100
109
  concurrent-ruby (1.3.5)
101
110
  connection_pool (2.5.4)
102
111
  crass (1.0.6)
112
+ csv (3.3.5)
103
113
  date (3.4.1)
104
114
  diff-lcs (1.6.2)
105
115
  docile (1.4.1)
@@ -109,6 +119,12 @@ GEM
109
119
  erb (4.0.4)
110
120
  cgi (>= 0.3.3)
111
121
  erubi (1.13.1)
122
+ faraday (2.14.0)
123
+ faraday-net_http (>= 2.0, < 3.5)
124
+ json
125
+ logger
126
+ faraday-net_http (3.4.2)
127
+ net-http (~> 0.5)
112
128
  ffi (1.17.2-aarch64-linux-gnu)
113
129
  ffi (1.17.2-aarch64-linux-musl)
114
130
  ffi (1.17.2-arm-linux-gnu)
@@ -163,6 +179,8 @@ GEM
163
179
  method_source (1.1.0)
164
180
  mini_mime (1.1.5)
165
181
  minitest (5.25.5)
182
+ net-http (0.8.0)
183
+ uri (>= 0.11.1)
166
184
  net-imap (0.5.10)
167
185
  date
168
186
  net-protocol
@@ -364,6 +382,7 @@ GEM
364
382
  unicode-display_width (3.2.0)
365
383
  unicode-emoji (~> 4.1)
366
384
  unicode-emoji (4.1.0)
385
+ uri (1.1.1)
367
386
  useragent (0.16.11)
368
387
  websocket-driver (0.8.0)
369
388
  base64
@@ -388,6 +407,8 @@ DEPENDENCIES
388
407
  activesupport
389
408
  bundler
390
409
  ci-helper
410
+ click_house!
411
+ csv
391
412
  http
392
413
  net-pop
393
414
  nokogiri
@@ -0,0 +1,20 @@
1
#!/bin/bash
# Recreates the local `clickhouse-server` docker container from the image named
# in CLICKHOUSE_IMAGE_TAG, waits until it answers queries, then creates and
# migrates the ClickHouse database through the application's rails tasks.
set -eu

# Fail early with a readable message instead of bash's generic "unbound variable".
: "${CLICKHOUSE_IMAGE_TAG:?CLICKHOUSE_IMAGE_TAG must be set to the ClickHouse docker image to run}"

container_name="clickhouse-server"

# Remove any previous container; errors are ignored because it may not exist.
docker stop "$container_name" || true
docker rm "$container_name" || true

docker run \
  --detach \
  --network host \
  --name "$container_name" \
  --ulimit nofile=262144:262144 \
  "$CLICKHOUSE_IMAGE_TAG"

# Wait for ClickHouse server to become available
until docker exec "$container_name" clickhouse-client --query "SELECT 1" &>/dev/null; do
  # Stop waiting if the container is no longer running; otherwise this loop would never end.
  if [ "$(docker inspect --format '{{.State.Running}}' "$container_name" 2>/dev/null)" != "true" ]; then
    echo "ClickHouse container is not running, giving up" >&2
    exit 1
  fi

  echo "Waiting for ClickHouse to be ready..."
  sleep 1
done

rails ch:create ch:migrate
@@ -1,3 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative "umbrellio_utils"
4
+
5
# Load the rake tasks bundled with the gem, but only when Rake itself is
# already defined in the current process.
if defined?(Rake)
  rake_files = Dir[File.join(__dir__, "umbrellio_utils/tasks/**/*.rake")]
  rake_files.each { |rake_file| load rake_file }
end
@@ -0,0 +1,186 @@
1
+ # frozen_string_literal: true
2
+
3
module UmbrellioUtils
  # Helpers for working with ClickHouse: building Sequel datasets flagged as
  # ClickHouse datasets (`ch: true`), executing them through the `::ClickHouse`
  # client, and a handful of table-maintenance shortcuts.
  module ClickHouse
    include Memery

    extend self

    delegate :create_database, :drop_database, :tables, :config, to: :client

    # Inserts +rows+ into +db_name.table_name+ using the JSONEachRow format.
    def insert(table_name, db_name: self.db_name, rows: [])
      client.insert(full_table_name(table_name, db_name), rows, format: "JSONEachRow")
    end

    # Builds a dataset marked with `ch: true`.
    #
    # @param source [Symbol, nil, Object] a table name (qualified with +db_name+
    #   when it differs from the configured database), nil for an empty dataset,
    #   or anything else accepted by `DB.from`.
    def from(source, db_name: self.db_name)
      ds =
        case source
        when Symbol
          DB.from(db_name == self.db_name ? SQL[source] : SQL[db_name][source])
        when nil
          DB.dataset
        else
          DB.from(source)
        end

      ds.clone(ch: true)
    end

    # Executes raw SQL; extra keyword options are passed to the client as `params`.
    def execute(sql, host: nil, **opts)
      log_errors(sql) do
        client(host).execute(sql, params: opts)
      end
    end

    # Runs a ClickHouse dataset and returns its rows as StrictHash objects with symbol keys.
    def query(dataset, host: nil, **opts)
      sql = sql_for(dataset)

      log_errors(sql) do
        select_all(sql, host:, **opts).map { |x| Misc::StrictHash[x.symbolize_keys] }
      end
    end

    # Runs a ClickHouse dataset and returns the last column of its first row.
    def query_value(dataset, host: nil, **opts)
      sql = sql_for(dataset)

      log_errors(sql) do
        select_value(sql, host:, **opts)
      end
    end

    # Returns the result of `count()` over the dataset.
    def count(dataset)
      query_value(dataset.select(SQL.ch_count))
    end

    # The three maintenance helpers below go through full_table_name, like
    # insert and describe_table, so a Sequel::SQL::Identifier is accepted too.
    def optimize_table!(table_name, db_name: self.db_name)
      execute("OPTIMIZE TABLE #{full_table_name(table_name, db_name)} ON CLUSTER click_cluster FINAL")
    end

    def truncate_table!(table_name, db_name: self.db_name)
      execute("TRUNCATE TABLE #{full_table_name(table_name, db_name)} ON CLUSTER click_cluster SYNC")
    end

    def drop_table!(table_name, db_name: self.db_name)
      execute("DROP TABLE #{full_table_name(table_name, db_name)} ON CLUSTER click_cluster SYNC")
    end

    # Returns the DESCRIBE TABLE output as StrictHash rows with symbol keys.
    def describe_table(table_name, db_name: self.db_name)
      sql = "DESCRIBE TABLE #{full_table_name(table_name, db_name)} FORMAT JSON"

      log_errors(sql) do
        select_all(sql).map { |x| Misc::StrictHash[x.symbolize_keys] }
      end
    end

    # Name of the configured database, as a Symbol.
    def db_name
      client.config.database.to_sym
    end

    # Converts a raw value according to the ClickHouse column type name:
    # types matching /String/ are stringified, types matching /DateTime/ are
    # parsed with Time.zone, everything else is returned untouched.
    def parse_value(value, type:)
      case type
      when /String/
        value&.to_s
      when /DateTime/
        Time.zone.parse(value) if value
      else
        value
      end
    end

    # Server version as a Float.
    # NOTE(review): String#to_f keeps only the leading "major.minor" digits, so
    # "25.10.x" becomes 25.1 -- confirm callers only compare coarse versions.
    def server_version
      select_value("SELECT version()").to_f
    end

    # Builds a `postgresql(...)` table function call pointing at +table+ in the
    # database that DB is connected to. The host comes from ENV["PGHOST"] first,
    # then DB.opts[:host], then "localhost".
    def pg_table_connection(table)
      host = ENV["PGHOST"] || DB.opts[:host].presence || "localhost"
      port = DB.opts[:port] || 5432
      database = DB.opts[:database]
      username = DB.opts[:user]
      password = DB.opts[:password]

      Sequel.function(:postgresql, "#{host}:#{port}", database, table, username, password)
    end

    # Creates the temp table (when it does not exist yet), fills it from the
    # ClickHouse +dataset+, and then delegates to Database.with_temp_table.
    def with_temp_table(
      dataset, temp_table_name:, primary_key: [:id], primary_key_types: [:integer], **opts, &
    )
      unless DB.table_exists?(temp_table_name)
        UmbrellioUtils::Database.create_temp_table(
          nil, primary_key:, primary_key_types:, temp_table_name:, &
        )
        populate_temp_table!(temp_table_name, dataset)
      end
      UmbrellioUtils::Database.with_temp_table(nil, primary_key:, temp_table_name:, **opts, &)
    end

    private

    # Returns a connection, memoized per +host+ argument for one minute.
    def client(host = nil)
      cfg = ::ClickHouse.config
      if host
        # Work on a copy: assigning the host on the shared config object would
        # leak this override into every later connection built without a host.
        cfg = cfg.dup
        cfg.host = resolve(host)
      end
      ::ClickHouse::Connection.new(cfg)
    end
    memoize :client, ttl: 1.minute

    # Resolves +host+ to an IP address; on failure the error is reported and
    # the configured default host is used instead.
    def resolve(host)
      IPSocket.getaddress(host)
    rescue => e
      Exceptions.notify!(e, raise_errors: false)
      config.host
    end

    def logger
      client.config.logger
    end

    # Logs ClickHouse errors together with the offending SQL and re-raises them.
    # NOTE(review): SQL built by populate_temp_table! embeds the PostgreSQL
    # password, so it ends up in this log line when that statement fails.
    def log_errors(sql)
      yield
    rescue ::ClickHouse::Error => e
      logger.error("ClickHouse error: #{e.inspect}\nSQL: #{sql}")
      raise
    end

    # Returns the SQL of a dataset after checking that it is a ClickHouse one.
    def sql_for(dataset)
      unless ch_dataset?(dataset)
        raise "Non-ClickHouse dataset: #{dataset.inspect}. " \
              "You should use `CH.from` instead of `DB`"
      end

      dataset.sql
    end

    # A dataset counts as a ClickHouse dataset when it carries the `ch` option
    # and every FROM source does too; identifiers are always accepted.
    def ch_dataset?(dataset)
      case dataset
      when Sequel::Dataset
        dataset.opts[:ch] && Array(dataset.opts[:from]).all? { |x| ch_dataset?(x) }
      when Sequel::SQL::AliasedExpression
        ch_dataset?(dataset.expression)
      when Sequel::SQL::Identifier, Sequel::SQL::QualifiedIdentifier
        true
      else
        raise "Unknown dataset type: #{dataset.inspect}"
      end
    end

    # Returns "db.table", unwrapping a Sequel::SQL::Identifier table name.
    def full_table_name(table_name, db_name)
      table_name = table_name.value if table_name.is_a?(Sequel::SQL::Identifier)
      "#{db_name}.#{table_name}"
    end

    # Sends +sql+ as the request body with JSON as the default format and wraps
    # the raw response with the client's response factory.
    def select_all(sql, host: nil, **opts)
      connection = client(host)
      response = connection.get(body: sql, query: { default_format: "JSON", **opts })
      ::ClickHouse::Response::Factory.response(response, connection.config)
    end

    # Last column value of the first row, or nil when there are no rows.
    def select_value(...)
      select_all(...).first.to_a.dig(0, -1)
    end

    # Copies the dataset result into the PostgreSQL temp table through the
    # `postgresql` table function.
    def populate_temp_table!(temp_table_name, dataset)
      execute(<<~SQL.squish)
        INSERT INTO TABLE FUNCTION #{DB.literal(pg_table_connection(temp_table_name))}
        #{dataset.sql}
      SQL
    end
  end
end
@@ -1,6 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- # rubocop:disable Metrics/ModuleLength
4
3
  module UmbrellioUtils
5
4
  module Database
6
5
  extend self
@@ -79,26 +78,28 @@ module UmbrellioUtils
79
78
  end
80
79
  # rubocop:enable Metrics/ParameterLists
81
80
 
82
- def create_temp_table(dataset, primary_key: nil, temp_table_name: nil)
81
+ def create_temp_table(dataset, primary_key: nil, primary_key_types: nil, temp_table_name: nil)
83
82
  time = Time.current
84
- model = dataset.model
83
+ query_table_name = dataset&.model&.table_name
85
84
 
86
- temp_table_name ||= :"temp_#{model.table_name}_#{time.to_i}_#{time.nsec}"
85
+ temp_table_name ||= :"temp_#{query_table_name}_#{time.to_i}_#{time.nsec}"
87
86
  return temp_table_name if DB.table_exists?(temp_table_name)
88
87
 
89
88
  primary_key = primary_key_from(dataset, primary_key:)
89
+ primary_key_types ||= primary_key.map { |x| dataset.model.db_schema[x][:db_type] }
90
90
 
91
91
  DB.create_table(temp_table_name, unlogged: true) do
92
- primary_key.each do |field|
93
- type = model.db_schema[field][:db_type]
94
- column(field, type)
92
+ primary_key.each.with_index do |field, i|
93
+ column(field, primary_key_types[i])
95
94
  end
96
95
 
97
96
  primary_key(primary_key)
98
97
  end
99
98
 
100
- insert_ds = dataset.select(*qualified_pk(model.table_name, primary_key))
101
- DB[temp_table_name].disable_insert_returning.insert(insert_ds)
99
+ unless dataset.nil?
100
+ insert_ds = dataset.select(*qualified_pk(query_table_name, primary_key))
101
+ DB[temp_table_name].disable_insert_returning.insert(insert_ds)
102
+ end
102
103
 
103
104
  temp_table_name
104
105
  end
@@ -154,4 +155,3 @@ module UmbrellioUtils
154
155
  end
155
156
  end
156
157
  end
157
- # rubocop:enable Metrics/ModuleLength
@@ -0,0 +1,271 @@
1
+ # frozen_string_literal: true
2
+
3
module UmbrellioUtils
  # Migration helpers: switching integer id / foreign key columns to bigint
  # through a shadow `*_bigint` column kept in sync by a trigger, managing
  # foreign keys, creating ClickHouse distributed tables, and editing the
  # column list of PostgreSQL views.
  #
  # Table, column and constraint names are interpolated into SQL as-is, so they
  # must come from trusted migration code.
  module Migrations
    extend self

    # Adds a nullable `id_bigint` column to the table and a trigger that copies
    # `id` into it on every insert or update.
    def create_new_id_bigint_column(table_name)
      DB.run(<<~SQL.squish)
        LOCK TABLE #{table_name} IN ACCESS EXCLUSIVE MODE;

        CREATE OR REPLACE FUNCTION id_trigger()
          RETURNS trigger
        AS
        $BODY$
        DECLARE
        BEGIN
          NEW.id_bigint := NEW.id;
          RETURN NEW;
        END;
        $BODY$ LANGUAGE plpgsql;

        ALTER TABLE #{table_name} ADD id_bigint BIGINT;

        CREATE TRIGGER #{table_name}_bigint
          BEFORE INSERT OR UPDATE
          ON #{table_name}
          FOR EACH ROW
          EXECUTE FUNCTION id_trigger();
      SQL
    end

    # Replaces the old `id` column with `id_bigint`: drops the sync trigger,
    # swaps the column names, attaches a new sequence as the default, re-points
    # foreign keys and promotes the `<table>_id_bigint_index` index to the
    # primary key.
    #
    # @param associations [Hash] referencing table => referencing column
    # @param skip_fk_create [Boolean] only drop the foreign keys, do not recreate them
    def drop_old_id_column(table_name, associations = {}, skip_fk_create: false) # rubocop:disable Metrics/MethodLength
      query_start = <<~SQL.squish
        LOCK TABLE #{table_name} IN ACCESS EXCLUSIVE MODE;
        DROP TRIGGER #{table_name}_bigint ON #{table_name};
        ALTER TABLE #{table_name} RENAME id TO id_integer;
        ALTER TABLE #{table_name} RENAME id_bigint TO id;

        CREATE SEQUENCE IF NOT EXISTS new_#{table_name}_id_seq
          START WITH 1
          INCREMENT BY 1
          NO MINVALUE
          NO MAXVALUE
          CACHE 1;

        SELECT setval(
          'new_#{table_name}_id_seq',
          COALESCE((SELECT MAX(id) + 1 FROM #{table_name}), 1),
          false
        );
        ALTER TABLE #{table_name}
          ALTER COLUMN id SET DEFAULT nextval('new_#{table_name}_id_seq');
      SQL

      fkey_query = ""
      # `each`, not `map`: the block only accumulates SQL into fkey_query.
      associations.each do |assoc_table, assoc_name|
        constraint_name = "#{assoc_table}_#{assoc_name}_fkey"

        fkey_query += <<~SQL.squish
          ALTER TABLE #{assoc_table}
          DROP CONSTRAINT IF EXISTS #{constraint_name}
        SQL
        if skip_fk_create
          fkey_query += ";"
          next
        end

        # Continues the ALTER TABLE statement started above.
        fkey_query += <<~SQL.squish
          , ADD CONSTRAINT #{constraint_name}
          FOREIGN KEY (#{assoc_name}) REFERENCES #{table_name}(id) NOT VALID;
        SQL
      end

      query_end = <<~SQL.squish
        ALTER TABLE #{table_name} DROP id_integer;
        ALTER TABLE #{table_name} ADD CONSTRAINT #{table_name}_pkey PRIMARY KEY
          USING INDEX #{table_name}_id_bigint_index;
      SQL

      query = query_start + fkey_query + query_end
      DB.run(query)
    end

    # Drops the `<assoc_table>_<assoc_name>_fkey` constraint of every association.
    # The first argument is accepted but not used.
    def drop_foreign_keys(_table_name, associations)
      associations.map do |assoc_table, assoc_name|
        constraint_name = "#{assoc_table}_#{assoc_name}_fkey"
        fkey_query = <<~SQL.squish
          ALTER TABLE #{assoc_table} DROP CONSTRAINT IF EXISTS #{constraint_name};
        SQL
        DB.run(fkey_query)
      end
    end

    # Creates a NOT VALID foreign key to `table_name(id)` for every association,
    # unless a constraint with that name already exists.
    def create_foreign_keys(table_name, associations)
      associations.map do |assoc_table, assoc_name|
        constraint_name = "#{assoc_table}_#{assoc_name}_fkey"
        fkey_query = <<~SQL.squish
          DO $$
          BEGIN
            IF NOT EXISTS (
              SELECT 1
              FROM pg_constraint
              WHERE conname = '#{constraint_name}'
            ) THEN
              ALTER TABLE #{assoc_table} ADD CONSTRAINT #{constraint_name}
              FOREIGN KEY (#{assoc_name}) REFERENCES #{table_name}(id) NOT VALID;
            END IF;
          END$$;
        SQL
        DB.run(fkey_query)
      end
    end

    # Adds a nullable `<column>_bigint` column and a trigger that copies
    # `<column>` into it on every insert or update.
    def create_new_foreign_key_column(table_name, column_name)
      DB.run(<<~SQL.squish)
        LOCK TABLE #{table_name} IN ACCESS EXCLUSIVE MODE;

        CREATE OR REPLACE FUNCTION #{column_name}_bigint_trigger()
          RETURNS trigger
        AS
        $BODY$
        DECLARE
        BEGIN
          NEW.#{column_name}_bigint := NEW.#{column_name};
          RETURN NEW;
        END;
        $BODY$ LANGUAGE plpgsql;

        ALTER TABLE #{table_name} ADD #{column_name}_bigint BIGINT;

        CREATE TRIGGER #{table_name}_#{column_name}_bigint
          BEFORE INSERT OR UPDATE
          ON #{table_name}
          FOR EACH ROW
          EXECUTE FUNCTION #{column_name}_bigint_trigger();
      SQL
    end

    # Raises when any row has `<col>` different from `coalesce(<col>_bigint, 0)`;
    # returns true otherwise.
    def check_id_consistency(table_name, col_name = "id")
      res = DB[table_name].where(
        Sequel[col_name.to_sym] !~ SQL.coalesce(Sequel[:"#{col_name}_bigint"], 0),
      ).count
      raise "Inconsistent ids in #{table_name}: #{res} records" if res.positive?
      true
    end

    # rubocop:disable Metrics/MethodLength
    # Replaces `<column>` with `<column>_bigint`: drops the sync trigger, swaps
    # the column names and drops the old integer column.
    #
    # @param skip_constraint [Boolean] do not recreate the foreign key; when it
    #   is recreated, the referenced table is the pluralized column name
    #   without its "_id" suffix
    # @param primary_key [Array] when present, the index named
    #   `<table>_<joined keys>_index` is promoted to the primary key
    # @param uniq_constr [Boolean] add a UNIQUE constraint on the column
    def drop_old_foreign_key_column(table_name, column_name, skip_constraint: false,
                                    primary_key: [], uniq_constr: false)
      query_start = <<~SQL.squish
        LOCK TABLE #{table_name} IN ACCESS EXCLUSIVE MODE;
        DROP TRIGGER #{table_name}_#{column_name}_bigint ON #{table_name};
        ALTER TABLE #{table_name} RENAME #{column_name} TO #{column_name}_integer;
        ALTER TABLE #{table_name} RENAME #{column_name}_bigint TO #{column_name};
      SQL

      fkey_query = ""
      unless skip_constraint
        constraint_name = "#{table_name}_#{column_name}_fkey"
        ref_table_name = column_name.to_s.delete_suffix("_id").pluralize
        fkey_query = <<~SQL.squish
          ALTER TABLE #{table_name}
          DROP CONSTRAINT IF EXISTS #{constraint_name},
          ADD CONSTRAINT #{constraint_name}
          FOREIGN KEY (#{column_name}) REFERENCES #{ref_table_name}(id) NOT VALID;
        SQL
      end

      drop_query = <<~SQL.squish
        ALTER TABLE #{table_name} DROP #{column_name}_integer;
      SQL

      constr_query = ""
      if uniq_constr
        constr_query = <<~SQL.squish
          ALTER TABLE #{table_name}
          ADD CONSTRAINT #{table_name}_#{column_name}_key UNIQUE (#{column_name});
        SQL
      end

      pkey_query = ""
      if primary_key.present?
        pkey_query = <<~SQL.squish
          ALTER TABLE #{table_name} ADD CONSTRAINT #{table_name}_pkey PRIMARY KEY
          USING INDEX #{table_name}_#{primary_key.join("_")}_index;
        SQL
      end

      query = query_start + fkey_query + drop_query + constr_query + pkey_query
      DB.run(query)
    end
    # rubocop:enable Metrics/MethodLength

    # Samples up to 10 records and checks that `record.method.reverse_method`
    # is not blank for each of them. Raises StandardError on the first failure.
    def check_associations(model, method, reverse_method)
      model.dataset.limit(10).all.each do |record|
        res = record.public_send(method).public_send(reverse_method)
        next unless res.blank?

        raise StandardError, "Blank association #{method}.#{reverse_method} for #{model}"
      end
      true
    end

    # Drops and recreates `<table>_distributed` on the click_cluster cluster as
    # a Distributed table over `<table>` with the given sharding key.
    def create_distributed_table!(table_name, sharding_key, db_name: UmbrellioUtils::ClickHouse.db_name)
      UmbrellioUtils::ClickHouse.execute(<<~SQL.squish)
        DROP TABLE IF EXISTS #{db_name}.#{table_name}_distributed
        ON CLUSTER click_cluster
      SQL

      UmbrellioUtils::ClickHouse.execute(<<~SQL.squish)
        CREATE TABLE #{db_name}.#{table_name}_distributed
        ON CLUSTER click_cluster
        AS #{db_name}.#{table_name}
        ENGINE = Distributed(click_cluster, #{db_name}, #{table_name}, #{sharding_key})
      SQL
    end

    # Appends columns to the SELECT list of an existing view.
    # The columns are inserted before the first "FROM" found in the definition.
    #
    # @example
    #   add_columns_to_view(
    #     "orders_clickhouse_view",
    #     Sequel[:orders][:data].pg_jsonb.get_text("some_data_column").as(:some_column),
    #     Sequel[:orders][:column].as(:some_other_column),
    #   )
    def add_columns_to_view(view_name, *sequel_columns)
      sequel_columns.each do |column|
        unless column.is_a?(Sequel::SQL::AliasedExpression)
          raise ArgumentError.new("not Sequel::SQL::AliasedExpression")
        end
      end

      DB.transaction do
        DB.run("LOCK TABLE #{view_name}")
        definition = view_definition(view_name)
        sql = sequel_columns.map { |x| DB.literal(x) }.join(", ")
        # Block form: a replacement *string* would have its backslash sequences
        # (\1, \\, ...) interpreted by String#sub and corrupt the generated SQL.
        new_definition = definition.sub("FROM") { ", #{sql} FROM" }
        DB.run("CREATE OR REPLACE VIEW #{view_name} AS #{new_definition}")
      end
    end

    # Recreates the view without the given columns (matched by alias, or by the
    # part after the last qualifier dot when there is no alias).
    #
    # @example
    #   drop_columns_from_view("orders_clickhouse_view", "id", "guid")
    def drop_columns_from_view(view_name, *columns)
      # Parsed names are strings, so symbols are accepted by normalizing here.
      column_names = columns.map(&:to_s)

      DB.transaction do
        DB.run("LOCK TABLE #{view_name}")
        definition = view_definition(view_name)
        parsed_columns = parse_columns(definition)
        parsed_columns.reject! { |name, _| name.in?(column_names) }
        sql = parsed_columns.map(&:last).join(", ")
        # Block form keeps backslashes inside the remaining column SQL intact.
        new_definition = definition.sub(/SELECT(.*?)FROM/i) { "SELECT #{sql} FROM" }
        DB.run("DROP VIEW #{view_name}")
        DB.run("CREATE VIEW #{view_name} AS #{new_definition}")
      end
    end

    private

    # Splits the SELECT list of a view definition into [name, sql] pairs.
    # Commas inside one level of parentheses do not split a column.
    def parse_columns(definition)
      fields_sql = definition[/SELECT(.*?)FROM/i, 1].strip
      fields = fields_sql.scan(/(?:[^,(]+|\([^)]*\))+/).map(&:strip)
      field_names = fields.map do |field|
        # \b keeps identifiers that merely end in "as" (alias, bias, ...) from
        # being mistaken for the AS keyword.
        field[/\bas (.*)/i, 1] || field[/\.(.*)\z/, 1]
      end
      field_names.zip(fields)
    end

    # Returns the whitespace-squished definition of the view from pg_views.
    def view_definition(view)
      DB[:pg_views]
        .where(viewname: view.to_s)
        .select(:definition).first[:definition]
        .squish
    end
  end
end
@@ -4,6 +4,11 @@ module UmbrellioUtils
4
4
  module Misc
5
5
  extend self
6
6
 
7
# Hash whose `[]` behaves like `fetch` (raises KeyError for a missing key).
# The lenient lookup that `Hash#[]` normally provides stays available as `get`.
class StrictHash < Hash
  # Order matters: capture the original `[]` before it is redefined below.
  alias_method :get, :[]
  alias_method :[], :fetch
end
11
+
7
12
  def table_sync(scope, delay: 1, routing_key: nil)
8
13
  scope.in_batches do |batch|
9
14
  batch_for_sync = batch.all.reject { |model| model.try(:skip_table_sync?) }
@@ -0,0 +1,192 @@
1
+ # frozen_string_literal: true
2
+
3
module UmbrellioUtils
  # Shortcuts for building Sequel expressions. Helpers prefixed with `pg_`
  # or `ch_` build PostgreSQL- or ClickHouse-flavoured expressions.
  module SQL
    extend self

    UniqueConstraintViolation = Sequel::UniqueConstraintViolation

    # --- Thin pass-throughs to Sequel ---------------------------------------

    def [](*args)
      Sequel[*args]
    end

    def func(...)
      Sequel.function(...)
    end

    def cast(...)
      Sequel.cast(...)
    end

    def case(...)
      Sequel.case(...)
    end

    def pg_jsonb(...)
      Sequel.pg_jsonb(...)
    end

    # --- Time zones ---------------------------------------------------------

    def to_utc(date) = func(:timezone, "UTC", date)

    # Converts to UTC first, then casts to timestamptz and shifts to +zone+.
    def to_timezone(zone, date)
      func(:timezone, zone, cast(to_utc(date), :timestamptz))
    end

    # --- Boolean combinators ------------------------------------------------

    # ANDs all conditions (nested arrays are flattened); with no conditions
    # the expression is built from the literal `true`.
    def and(*conditions)
      terms = conditions.flatten.presence || true
      Sequel.&(*Array(terms))
    end

    def not(...)
      Sequel.~(...)
    end

    # ORs all conditions (nested arrays are flattened); with no conditions
    # the expression is built from the literal `true`.
    def or(*conditions)
      terms = conditions.flatten.presence || true
      Sequel.|(*Array(terms))
    end

    # --- Ranges -------------------------------------------------------------

    def pg_range(from_value, to_value, **opts)
      Sequel::Postgres::PGRange.new(from_value, to_value, **opts)
    end

    def pg_range_by_range(range) = Sequel::Postgres::PGRange.from_range(range)

    # --- Aggregates and scalar functions ------------------------------------

    def max(expr) = func(:max, expr)

    def min(expr) = func(:min, expr)

    def sum(expr) = func(:sum, expr)

    # count(expr) when an expression is given, count(*) otherwise.
    def count(expr = nil)
      if expr
        func(:count, expr)
      else
        func(:count).*
      end
    end

    # count(...) built without the `*` wildcard.
    def ch_count(*args) = func(:count, *args)

    def avg(expr) = func(:avg, expr)

    def pg_percentile(expr, percentile)
      func(:percentile_cont, percentile).within_group(expr)
    end

    def pg_median(expr) = pg_percentile(expr, 0.5)

    def ch_median(expr) = func(:median, expr)

    def abs(expr) = func(:abs, expr)

    def coalesce(*exprs) = func(:coalesce, *exprs)

    # coalesce with 0 appended as the last fallback.
    def coalesce0(*args) = coalesce(*args, 0)

    def nullif(main_expr, checking_expr) = func(:nullif, main_expr, checking_expr)

    def distinct(expr) = func(:distinct, expr)

    def least(*exprs) = func(:least, *exprs)

    def greatest(*exprs) = func(:greatest, *exprs)

    def date_trunc(truncate, expr) = func(:date_trunc, truncate.to_s, expr)

    # --- ClickHouse timestamps ----------------------------------------------

    # Formats a time as "YYYY-MM-DD HH:MM:SS.ffffff"; nil stays nil.
    def ch_timestamp(time) = time&.strftime("%F %T.%6N")

    # toDateTime64(<formatted time>, 6); strings are parsed with Time.zone first.
    def ch_timestamp_expr(time)
      time = Time.zone.parse(time) if time.is_a?(String)
      formatted = ch_timestamp(time)
      func(:toDateTime64, Sequel[formatted], 6)
    end

    # Maps both ends of a time range through ch_timestamp, keeping end exclusivity.
    def ch_time_range(range)
      first = ch_timestamp(range.begin)
      last = ch_timestamp(range.end)
      Range.new(first, last, range.exclude_end?)
    end

    # --- JSONB --------------------------------------------------------------

    # Walks +path+ key by key, indexing into the expression at every step.
    def jsonb_dig(jsonb, path)
      path.inject(jsonb) { |node, key| node[key] }
    end

    def jsonb_typeof(jsonb) = func(:jsonb_typeof, jsonb)

    def empty_jsonb = Sequel.pg_jsonb({})

    # --- Misc ---------------------------------------------------------------

    def round(value, precision = 0) = func(:round, value, precision)

    def row(*values) = func(:row, *values)

    # Turns { alias => expr } into a list of aliased expressions.
    def map_to_expr(hash)
      hash.map { |name, expression| expression.as(name) }
    end

    def intersect(left_expr, right_expr)
      Sequel.lit("SELECT ? INTERSECT SELECT ?", left_expr, right_expr)
    end

    # can rewrite scalar values
    #
    # Sets +value+ at +path+. The parent of the last path element is first
    # passed through jsonb_rewrite_scalar (anything that is not an object or
    # array becomes an empty object). When +value+ is NULL the parent is
    # written back without the new key.
    def jsonb_unsafe_set(jsonb, path, value)
      parent_path = path.slice(..-2)
      parent = jsonb_rewrite_scalar(jsonb_dig(jsonb, parent_path))
      with_value = parent.set(path.slice(-1..-1), value)
      replacement = self.case({ { value => nil } => parent }, with_value)
      jsonb.set(parent_path, replacement)
    end

    def true
      Sequel.lit("true")
    end

    def false
      Sequel.lit("false")
    end

    private

    # Keeps objects and arrays as they are; everything else becomes an empty jsonb object.
    def jsonb_rewrite_scalar(jsonb)
      container = { jsonb_typeof(jsonb) => %w[object array] }
      self.case({ container => jsonb }, empty_jsonb).pg_jsonb
    end
  end
end
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
namespace :ch do
  desc "run clickhouse client"
  # Replaces the current process with `clickhouse client`, connected with the
  # configured credentials. Each CLICKHOUSE_* environment variable overrides
  # the matching config value, and the config's global_params are merged in
  # last. Blank values are not passed at all.
  #
  # NOTE(review): the password is passed as a command-line flag, so it is
  # visible in the process list while the client runs.
  task connect: :environment do
    config = UmbrellioUtils::ClickHouse.config

    params = {
      host: ENV.fetch("CLICKHOUSE_HOST", config.host),
      user: ENV.fetch("CLICKHOUSE_USER", config.username),
      password: ENV.fetch("CLICKHOUSE_PASSWORD", config.password),
      database: ENV.fetch("CLICKHOUSE_DATABASE", config.database),
      **config.global_params,
    }.compact_blank

    flags = params.map { |k, v| "--#{k}=#{v}" }

    # Multi-argument exec runs the program directly, with no shell in between,
    # so no escaping is needed and Shellwords (never required by this file)
    # is not used.
    exec("clickhouse", "client", *flags)
  end
end
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
RSpec.configure do |config|
  config.before(:suite) do
    # Test-only patch: append a random() (or rand() when the dataset has the
    # :ch option key) term to the ORDER BY of generated SELECTs, so specs
    # cannot rely on an ordering they did not fully specify.
    class Sequel::Postgres::Dataset # rubocop:disable Lint/ConstantDefinitionInBlock
      def select_sql
        # Leave the SQL untouched when the patch was already applied, when raw
        # SQL is appended, or when a :ch dataset already has an explicit order.
        untouched = @opts[:_skip_order_patch] || @opts[:append_sql] ||
                    (@opts[:ch] && @opts[:order].present?)
        return super if untouched

        random_fn = @opts.key?(:ch) ? :rand : :random
        order = [*@opts[:order], Sequel.function(random_fn)]
        # The flag stops the cloned dataset from being patched again.
        clone(order:, _skip_order_patch: true).select_sql
      end
    end
  end
end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module UmbrellioUtils
4
- VERSION = "1.9.0"
4
+ VERSION = "1.10.0"
5
5
  end
@@ -48,18 +48,21 @@ end
48
48
 
49
49
  require_relative "umbrellio_utils/cards"
50
50
  require_relative "umbrellio_utils/checks"
51
+ require_relative "umbrellio_utils/click_house"
51
52
  require_relative "umbrellio_utils/constants"
52
53
  require_relative "umbrellio_utils/control"
53
54
  require_relative "umbrellio_utils/database"
54
55
  require_relative "umbrellio_utils/formatting"
55
56
  require_relative "umbrellio_utils/http_client"
56
57
  require_relative "umbrellio_utils/jobs"
58
+ require_relative "umbrellio_utils/migrations"
57
59
  require_relative "umbrellio_utils/misc"
58
60
  require_relative "umbrellio_utils/parsing"
59
61
  require_relative "umbrellio_utils/passwords"
60
62
  require_relative "umbrellio_utils/random"
61
63
  require_relative "umbrellio_utils/request_wrapper"
62
64
  require_relative "umbrellio_utils/rounding"
65
+ require_relative "umbrellio_utils/sql"
63
66
  require_relative "umbrellio_utils/semantic_logger/tiny_json_formatter"
64
67
  require_relative "umbrellio_utils/store"
65
68
  require_relative "umbrellio_utils/vault"
@@ -23,8 +23,8 @@ Gem::Specification.new do |spec|
23
23
  spec.files = Dir.chdir(File.expand_path(__dir__)) do
24
24
  `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
25
25
  end
26
- spec.bindir = "exe"
27
- spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
26
+ spec.bindir = "bin"
27
+ spec.executables = ["clickhouse-server"]
28
28
  spec.require_paths = ["lib"]
29
29
 
30
30
  spec.add_dependency "memery", "~> 1"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: umbrellio-utils
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.9.0
4
+ version: 1.10.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Team Umbrellio
8
8
  autorequire:
9
- bindir: exe
9
+ bindir: bin
10
10
  cert_chain: []
11
- date: 2025-10-29 00:00:00.000000000 Z
11
+ date: 2025-12-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: memery
@@ -27,11 +27,13 @@ dependencies:
27
27
  description: UmbrellioUtils is collection of utility classes and helpers
28
28
  email:
29
29
  - oss@umbrellio.biz
30
- executables: []
30
+ executables:
31
+ - clickhouse-server
31
32
  extensions: []
32
33
  extra_rdoc_files: []
33
34
  files:
34
35
  - ".editorconfig"
36
+ - ".github/clickhouse/clickhouse_keeper.xml"
35
37
  - ".github/workflows/test.yml"
36
38
  - ".gitignore"
37
39
  - ".rspec"
@@ -41,18 +43,21 @@ files:
41
43
  - LICENSE.txt
42
44
  - README.md
43
45
  - Rakefile
46
+ - bin/clickhouse-server
44
47
  - bin/console
45
48
  - bin/setup
46
49
  - lib/umbrellio-utils.rb
47
50
  - lib/umbrellio_utils.rb
48
51
  - lib/umbrellio_utils/cards.rb
49
52
  - lib/umbrellio_utils/checks.rb
53
+ - lib/umbrellio_utils/click_house.rb
50
54
  - lib/umbrellio_utils/constants.rb
51
55
  - lib/umbrellio_utils/control.rb
52
56
  - lib/umbrellio_utils/database.rb
53
57
  - lib/umbrellio_utils/formatting.rb
54
58
  - lib/umbrellio_utils/http_client.rb
55
59
  - lib/umbrellio_utils/jobs.rb
60
+ - lib/umbrellio_utils/migrations.rb
56
61
  - lib/umbrellio_utils/misc.rb
57
62
  - lib/umbrellio_utils/parsing.rb
58
63
  - lib/umbrellio_utils/passwords.rb
@@ -60,7 +65,10 @@ files:
60
65
  - lib/umbrellio_utils/request_wrapper.rb
61
66
  - lib/umbrellio_utils/rounding.rb
62
67
  - lib/umbrellio_utils/semantic_logger/tiny_json_formatter.rb
68
+ - lib/umbrellio_utils/sql.rb
63
69
  - lib/umbrellio_utils/store.rb
70
+ - lib/umbrellio_utils/tasks/clickhouse_connect.rake
71
+ - lib/umbrellio_utils/testing/sequel_patches.rb
64
72
  - lib/umbrellio_utils/vault.rb
65
73
  - lib/umbrellio_utils/version.rb
66
74
  - log/.keep