active_hll 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/LICENSE.txt +1 -1
- data/README.md +1 -1
- data/lib/active_hll/model.rb +52 -44
- data/lib/active_hll/utils.rb +26 -20
- data/lib/active_hll/version.rb +1 -1
- data/lib/active_hll.rb +1 -5
- metadata +6 -10
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b6d25010e464c117677657108c90fb6b185719ab5f1540ee64ae4c0c622810f0
|
4
|
+
data.tar.gz: c6d2a6971bfab1ec70b99252a30d3224113695ec84f6b74b56212e152468f73b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c859db76f287ef1a329b0a08a56aab630be8eddd69f4d5842b97baf2014373ddfd230dde525a7ab84634edf8f686dc674753f8720888afa3395905fe7c98ee0b
|
7
|
+
data.tar.gz: e9291fce2c932a93a3268fd5382dbdfac350927ae6c0ff2185d4e1ec2f4d05f3857bf9707b487634e8b353b8f600aa94417ce9e5ab0256f029d0034ba4d075cc
|
data/CHANGELOG.md
CHANGED
data/LICENSE.txt
CHANGED
data/README.md
CHANGED
@@ -39,7 +39,7 @@ HLLs provide an approximate count of unique values (like unique visitors). By ro
|
|
39
39
|
Create a table with an `hll` column
|
40
40
|
|
41
41
|
```ruby
|
42
|
-
class CreateEventRollups < ActiveRecord::Migration[
|
42
|
+
class CreateEventRollups < ActiveRecord::Migration[8.0]
|
43
43
|
def change
|
44
44
|
create_table :event_rollups do |t|
|
45
45
|
t.date :time_bucket, index: {unique: true}
|
data/lib/active_hll/model.rb
CHANGED
@@ -20,65 +20,71 @@ module ActiveHll
|
|
20
20
|
# experimental
|
21
21
|
# doesn't work with non-default parameters
|
22
22
|
def hll_generate(values)
|
23
|
-
|
23
|
+
Utils.with_connection(self) do |connection|
|
24
|
+
parts = ["hll_empty()"]
|
24
25
|
|
25
|
-
|
26
|
-
|
27
|
-
|
26
|
+
values.each do |value|
|
27
|
+
parts << Utils.hll_hash_sql(connection, value)
|
28
|
+
end
|
28
29
|
|
29
|
-
|
30
|
-
|
30
|
+
result = connection.select_all("SELECT #{parts.join(" || ")}").rows[0][0]
|
31
|
+
ActiveHll::Type.new.deserialize(result)
|
32
|
+
end
|
31
33
|
end
|
32
34
|
|
33
35
|
def hll_add(attributes)
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
36
|
+
Utils.with_connection(self) do |connection|
|
37
|
+
set_clauses =
|
38
|
+
attributes.map do |attribute, values|
|
39
|
+
values = [values] unless values.is_a?(Array)
|
40
|
+
return 0 if values.empty?
|
41
|
+
|
42
|
+
quoted_column = connection.quote_column_name(attribute)
|
43
|
+
# possibly fetch parameters for the column in the future
|
44
|
+
# for now, users should set a default value on the column
|
45
|
+
parts = ["COALESCE(#{quoted_column}, hll_empty())"]
|
46
|
+
|
47
|
+
values.each do |value|
|
48
|
+
parts << Utils.hll_hash_sql(connection, value)
|
49
|
+
end
|
50
|
+
|
51
|
+
"#{quoted_column} = #{parts.join(" || ")}"
|
46
52
|
end
|
47
53
|
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
update_all(set_clauses.join(", "))
|
54
|
+
update_all(set_clauses.join(", "))
|
55
|
+
end
|
52
56
|
end
|
53
57
|
|
54
58
|
# experimental
|
55
59
|
def hll_upsert(attributes)
|
56
|
-
|
60
|
+
Utils.with_connection(self) do |connection|
|
61
|
+
hll_columns, other_columns = attributes.keys.partition { |a| columns_hash[a.to_s]&.type == :hll }
|
57
62
|
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
63
|
+
# important! raise if column detection fails
|
64
|
+
if hll_columns.empty?
|
65
|
+
raise ArgumentError, "No hll columns"
|
66
|
+
end
|
62
67
|
|
63
|
-
|
68
|
+
quoted_table = connection.quote_table_name(table_name)
|
64
69
|
|
65
|
-
|
66
|
-
|
67
|
-
|
70
|
+
quoted_hll_columns = hll_columns.map { |k| connection.quote_column_name(k) }
|
71
|
+
quoted_other_columns = other_columns.map { |k| connection.quote_column_name(k) }
|
72
|
+
quoted_columns = quoted_other_columns + quoted_hll_columns
|
68
73
|
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
74
|
+
hll_values =
|
75
|
+
hll_columns.map do |k|
|
76
|
+
vs = attributes[k]
|
77
|
+
vs = [vs] unless vs.is_a?(Array)
|
78
|
+
vs.map { |v| Utils.hll_hash_sql(connection, v) }.join(" || ")
|
79
|
+
end
|
80
|
+
other_values = other_columns.map { |k| connection.quote(attributes[k]) }
|
76
81
|
|
77
|
-
|
78
|
-
|
82
|
+
insert_values = other_values + hll_values.map { |v| "hll_empty()#{v.size > 0 ? " || #{v}" : ""}" }
|
83
|
+
update_values = quoted_hll_columns.zip(hll_values).map { |k, v| "#{k} = COALESCE(#{quoted_table}.#{k}, hll_empty())#{v.size > 0 ? " || #{v}" : ""}" }
|
79
84
|
|
80
|
-
|
81
|
-
|
85
|
+
sql = "INSERT INTO #{quoted_table} (#{quoted_columns.join(", ")}) VALUES (#{insert_values.join(", ")}) ON CONFLICT (#{quoted_other_columns.join(", ")}) DO UPDATE SET #{update_values.join(", ")}"
|
86
|
+
connection.exec_insert(sql, "#{name} Upsert")
|
87
|
+
end
|
82
88
|
end
|
83
89
|
end
|
84
90
|
|
@@ -89,8 +95,10 @@ module ActiveHll
|
|
89
95
|
end
|
90
96
|
|
91
97
|
def hll_count(attribute)
|
92
|
-
|
93
|
-
|
98
|
+
Utils.with_connection(self.class) do |connection|
|
99
|
+
quoted_column = connection.quote_column_name(attribute)
|
100
|
+
self.class.where(id: id).pluck("hll_cardinality(#{quoted_column})").first || 0.0
|
101
|
+
end
|
94
102
|
end
|
95
103
|
end
|
96
104
|
end
|
data/lib/active_hll/utils.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
module ActiveHll
|
2
2
|
module Utils
|
3
3
|
class << self
|
4
|
-
def hll_hash_sql(
|
4
|
+
def hll_hash_sql(connection, value)
|
5
5
|
hash_function =
|
6
6
|
case value
|
7
7
|
when true, false
|
@@ -13,34 +13,40 @@ module ActiveHll
|
|
13
13
|
else
|
14
14
|
raise ArgumentError, "Unexpected type: #{value.class.name}"
|
15
15
|
end
|
16
|
-
quoted_value =
|
16
|
+
quoted_value = connection.quote(value)
|
17
17
|
"#{hash_function}(#{quoted_value})"
|
18
18
|
end
|
19
19
|
|
20
|
-
def
|
21
|
-
|
22
|
-
|
20
|
+
def with_connection(relation, &block)
|
21
|
+
relation.connection_pool.with_connection(&block)
|
22
|
+
end
|
23
23
|
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
rows << (result.column_types.empty? ? untyped_row : columns.each_with_index.map { |c, i| untyped_row[i] && result.column_types[c] ? result.column_types[c].deserialize(untyped_row[i]) : untyped_row[i] })
|
29
|
-
end
|
24
|
+
def hll_calculate(relation, operation, column, default_value:)
|
25
|
+
Utils.with_connection(relation) do |connection|
|
26
|
+
sql, relation, group_values = hll_calculate_sql(relation, connection, operation, column)
|
27
|
+
result = connection.select_all(sql)
|
30
28
|
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
rows[
|
29
|
+
# typecast
|
30
|
+
rows = []
|
31
|
+
columns = result.columns
|
32
|
+
result.rows.each do |untyped_row|
|
33
|
+
rows << (result.column_types.empty? ? untyped_row : columns.each_with_index.map { |c, i| untyped_row[i] && result.column_types[c] ? result.column_types[c].deserialize(untyped_row[i]) : untyped_row[i] })
|
36
34
|
end
|
37
35
|
|
38
|
-
|
36
|
+
result =
|
37
|
+
if group_values.any?
|
38
|
+
Hash[rows.map { |r| [r.size == 2 ? r[0] : r[0..-2], r[-1]] }]
|
39
|
+
else
|
40
|
+
rows[0] && rows[0][0]
|
41
|
+
end
|
42
|
+
|
43
|
+
result = Groupdate.process_result(relation, result, default_value: default_value) if defined?(Groupdate.process_result)
|
39
44
|
|
40
|
-
|
45
|
+
result
|
46
|
+
end
|
41
47
|
end
|
42
48
|
|
43
|
-
def hll_calculate_sql(relation, operation, column)
|
49
|
+
def hll_calculate_sql(relation, connection, operation, column)
|
44
50
|
# basic version of Active Record disallow_raw_sql!
|
45
51
|
# symbol = column (safe), Arel node = SQL (safe), other = untrusted
|
46
52
|
# matches table.column and column
|
@@ -54,7 +60,7 @@ module ActiveHll
|
|
54
60
|
# column resolution
|
55
61
|
node = relation.all.send(:arel_columns, [column]).first
|
56
62
|
node = Arel::Nodes::SqlLiteral.new(node) if node.is_a?(String)
|
57
|
-
column =
|
63
|
+
column = connection.visitor.accept(node, Arel::Collectors::SQLString.new).value
|
58
64
|
|
59
65
|
group_values = relation.all.group_values
|
60
66
|
|
data/lib/active_hll/version.rb
CHANGED
data/lib/active_hll.rb
CHANGED
@@ -33,9 +33,5 @@ ActiveSupport.on_load(:active_record) do
|
|
33
33
|
ActiveRecord::ConnectionAdapters::TableDefinition.send(:define_column_methods, :hll)
|
34
34
|
|
35
35
|
# prevent unknown OID warning
|
36
|
-
|
37
|
-
ActiveRecord::ConnectionAdapters::PostgreSQLAdapter.singleton_class.prepend(ActiveHll::RegisterType)
|
38
|
-
else
|
39
|
-
ActiveRecord::ConnectionAdapters::PostgreSQLAdapter.prepend(ActiveHll::RegisterType)
|
40
|
-
end
|
36
|
+
ActiveRecord::ConnectionAdapters::PostgreSQLAdapter.singleton_class.prepend(ActiveHll::RegisterType)
|
41
37
|
end
|
metadata
CHANGED
@@ -1,14 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: active_hll
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
|
-
autorequire:
|
9
8
|
bindir: bin
|
10
9
|
cert_chain: []
|
11
|
-
date:
|
10
|
+
date: 2025-04-03 00:00:00.000000000 Z
|
12
11
|
dependencies:
|
13
12
|
- !ruby/object:Gem::Dependency
|
14
13
|
name: activerecord
|
@@ -16,15 +15,14 @@ dependencies:
|
|
16
15
|
requirements:
|
17
16
|
- - ">="
|
18
17
|
- !ruby/object:Gem::Version
|
19
|
-
version: '
|
18
|
+
version: '7.1'
|
20
19
|
type: :runtime
|
21
20
|
prerelease: false
|
22
21
|
version_requirements: !ruby/object:Gem::Requirement
|
23
22
|
requirements:
|
24
23
|
- - ">="
|
25
24
|
- !ruby/object:Gem::Version
|
26
|
-
version: '
|
27
|
-
description:
|
25
|
+
version: '7.1'
|
28
26
|
email: andrew@ankane.org
|
29
27
|
executables: []
|
30
28
|
extensions: []
|
@@ -45,7 +43,6 @@ homepage: https://github.com/ankane/active_hll
|
|
45
43
|
licenses:
|
46
44
|
- MIT
|
47
45
|
metadata: {}
|
48
|
-
post_install_message:
|
49
46
|
rdoc_options: []
|
50
47
|
require_paths:
|
51
48
|
- lib
|
@@ -53,15 +50,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
53
50
|
requirements:
|
54
51
|
- - ">="
|
55
52
|
- !ruby/object:Gem::Version
|
56
|
-
version: '3.
|
53
|
+
version: '3.2'
|
57
54
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
58
55
|
requirements:
|
59
56
|
- - ">="
|
60
57
|
- !ruby/object:Gem::Version
|
61
58
|
version: '0'
|
62
59
|
requirements: []
|
63
|
-
rubygems_version: 3.
|
64
|
-
signing_key:
|
60
|
+
rubygems_version: 3.6.2
|
65
61
|
specification_version: 4
|
66
62
|
summary: HyperLogLog for Rails and Postgres
|
67
63
|
test_files: []
|