active_hll 0.1.1 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/LICENSE.txt +1 -1
- data/README.md +4 -4
- data/lib/active_hll/model.rb +52 -44
- data/lib/active_hll/utils.rb +27 -21
- data/lib/active_hll/version.rb +1 -1
- metadata +6 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 49fecb9c7cb1cdcf399ce9ae00e9e637d9d3c0c3789ad02da9bef4e2977bc2f5
|
4
|
+
data.tar.gz: d02131c319a338575fc215397a95d153648bc6c5b390c453f67d0509b1a3ee6f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: acc8c253b0d5ab708e73459ed35506e6d009f3c8f709c9f82a1b44ca77da2ac0a8431767f8d6a9d15faf5a17dbf20db1d7093008cad8e118979f1ffcf93b61ef
|
7
|
+
data.tar.gz: 1375c9ac1f6d6df024dfc0ccdf9304f1245a8d5510e8e69b34bf1d51fa49f6fb2bec7cf137476492c68d1b29f3344d947ba3b1cc26e762a53a46bf0c2681e7a1
|
data/CHANGELOG.md
CHANGED
data/LICENSE.txt
CHANGED
data/README.md
CHANGED
@@ -4,7 +4,7 @@
|
|
4
4
|
|
5
5
|
For fast, approximate count-distinct queries
|
6
6
|
|
7
|
-
[![Build Status](https://github.com/ankane/active_hll/workflows/build/badge.svg
|
7
|
+
[![Build Status](https://github.com/ankane/active_hll/actions/workflows/build.yml/badge.svg)](https://github.com/ankane/active_hll/actions)
|
8
8
|
|
9
9
|
## Installation
|
10
10
|
|
@@ -12,8 +12,8 @@ First, install the [hll extension](https://github.com/citusdata/postgresql-hll)
|
|
12
12
|
|
13
13
|
```sh
|
14
14
|
cd /tmp
|
15
|
-
curl -L https://github.com/citusdata/postgresql-hll/archive/refs/tags/v2.
|
16
|
-
cd postgresql-hll-2.
|
15
|
+
curl -L https://github.com/citusdata/postgresql-hll/archive/refs/tags/v2.18.tar.gz | tar xz
|
16
|
+
cd postgresql-hll-2.18
|
17
17
|
make
|
18
18
|
make install # may need sudo
|
19
19
|
```
|
@@ -39,7 +39,7 @@ HLLs provide an approximate count of unique values (like unique visitors). By ro
|
|
39
39
|
Create a table with an `hll` column
|
40
40
|
|
41
41
|
```ruby
|
42
|
-
class CreateEventRollups < ActiveRecord::Migration[7.
|
42
|
+
class CreateEventRollups < ActiveRecord::Migration[7.2]
|
43
43
|
def change
|
44
44
|
create_table :event_rollups do |t|
|
45
45
|
t.date :time_bucket, index: {unique: true}
|
data/lib/active_hll/model.rb
CHANGED
@@ -20,65 +20,71 @@ module ActiveHll
|
|
20
20
|
# experimental
|
21
21
|
# doesn't work with non-default parameters
|
22
22
|
def hll_generate(values)
|
23
|
-
|
23
|
+
Utils.with_connection(self) do |connection|
|
24
|
+
parts = ["hll_empty()"]
|
24
25
|
|
25
|
-
|
26
|
-
|
27
|
-
|
26
|
+
values.each do |value|
|
27
|
+
parts << Utils.hll_hash_sql(connection, value)
|
28
|
+
end
|
28
29
|
|
29
|
-
|
30
|
-
|
30
|
+
result = connection.select_all("SELECT #{parts.join(" || ")}").rows[0][0]
|
31
|
+
ActiveHll::Type.new.deserialize(result)
|
32
|
+
end
|
31
33
|
end
|
32
34
|
|
33
35
|
def hll_add(attributes)
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
36
|
+
Utils.with_connection(self) do |connection|
|
37
|
+
set_clauses =
|
38
|
+
attributes.map do |attribute, values|
|
39
|
+
values = [values] unless values.is_a?(Array)
|
40
|
+
return 0 if values.empty?
|
41
|
+
|
42
|
+
quoted_column = connection.quote_column_name(attribute)
|
43
|
+
# possibly fetch parameters for the column in the future
|
44
|
+
# for now, users should set a default value on the column
|
45
|
+
parts = ["COALESCE(#{quoted_column}, hll_empty())"]
|
46
|
+
|
47
|
+
values.each do |value|
|
48
|
+
parts << Utils.hll_hash_sql(connection, value)
|
49
|
+
end
|
50
|
+
|
51
|
+
"#{quoted_column} = #{parts.join(" || ")}"
|
46
52
|
end
|
47
53
|
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
update_all(set_clauses.join(", "))
|
54
|
+
update_all(set_clauses.join(", "))
|
55
|
+
end
|
52
56
|
end
|
53
57
|
|
54
58
|
# experimental
|
55
59
|
def hll_upsert(attributes)
|
56
|
-
|
60
|
+
Utils.with_connection(self) do |connection|
|
61
|
+
hll_columns, other_columns = attributes.keys.partition { |a| columns_hash[a.to_s]&.type == :hll }
|
57
62
|
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
63
|
+
# important! raise if column detection fails
|
64
|
+
if hll_columns.empty?
|
65
|
+
raise ArgumentError, "No hll columns"
|
66
|
+
end
|
62
67
|
|
63
|
-
|
68
|
+
quoted_table = connection.quote_table_name(table_name)
|
64
69
|
|
65
|
-
|
66
|
-
|
67
|
-
|
70
|
+
quoted_hll_columns = hll_columns.map { |k| connection.quote_column_name(k) }
|
71
|
+
quoted_other_columns = other_columns.map { |k| connection.quote_column_name(k) }
|
72
|
+
quoted_columns = quoted_other_columns + quoted_hll_columns
|
68
73
|
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
74
|
+
hll_values =
|
75
|
+
hll_columns.map do |k|
|
76
|
+
vs = attributes[k]
|
77
|
+
vs = [vs] unless vs.is_a?(Array)
|
78
|
+
vs.map { |v| Utils.hll_hash_sql(connection, v) }.join(" || ")
|
79
|
+
end
|
80
|
+
other_values = other_columns.map { |k| connection.quote(attributes[k]) }
|
76
81
|
|
77
|
-
|
78
|
-
|
82
|
+
insert_values = other_values + hll_values.map { |v| "hll_empty()#{v.size > 0 ? " || #{v}" : ""}" }
|
83
|
+
update_values = quoted_hll_columns.zip(hll_values).map { |k, v| "#{k} = COALESCE(#{quoted_table}.#{k}, hll_empty())#{v.size > 0 ? " || #{v}" : ""}" }
|
79
84
|
|
80
|
-
|
81
|
-
|
85
|
+
sql = "INSERT INTO #{quoted_table} (#{quoted_columns.join(", ")}) VALUES (#{insert_values.join(", ")}) ON CONFLICT (#{quoted_other_columns.join(", ")}) DO UPDATE SET #{update_values.join(", ")}"
|
86
|
+
connection.exec_insert(sql, "#{name} Upsert")
|
87
|
+
end
|
82
88
|
end
|
83
89
|
end
|
84
90
|
|
@@ -89,8 +95,10 @@ module ActiveHll
|
|
89
95
|
end
|
90
96
|
|
91
97
|
def hll_count(attribute)
|
92
|
-
|
93
|
-
|
98
|
+
Utils.with_connection(self.class) do |connection|
|
99
|
+
quoted_column = connection.quote_column_name(attribute)
|
100
|
+
self.class.where(id: id).pluck("hll_cardinality(#{quoted_column})").first || 0.0
|
101
|
+
end
|
94
102
|
end
|
95
103
|
end
|
96
104
|
end
|
data/lib/active_hll/utils.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
module ActiveHll
|
2
2
|
module Utils
|
3
3
|
class << self
|
4
|
-
def hll_hash_sql(
|
4
|
+
def hll_hash_sql(connection, value)
|
5
5
|
hash_function =
|
6
6
|
case value
|
7
7
|
when true, false
|
@@ -13,40 +13,46 @@ module ActiveHll
|
|
13
13
|
else
|
14
14
|
raise ArgumentError, "Unexpected type: #{value.class.name}"
|
15
15
|
end
|
16
|
-
quoted_value =
|
16
|
+
quoted_value = connection.quote(value)
|
17
17
|
"#{hash_function}(#{quoted_value})"
|
18
18
|
end
|
19
19
|
|
20
|
-
def
|
21
|
-
|
22
|
-
|
20
|
+
def with_connection(relation, &block)
|
21
|
+
relation.connection_pool.with_connection(&block)
|
22
|
+
end
|
23
23
|
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
rows << (result.column_types.empty? ? untyped_row : columns.each_with_index.map { |c, i| untyped_row[i] && result.column_types[c] ? result.column_types[c].deserialize(untyped_row[i]) : untyped_row[i] })
|
29
|
-
end
|
24
|
+
def hll_calculate(relation, operation, column, default_value:)
|
25
|
+
Utils.with_connection(relation) do |connection|
|
26
|
+
sql, relation, group_values = hll_calculate_sql(relation, connection, operation, column)
|
27
|
+
result = connection.select_all(sql)
|
30
28
|
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
rows[
|
29
|
+
# typecast
|
30
|
+
rows = []
|
31
|
+
columns = result.columns
|
32
|
+
result.rows.each do |untyped_row|
|
33
|
+
rows << (result.column_types.empty? ? untyped_row : columns.each_with_index.map { |c, i| untyped_row[i] && result.column_types[c] ? result.column_types[c].deserialize(untyped_row[i]) : untyped_row[i] })
|
36
34
|
end
|
37
35
|
|
38
|
-
|
36
|
+
result =
|
37
|
+
if group_values.any?
|
38
|
+
Hash[rows.map { |r| [r.size == 2 ? r[0] : r[0..-2], r[-1]] }]
|
39
|
+
else
|
40
|
+
rows[0] && rows[0][0]
|
41
|
+
end
|
42
|
+
|
43
|
+
result = Groupdate.process_result(relation, result, default_value: default_value) if defined?(Groupdate.process_result)
|
39
44
|
|
40
|
-
|
45
|
+
result
|
46
|
+
end
|
41
47
|
end
|
42
48
|
|
43
|
-
def hll_calculate_sql(relation, operation, column)
|
49
|
+
def hll_calculate_sql(relation, connection, operation, column)
|
44
50
|
# basic version of Active Record disallow_raw_sql!
|
45
51
|
# symbol = column (safe), Arel node = SQL (safe), other = untrusted
|
46
52
|
# matches table.column and column
|
47
53
|
unless column.is_a?(Symbol) || column.is_a?(Arel::Nodes::SqlLiteral)
|
48
54
|
column = column.to_s
|
49
|
-
unless /\A\w+(\.\w+)?\z/i.match(column)
|
55
|
+
unless /\A\w+(\.\w+)?\z/i.match?(column)
|
50
56
|
raise ActiveRecord::UnknownAttributeReference, "Query method called with non-attribute argument(s): #{column.inspect}. Use Arel.sql() for known-safe values."
|
51
57
|
end
|
52
58
|
end
|
@@ -54,7 +60,7 @@ module ActiveHll
|
|
54
60
|
# column resolution
|
55
61
|
node = relation.all.send(:arel_columns, [column]).first
|
56
62
|
node = Arel::Nodes::SqlLiteral.new(node) if node.is_a?(String)
|
57
|
-
column =
|
63
|
+
column = connection.visitor.accept(node, Arel::Collectors::SQLString.new).value
|
58
64
|
|
59
65
|
group_values = relation.all.group_values
|
60
66
|
|
data/lib/active_hll/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: active_hll
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-10-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activerecord
|
@@ -16,14 +16,14 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '6'
|
19
|
+
version: '6.1'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '6'
|
26
|
+
version: '6.1'
|
27
27
|
description:
|
28
28
|
email: andrew@ankane.org
|
29
29
|
executables: []
|
@@ -53,14 +53,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
53
53
|
requirements:
|
54
54
|
- - ">="
|
55
55
|
- !ruby/object:Gem::Version
|
56
|
-
version: '
|
56
|
+
version: '3.1'
|
57
57
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
59
|
- - ">="
|
60
60
|
- !ruby/object:Gem::Version
|
61
61
|
version: '0'
|
62
62
|
requirements: []
|
63
|
-
rubygems_version: 3.
|
63
|
+
rubygems_version: 3.5.16
|
64
64
|
signing_key:
|
65
65
|
specification_version: 4
|
66
66
|
summary: HyperLogLog for Rails and Postgres
|