active_hll 0.1.0 → 0.2.0
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/LICENSE.txt +1 -1
- data/README.md +13 -7
- data/lib/active_hll/model.rb +30 -0
- data/lib/active_hll/utils.rb +1 -1
- data/lib/active_hll/version.rb +1 -1
- metadata +6 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a357d4e04aad048c5abddf17021edde474073438414e6d85e3f48748ff1fc304
|
4
|
+
data.tar.gz: '03031590842b88853c7028e5a45174ff104f674ecf86e235f64fdb7120435c23'
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2efc0fc07d80ae37f2063388151bfe3b0a7459d584a03b51661b1010f64b03d8e8b275e65664f859e77fb6c1fa48840ef3f7cae706f37c23afbefe6d358fe04f
|
7
|
+
data.tar.gz: 29c3beb46f85e6a58ed522a4503414b4cbb2f322c5563c26a07a38e2160d84058c3c79ace3dd17165f961b76d7313b8e5c4f906b52d52106fa8d5713bccb8ffc
|
data/CHANGELOG.md
CHANGED
data/LICENSE.txt
CHANGED
data/README.md
CHANGED
@@ -4,7 +4,7 @@
|
|
4
4
|
|
5
5
|
For fast, approximate count-distinct queries
|
6
6
|
|
7
|
-
[Build Status](https://github.com/ankane/active_hll/actions)
|
8
8
|
|
9
9
|
## Installation
|
10
10
|
|
@@ -12,8 +12,8 @@ First, install the [hll extension](https://github.com/citusdata/postgresql-hll)
|
|
12
12
|
|
13
13
|
```sh
|
14
14
|
cd /tmp
|
15
|
-
curl -L https://github.com/citusdata/postgresql-hll/archive/refs/tags/v2.
|
16
|
-
cd postgresql-hll-2.
|
15
|
+
curl -L https://github.com/citusdata/postgresql-hll/archive/refs/tags/v2.18.tar.gz | tar xz
|
16
|
+
cd postgresql-hll-2.18
|
17
17
|
make
|
18
18
|
make install # may need sudo
|
19
19
|
```
|
@@ -39,7 +39,7 @@ HLLs provide an approximate count of unique values (like unique visitors). By ro
|
|
39
39
|
Create a table with an `hll` column
|
40
40
|
|
41
41
|
```ruby
|
42
|
-
class CreateEventRollups < ActiveRecord::Migration[7.0]
|
42
|
+
class CreateEventRollups < ActiveRecord::Migration[7.1]
|
43
43
|
def change
|
44
44
|
create_table :event_rollups do |t|
|
45
45
|
t.date :time_bucket, index: {unique: true}
|
@@ -53,7 +53,7 @@ You can use [batch](#batch) and [stream](#stream) approaches to build HLLs
|
|
53
53
|
|
54
54
|
### Batch
|
55
55
|
|
56
|
-
|
56
|
+
To generate HLLs from existing data, use the `hll_agg` method
|
57
57
|
|
58
58
|
```ruby
|
59
59
|
hlls = Event.group_by_day(:created_at).hll_agg(:visitor_id)
|
@@ -74,12 +74,18 @@ For a large number of HLLs, use SQL to generate and upsert in a single statement
|
|
74
74
|
|
75
75
|
### Stream
|
76
76
|
|
77
|
-
|
77
|
+
To add new data to HLLs, use the `hll_add` method
|
78
78
|
|
79
79
|
```ruby
|
80
80
|
EventRollup.where(time_bucket: Date.current).hll_add(visitor_ids: ["visitor1", "visitor2"])
|
81
81
|
```
|
82
82
|
|
83
|
+
or the `hll_upsert` method (experimental)
|
84
|
+
|
85
|
+
```ruby
|
86
|
+
EventRollup.hll_upsert({time_bucket: Date.current, visitor_ids: ["visitor1", "visitor2"]})
|
87
|
+
```
|
88
|
+
|
83
89
|
## Querying
|
84
90
|
|
85
91
|
Get approximate unique values for a time range
|
@@ -132,7 +138,7 @@ There’s not a way to remove data from an HLL, so to delete data for a specific
|
|
132
138
|
|
133
139
|
## Hosted Postgres
|
134
140
|
|
135
|
-
The `hll` extension is available on
|
141
|
+
The `hll` extension is available on a number of [hosted providers](https://github.com/ankane/active_hll/issues/4).
|
136
142
|
|
137
143
|
## History
|
138
144
|
|
data/lib/active_hll/model.rb
CHANGED
@@ -50,6 +50,36 @@ module ActiveHll
|
|
50
50
|
|
51
51
|
update_all(set_clauses.join(", "))
|
52
52
|
end
|
53
|
+
|
54
|
+
# experimental
|
55
|
+
def hll_upsert(attributes)
|
56
|
+
hll_columns, other_columns = attributes.keys.partition { |a| columns_hash[a.to_s]&.type == :hll }
|
57
|
+
|
58
|
+
# important! raise if column detection fails
|
59
|
+
if hll_columns.empty?
|
60
|
+
raise ArgumentError, "No hll columns"
|
61
|
+
end
|
62
|
+
|
63
|
+
quoted_table = connection.quote_table_name(table_name)
|
64
|
+
|
65
|
+
quoted_hll_columns = hll_columns.map { |k| connection.quote_column_name(k) }
|
66
|
+
quoted_other_columns = other_columns.map { |k| connection.quote_column_name(k) }
|
67
|
+
quoted_columns = quoted_other_columns + quoted_hll_columns
|
68
|
+
|
69
|
+
hll_values =
|
70
|
+
hll_columns.map do |k|
|
71
|
+
vs = attributes[k]
|
72
|
+
vs = [vs] unless vs.is_a?(Array)
|
73
|
+
vs.map { |v| Utils.hll_hash_sql(self, v) }.join(" || ")
|
74
|
+
end
|
75
|
+
other_values = other_columns.map { |k| connection.quote(attributes[k]) }
|
76
|
+
|
77
|
+
insert_values = other_values + hll_values.map { |v| "hll_empty()#{v.size > 0 ? " || #{v}" : ""}" }
|
78
|
+
update_values = quoted_hll_columns.zip(hll_values).map { |k, v| "#{k} = COALESCE(#{quoted_table}.#{k}, hll_empty())#{v.size > 0 ? " || #{v}" : ""}" }
|
79
|
+
|
80
|
+
sql = "INSERT INTO #{quoted_table} (#{quoted_columns.join(", ")}) VALUES (#{insert_values.join(", ")}) ON CONFLICT (#{quoted_other_columns.join(", ")}) DO UPDATE SET #{update_values.join(", ")}"
|
81
|
+
connection.exec_insert(sql, "#{name} Upsert")
|
82
|
+
end
|
53
83
|
end
|
54
84
|
|
55
85
|
# doesn't update in-memory record attribute for performance
|
data/lib/active_hll/utils.rb
CHANGED
@@ -46,7 +46,7 @@ module ActiveHll
|
|
46
46
|
# matches table.column and column
|
47
47
|
unless column.is_a?(Symbol) || column.is_a?(Arel::Nodes::SqlLiteral)
|
48
48
|
column = column.to_s
|
49
|
-
unless /\A\w+(\.\w+)?\z/i.match(column)
|
49
|
+
unless /\A\w+(\.\w+)?\z/i.match?(column)
|
50
50
|
raise ActiveRecord::UnknownAttributeReference, "Query method called with non-attribute argument(s): #{column.inspect}. Use Arel.sql() for known-safe values."
|
51
51
|
end
|
52
52
|
end
|
data/lib/active_hll/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: active_hll
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-06-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activerecord
|
@@ -16,14 +16,14 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '6'
|
19
|
+
version: '6.1'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '6'
|
26
|
+
version: '6.1'
|
27
27
|
description:
|
28
28
|
email: andrew@ankane.org
|
29
29
|
executables: []
|
@@ -53,14 +53,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
53
53
|
requirements:
|
54
54
|
- - ">="
|
55
55
|
- !ruby/object:Gem::Version
|
56
|
-
version: '
|
56
|
+
version: '3.1'
|
57
57
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
59
|
- - ">="
|
60
60
|
- !ruby/object:Gem::Version
|
61
61
|
version: '0'
|
62
62
|
requirements: []
|
63
|
-
rubygems_version: 3.
|
63
|
+
rubygems_version: 3.5.11
|
64
64
|
signing_key:
|
65
65
|
specification_version: 4
|
66
66
|
summary: HyperLogLog for Rails and Postgres
|