active_hll 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/LICENSE.txt +1 -1
- data/README.md +13 -7
- data/lib/active_hll/model.rb +30 -0
- data/lib/active_hll/utils.rb +1 -1
- data/lib/active_hll/version.rb +1 -1
- metadata +6 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a357d4e04aad048c5abddf17021edde474073438414e6d85e3f48748ff1fc304
|
4
|
+
data.tar.gz: '03031590842b88853c7028e5a45174ff104f674ecf86e235f64fdb7120435c23'
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2efc0fc07d80ae37f2063388151bfe3b0a7459d584a03b51661b1010f64b03d8e8b275e65664f859e77fb6c1fa48840ef3f7cae706f37c23afbefe6d358fe04f
|
7
|
+
data.tar.gz: 29c3beb46f85e6a58ed522a4503414b4cbb2f322c5563c26a07a38e2160d84058c3c79ace3dd17165f961b76d7313b8e5c4f906b52d52106fa8d5713bccb8ffc
|
data/CHANGELOG.md
CHANGED
data/LICENSE.txt
CHANGED
data/README.md
CHANGED
@@ -4,7 +4,7 @@
|
|
4
4
|
|
5
5
|
For fast, approximate count-distinct queries
|
6
6
|
|
7
|
-
[![Build Status](https://github.com/ankane/active_hll/workflows/build/badge.svg
|
7
|
+
[![Build Status](https://github.com/ankane/active_hll/actions/workflows/build.yml/badge.svg)](https://github.com/ankane/active_hll/actions)
|
8
8
|
|
9
9
|
## Installation
|
10
10
|
|
@@ -12,8 +12,8 @@ First, install the [hll extension](https://github.com/citusdata/postgresql-hll)
|
|
12
12
|
|
13
13
|
```sh
|
14
14
|
cd /tmp
|
15
|
-
curl -L https://github.com/citusdata/postgresql-hll/archive/refs/tags/v2.
|
16
|
-
cd postgresql-hll-2.
|
15
|
+
curl -L https://github.com/citusdata/postgresql-hll/archive/refs/tags/v2.18.tar.gz | tar xz
|
16
|
+
cd postgresql-hll-2.18
|
17
17
|
make
|
18
18
|
make install # may need sudo
|
19
19
|
```
|
@@ -39,7 +39,7 @@ HLLs provide an approximate count of unique values (like unique visitors). By ro
|
|
39
39
|
Create a table with an `hll` column
|
40
40
|
|
41
41
|
```ruby
|
42
|
-
class CreateEventRollups < ActiveRecord::Migration[7.
|
42
|
+
class CreateEventRollups < ActiveRecord::Migration[7.1]
|
43
43
|
def change
|
44
44
|
create_table :event_rollups do |t|
|
45
45
|
t.date :time_bucket, index: {unique: true}
|
@@ -53,7 +53,7 @@ You can use [batch](#batch) and [stream](#stream) approaches to build HLLs
|
|
53
53
|
|
54
54
|
### Batch
|
55
55
|
|
56
|
-
|
56
|
+
To generate HLLs from existing data, use the `hll_agg` method
|
57
57
|
|
58
58
|
```ruby
|
59
59
|
hlls = Event.group_by_day(:created_at).hll_agg(:visitor_id)
|
@@ -74,12 +74,18 @@ For a large number of HLLs, use SQL to generate and upsert in a single statement
|
|
74
74
|
|
75
75
|
### Stream
|
76
76
|
|
77
|
-
|
77
|
+
To add new data to HLLs, use the `hll_add` method
|
78
78
|
|
79
79
|
```ruby
|
80
80
|
EventRollup.where(time_bucket: Date.current).hll_add(visitor_ids: ["visitor1", "visitor2"])
|
81
81
|
```
|
82
82
|
|
83
|
+
or the `hll_upsert` method (experimental)
|
84
|
+
|
85
|
+
```ruby
|
86
|
+
EventRollup.hll_upsert({time_bucket: Date.current, visitor_ids: ["visitor1", "visitor2"]})
|
87
|
+
```
|
88
|
+
|
83
89
|
## Querying
|
84
90
|
|
85
91
|
Get approximate unique values for a time range
|
@@ -132,7 +138,7 @@ There’s not a way to remove data from an HLL, so to delete data for a specific
|
|
132
138
|
|
133
139
|
## Hosted Postgres
|
134
140
|
|
135
|
-
The `hll` extension is available on
|
141
|
+
The `hll` extension is available on a number of [hosted providers](https://github.com/ankane/active_hll/issues/4).
|
136
142
|
|
137
143
|
## History
|
138
144
|
|
data/lib/active_hll/model.rb
CHANGED
@@ -50,6 +50,36 @@ module ActiveHll
|
|
50
50
|
|
51
51
|
update_all(set_clauses.join(", "))
|
52
52
|
end
|
53
|
+
|
54
|
+
# experimental
|
55
|
+
def hll_upsert(attributes)
|
56
|
+
hll_columns, other_columns = attributes.keys.partition { |a| columns_hash[a.to_s]&.type == :hll }
|
57
|
+
|
58
|
+
# important! raise if column detection fails
|
59
|
+
if hll_columns.empty?
|
60
|
+
raise ArgumentError, "No hll columns"
|
61
|
+
end
|
62
|
+
|
63
|
+
quoted_table = connection.quote_table_name(table_name)
|
64
|
+
|
65
|
+
quoted_hll_columns = hll_columns.map { |k| connection.quote_column_name(k) }
|
66
|
+
quoted_other_columns = other_columns.map { |k| connection.quote_column_name(k) }
|
67
|
+
quoted_columns = quoted_other_columns + quoted_hll_columns
|
68
|
+
|
69
|
+
hll_values =
|
70
|
+
hll_columns.map do |k|
|
71
|
+
vs = attributes[k]
|
72
|
+
vs = [vs] unless vs.is_a?(Array)
|
73
|
+
vs.map { |v| Utils.hll_hash_sql(self, v) }.join(" || ")
|
74
|
+
end
|
75
|
+
other_values = other_columns.map { |k| connection.quote(attributes[k]) }
|
76
|
+
|
77
|
+
insert_values = other_values + hll_values.map { |v| "hll_empty()#{v.size > 0 ? " || #{v}" : ""}" }
|
78
|
+
update_values = quoted_hll_columns.zip(hll_values).map { |k, v| "#{k} = COALESCE(#{quoted_table}.#{k}, hll_empty())#{v.size > 0 ? " || #{v}" : ""}" }
|
79
|
+
|
80
|
+
sql = "INSERT INTO #{quoted_table} (#{quoted_columns.join(", ")}) VALUES (#{insert_values.join(", ")}) ON CONFLICT (#{quoted_other_columns.join(", ")}) DO UPDATE SET #{update_values.join(", ")}"
|
81
|
+
connection.exec_insert(sql, "#{name} Upsert")
|
82
|
+
end
|
53
83
|
end
|
54
84
|
|
55
85
|
# doesn't update in-memory record attribute for performance
|
data/lib/active_hll/utils.rb
CHANGED
@@ -46,7 +46,7 @@ module ActiveHll
|
|
46
46
|
# matches table.column and column
|
47
47
|
unless column.is_a?(Symbol) || column.is_a?(Arel::Nodes::SqlLiteral)
|
48
48
|
column = column.to_s
|
49
|
-
unless /\A\w+(\.\w+)?\z/i.match(column)
|
49
|
+
unless /\A\w+(\.\w+)?\z/i.match?(column)
|
50
50
|
raise ActiveRecord::UnknownAttributeReference, "Query method called with non-attribute argument(s): #{column.inspect}. Use Arel.sql() for known-safe values."
|
51
51
|
end
|
52
52
|
end
|
data/lib/active_hll/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: active_hll
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-06-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activerecord
|
@@ -16,14 +16,14 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '6'
|
19
|
+
version: '6.1'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '6'
|
26
|
+
version: '6.1'
|
27
27
|
description:
|
28
28
|
email: andrew@ankane.org
|
29
29
|
executables: []
|
@@ -53,14 +53,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
53
53
|
requirements:
|
54
54
|
- - ">="
|
55
55
|
- !ruby/object:Gem::Version
|
56
|
-
version: '
|
56
|
+
version: '3.1'
|
57
57
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
59
|
- - ">="
|
60
60
|
- !ruby/object:Gem::Version
|
61
61
|
version: '0'
|
62
62
|
requirements: []
|
63
|
-
rubygems_version: 3.
|
63
|
+
rubygems_version: 3.5.11
|
64
64
|
signing_key:
|
65
65
|
specification_version: 4
|
66
66
|
summary: HyperLogLog for Rails and Postgres
|