active_hll 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/README.md +9 -3
- data/lib/active_hll/model.rb +30 -0
- data/lib/active_hll/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 20a8025b26b6bc13e6ff8a37b877130200b87ca60608a68d1d96ba167c8c1a76
|
4
|
+
data.tar.gz: 4f3d16a8da8a25554ee8d7f9e5d1d7e6d6621e083528fde848e18b88daa0bbbf
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 856f6b94a988a75b4809b375f9f554c781f07958a89015c6b4cfa6eaf0c67bfaf12896762f1d940ef0c78f1bf26d674fb02a4f7bcf078205fbbd7a033a7bace0
|
7
|
+
data.tar.gz: c629cee7c89b0ee9b05ef65a453ee21be21f696bfd399ab7f29ae50f33aa369f6feeeb3ad41967e81329cadb1e70f78732e549c6541169e56e2f7f678fae7366
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -53,7 +53,7 @@ You can use [batch](#batch) and [stream](#stream) approaches to build HLLs
|
|
53
53
|
|
54
54
|
### Batch
|
55
55
|
|
56
|
-
|
56
|
+
To generate HLLs from existing data, use the `hll_agg` method
|
57
57
|
|
58
58
|
```ruby
|
59
59
|
hlls = Event.group_by_day(:created_at).hll_agg(:visitor_id)
|
@@ -74,12 +74,18 @@ For a large number of HLLs, use SQL to generate and upsert in a single statement
|
|
74
74
|
|
75
75
|
### Stream
|
76
76
|
|
77
|
-
|
77
|
+
To add new data to HLLs, use the `hll_add` method
|
78
78
|
|
79
79
|
```ruby
|
80
80
|
EventRollup.where(time_bucket: Date.current).hll_add(visitor_ids: ["visitor1", "visitor2"])
|
81
81
|
```
|
82
82
|
|
83
|
+
or the `hll_upsert` method (experimental)
|
84
|
+
|
85
|
+
```ruby
|
86
|
+
EventRollup.hll_upsert({time_bucket: Date.current, visitor_ids: ["visitor1", "visitor2"]})
|
87
|
+
```
|
88
|
+
|
83
89
|
## Querying
|
84
90
|
|
85
91
|
Get approximate unique values for a time range
|
@@ -132,7 +138,7 @@ There’s not a way to remove data from an HLL, so to delete data for a specific
|
|
132
138
|
|
133
139
|
## Hosted Postgres
|
134
140
|
|
135
|
-
The `hll` extension is available on
|
141
|
+
The `hll` extension is available on a number of [hosted providers](https://github.com/ankane/active_hll/issues/4).
|
136
142
|
|
137
143
|
## History
|
138
144
|
|
data/lib/active_hll/model.rb
CHANGED
@@ -50,6 +50,36 @@ module ActiveHll
|
|
50
50
|
|
51
51
|
update_all(set_clauses.join(", "))
|
52
52
|
end
|
53
|
+
|
54
|
+
# experimental
|
55
|
+
def hll_upsert(attributes)
|
56
|
+
hll_columns, other_columns = attributes.keys.partition { |a| columns_hash[a.to_s]&.type == :hll }
|
57
|
+
|
58
|
+
# important! raise if column detection fails
|
59
|
+
if hll_columns.empty?
|
60
|
+
raise ArgumentError, "No hll columns"
|
61
|
+
end
|
62
|
+
|
63
|
+
quoted_table = connection.quote_table_name(table_name)
|
64
|
+
|
65
|
+
quoted_hll_columns = hll_columns.map { |k| connection.quote_column_name(k) }
|
66
|
+
quoted_other_columns = other_columns.map { |k| connection.quote_column_name(k) }
|
67
|
+
quoted_columns = quoted_other_columns + quoted_hll_columns
|
68
|
+
|
69
|
+
hll_values =
|
70
|
+
hll_columns.map do |k|
|
71
|
+
vs = attributes[k]
|
72
|
+
vs = [vs] unless vs.is_a?(Array)
|
73
|
+
vs.map { |v| Utils.hll_hash_sql(self, v) }.join(" || ")
|
74
|
+
end
|
75
|
+
other_values = other_columns.map { |k| connection.quote(attributes[k]) }
|
76
|
+
|
77
|
+
insert_values = other_values + hll_values.map { |v| "hll_empty()#{v.size > 0 ? " || #{v}" : ""}" }
|
78
|
+
update_values = quoted_hll_columns.zip(hll_values).map { |k, v| "#{k} = COALESCE(#{quoted_table}.#{k}, hll_empty())#{v.size > 0 ? " || #{v}" : ""}" }
|
79
|
+
|
80
|
+
sql = "INSERT INTO #{quoted_table} (#{quoted_columns.join(", ")}) VALUES (#{insert_values.join(", ")}) ON CONFLICT (#{quoted_other_columns.join(", ")}) DO UPDATE SET #{update_values.join(", ")}"
|
81
|
+
connection.exec_insert(sql, "#{name} Upsert")
|
82
|
+
end
|
53
83
|
end
|
54
84
|
|
55
85
|
# doesn't update in-memory record attribute for performance
|
data/lib/active_hll/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: active_hll
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-01-
|
11
|
+
date: 2023-01-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activerecord
|