active_hll 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a9014f39ae62f6db1066d4933bc28485ecc57e788922d70437c4744edfb0c033
4
- data.tar.gz: 91320d88909c41e425966993e145b9ff8aa5b57c6c3ac0556dc3661519691968
3
+ metadata.gz: 20a8025b26b6bc13e6ff8a37b877130200b87ca60608a68d1d96ba167c8c1a76
4
+ data.tar.gz: 4f3d16a8da8a25554ee8d7f9e5d1d7e6d6621e083528fde848e18b88daa0bbbf
5
5
  SHA512:
6
- metadata.gz: c55980654230a259a249a24b959c18b054d8e41773653bcc5411dbbbe3200fdd35c65dcfe729ed042943a8f2e81e26843c696d4989840fa43384622e3d56614f
7
- data.tar.gz: 5285ca0005787997dec664512b0bc6796201a8a4bd719e0ce0b9f6efa50096f3a3e7ade096db6ba943e9f84630b2a92c5d9b3a53a4a0b744cc4162e87d6c2990
6
+ metadata.gz: 856f6b94a988a75b4809b375f9f554c781f07958a89015c6b4cfa6eaf0c67bfaf12896762f1d940ef0c78f1bf26d674fb02a4f7bcf078205fbbd7a033a7bace0
7
+ data.tar.gz: c629cee7c89b0ee9b05ef65a453ee21be21f696bfd399ab7f29ae50f33aa369f6feeeb3ad41967e81329cadb1e70f78732e549c6541169e56e2f7f678fae7366
data/CHANGELOG.md CHANGED
@@ -1,3 +1,7 @@
1
+ ## 0.1.1 (2023-01-29)
2
+
3
+ - Added experimental `hll_upsert` method
4
+
1
5
  ## 0.1.0 (2023-01-24)
2
6
 
3
7
  - First release
data/README.md CHANGED
@@ -53,7 +53,7 @@ You can use [batch](#batch) and [stream](#stream) approaches to build HLLs
53
53
 
54
54
  ### Batch
55
55
 
56
- Use the `hll_agg` method to generate HLLs from existing data
56
+ To generate HLLs from existing data, use the `hll_agg` method
57
57
 
58
58
  ```ruby
59
59
  hlls = Event.group_by_day(:created_at).hll_agg(:visitor_id)
@@ -74,12 +74,18 @@ For a large number of HLLs, use SQL to generate and upsert in a single statement
74
74
 
75
75
  ### Stream
76
76
 
77
- Use the `hll_add` method to add new data to HLLs
77
+ To add new data to HLLs, use the `hll_add` method
78
78
 
79
79
  ```ruby
80
80
  EventRollup.where(time_bucket: Date.current).hll_add(visitor_ids: ["visitor1", "visitor2"])
81
81
  ```
82
82
 
83
+ or the `hll_upsert` method (experimental)
84
+
85
+ ```ruby
86
+ EventRollup.hll_upsert({time_bucket: Date.current, visitor_ids: ["visitor1", "visitor2"]})
87
+ ```
88
+
83
89
  ## Querying
84
90
 
85
91
  Get approximate unique values for a time range
@@ -132,7 +138,7 @@ There’s not a way to remove data from an HLL, so to delete data for a specific
132
138
 
133
139
  ## Hosted Postgres
134
140
 
135
- The `hll` extension is available on Amazon RDS, Google Cloud SQL, and DigitalOcean Managed Databases.
141
+ The `hll` extension is available on a number of [hosted providers](https://github.com/ankane/active_hll/issues/4).
136
142
 
137
143
  ## History
138
144
 
@@ -50,6 +50,36 @@ module ActiveHll
50
50
 
51
51
  update_all(set_clauses.join(", "))
52
52
  end
53
+
54
+ # experimental: inserts a row, or on conflict appends the given values to its hll columns, using one INSERT ... ON CONFLICT statement
55
+ def hll_upsert(attributes) # attributes: hash of column name => value; an hll column may be given one value or an array of values
56
+ hll_columns, other_columns = attributes.keys.partition { |a| columns_hash[a.to_s]&.type == :hll } # split the keys by whether the column's type is :hll
57
+
58
+ # important! raise if column detection fails (no key mapped to a column of type :hll)
59
+ if hll_columns.empty?
60
+ raise ArgumentError, "No hll columns"
61
+ end
62
+
63
+ quoted_table = connection.quote_table_name(table_name)
64
+
65
+ quoted_hll_columns = hll_columns.map { |k| connection.quote_column_name(k) }
66
+ quoted_other_columns = other_columns.map { |k| connection.quote_column_name(k) }
67
+ quoted_columns = quoted_other_columns + quoted_hll_columns # non-hll columns first; insert_values below uses the same order
68
+
69
+ hll_values =
70
+ hll_columns.map do |k|
71
+ vs = attributes[k]
72
+ vs = [vs] unless vs.is_a?(Array) # accept a single value as well as an array
73
+ vs.map { |v| Utils.hll_hash_sql(self, v) }.join(" || ") # one SQL fragment per value (presumably a hash expression; helper defined elsewhere), joined with ||; an empty array gives ""
74
+ end
75
+ other_values = other_columns.map { |k| connection.quote(attributes[k]) }
76
+
77
+ insert_values = other_values + hll_values.map { |v| "hll_empty()#{v.size > 0 ? " || #{v}" : ""}" } # new rows start from hll_empty(), with the fragments appended when there are any
78
+ update_values = quoted_hll_columns.zip(hll_values).map { |k, v| "#{k} = COALESCE(#{quoted_table}.#{k}, hll_empty())#{v.size > 0 ? " || #{v}" : ""}" } # existing rows: append to the current column value, substituting hll_empty() for NULL
79
+
80
+ sql = "INSERT INTO #{quoted_table} (#{quoted_columns.join(", ")}) VALUES (#{insert_values.join(", ")}) ON CONFLICT (#{quoted_other_columns.join(", ")}) DO UPDATE SET #{update_values.join(", ")}" # conflict target is every non-hll column (presumably needs a matching unique index - confirm); NOTE(review): the target list is empty when only hll columns are passed
81
+ connection.exec_insert(sql, "#{name} Upsert") # second argument is the statement name handed to exec_insert (presumably the log label - confirm)
82
+ end
53
83
  end
54
84
 
55
85
  # doesn't update in-memory record attribute for performance
@@ -1,3 +1,3 @@
1
1
  module ActiveHll
2
- VERSION = "0.1.0"
2
+ VERSION = "0.1.1"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: active_hll
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-01-24 00:00:00.000000000 Z
11
+ date: 2023-01-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activerecord