active_hll 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a9014f39ae62f6db1066d4933bc28485ecc57e788922d70437c4744edfb0c033
4
- data.tar.gz: 91320d88909c41e425966993e145b9ff8aa5b57c6c3ac0556dc3661519691968
3
+ metadata.gz: a357d4e04aad048c5abddf17021edde474073438414e6d85e3f48748ff1fc304
4
+ data.tar.gz: '03031590842b88853c7028e5a45174ff104f674ecf86e235f64fdb7120435c23'
5
5
  SHA512:
6
- metadata.gz: c55980654230a259a249a24b959c18b054d8e41773653bcc5411dbbbe3200fdd35c65dcfe729ed042943a8f2e81e26843c696d4989840fa43384622e3d56614f
7
- data.tar.gz: 5285ca0005787997dec664512b0bc6796201a8a4bd719e0ce0b9f6efa50096f3a3e7ade096db6ba943e9f84630b2a92c5d9b3a53a4a0b744cc4162e87d6c2990
6
+ metadata.gz: 2efc0fc07d80ae37f2063388151bfe3b0a7459d584a03b51661b1010f64b03d8e8b275e65664f859e77fb6c1fa48840ef3f7cae706f37c23afbefe6d358fe04f
7
+ data.tar.gz: 29c3beb46f85e6a58ed522a4503414b4cbb2f322c5563c26a07a38e2160d84058c3c79ace3dd17165f961b76d7313b8e5c4f906b52d52106fa8d5713bccb8ffc
data/CHANGELOG.md CHANGED
@@ -1,3 +1,11 @@
1
+ ## 0.2.0 (2024-06-24)
2
+
3
+ - Dropped support for Ruby < 3.1 and Active Record < 6.1
4
+
5
+ ## 0.1.1 (2023-01-29)
6
+
7
+ - Added experimental `hll_upsert` method
8
+
1
9
  ## 0.1.0 (2023-01-24)
2
10
 
3
11
  - First release
data/LICENSE.txt CHANGED
@@ -1,6 +1,6 @@
1
1
  The MIT License (MIT)
2
2
 
3
- Copyright (c) 2023 Andrew Kane
3
+ Copyright (c) 2023-2024 Andrew Kane
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
6
  of this software and associated documentation files (the "Software"), to deal
data/README.md CHANGED
@@ -4,7 +4,7 @@
4
4
 
5
5
  For fast, approximate count-distinct queries
6
6
 
7
- [![Build Status](https://github.com/ankane/active_hll/workflows/build/badge.svg?branch=master)](https://github.com/ankane/active_hll/actions)
7
+ [![Build Status](https://github.com/ankane/active_hll/actions/workflows/build.yml/badge.svg)](https://github.com/ankane/active_hll/actions)
8
8
 
9
9
  ## Installation
10
10
 
@@ -12,8 +12,8 @@ First, install the [hll extension](https://github.com/citusdata/postgresql-hll)
12
12
 
13
13
  ```sh
14
14
  cd /tmp
15
- curl -L https://github.com/citusdata/postgresql-hll/archive/refs/tags/v2.17.tar.gz | tar xz
16
- cd postgresql-hll-2.17
15
+ curl -L https://github.com/citusdata/postgresql-hll/archive/refs/tags/v2.18.tar.gz | tar xz
16
+ cd postgresql-hll-2.18
17
17
  make
18
18
  make install # may need sudo
19
19
  ```
@@ -39,7 +39,7 @@ HLLs provide an approximate count of unique values (like unique visitors). By ro
39
39
  Create a table with an `hll` column
40
40
 
41
41
  ```ruby
42
- class CreateEventRollups < ActiveRecord::Migration[7.0]
42
+ class CreateEventRollups < ActiveRecord::Migration[7.1]
43
43
  def change
44
44
  create_table :event_rollups do |t|
45
45
  t.date :time_bucket, index: {unique: true}
@@ -53,7 +53,7 @@ You can use [batch](#batch) and [stream](#stream) approaches to build HLLs
53
53
 
54
54
  ### Batch
55
55
 
56
- Use the `hll_agg` method to generate HLLs from existing data
56
+ To generate HLLs from existing data, use the `hll_agg` method
57
57
 
58
58
  ```ruby
59
59
  hlls = Event.group_by_day(:created_at).hll_agg(:visitor_id)
@@ -74,12 +74,18 @@ For a large number of HLLs, use SQL to generate and upsert in a single statement
74
74
 
75
75
  ### Stream
76
76
 
77
- Use the `hll_add` method to add new data to HLLs
77
+ To add new data to HLLs, use the `hll_add` method
78
78
 
79
79
  ```ruby
80
80
  EventRollup.where(time_bucket: Date.current).hll_add(visitor_ids: ["visitor1", "visitor2"])
81
81
  ```
82
82
 
83
+ or the `hll_upsert` method (experimental)
84
+
85
+ ```ruby
86
+ EventRollup.hll_upsert({time_bucket: Date.current, visitor_ids: ["visitor1", "visitor2"]})
87
+ ```
88
+
83
89
  ## Querying
84
90
 
85
91
  Get approximate unique values for a time range
@@ -132,7 +138,7 @@ There’s not a way to remove data from an HLL, so to delete data for a specific
132
138
 
133
139
  ## Hosted Postgres
134
140
 
135
- The `hll` extension is available on Amazon RDS, Google Cloud SQL, and DigitalOcean Managed Databases.
141
+ The `hll` extension is available on a number of [hosted providers](https://github.com/ankane/active_hll/issues/4).
136
142
 
137
143
  ## History
138
144
 
@@ -50,6 +50,36 @@ module ActiveHll
50
50
 
51
51
  update_all(set_clauses.join(", "))
52
52
  end
53
+
54
+ # experimental
55
+ def hll_upsert(attributes)
56
+ hll_columns, other_columns = attributes.keys.partition { |a| columns_hash[a.to_s]&.type == :hll }
57
+
58
+ # important! raise if column detection fails
59
+ if hll_columns.empty?
60
+ raise ArgumentError, "No hll columns"
61
+ end
62
+
63
+ quoted_table = connection.quote_table_name(table_name)
64
+
65
+ quoted_hll_columns = hll_columns.map { |k| connection.quote_column_name(k) }
66
+ quoted_other_columns = other_columns.map { |k| connection.quote_column_name(k) }
67
+ quoted_columns = quoted_other_columns + quoted_hll_columns
68
+
69
+ hll_values =
70
+ hll_columns.map do |k|
71
+ vs = attributes[k]
72
+ vs = [vs] unless vs.is_a?(Array)
73
+ vs.map { |v| Utils.hll_hash_sql(self, v) }.join(" || ")
74
+ end
75
+ other_values = other_columns.map { |k| connection.quote(attributes[k]) }
76
+
77
+ insert_values = other_values + hll_values.map { |v| "hll_empty()#{v.size > 0 ? " || #{v}" : ""}" }
78
+ update_values = quoted_hll_columns.zip(hll_values).map { |k, v| "#{k} = COALESCE(#{quoted_table}.#{k}, hll_empty())#{v.size > 0 ? " || #{v}" : ""}" }
79
+
80
+ sql = "INSERT INTO #{quoted_table} (#{quoted_columns.join(", ")}) VALUES (#{insert_values.join(", ")}) ON CONFLICT (#{quoted_other_columns.join(", ")}) DO UPDATE SET #{update_values.join(", ")}"
81
+ connection.exec_insert(sql, "#{name} Upsert")
82
+ end
53
83
  end
54
84
 
55
85
  # doesn't update in-memory record attribute for performance
@@ -46,7 +46,7 @@ module ActiveHll
46
46
  # matches table.column and column
47
47
  unless column.is_a?(Symbol) || column.is_a?(Arel::Nodes::SqlLiteral)
48
48
  column = column.to_s
49
- unless /\A\w+(\.\w+)?\z/i.match(column)
49
+ unless /\A\w+(\.\w+)?\z/i.match?(column)
50
50
  raise ActiveRecord::UnknownAttributeReference, "Query method called with non-attribute argument(s): #{column.inspect}. Use Arel.sql() for known-safe values."
51
51
  end
52
52
  end
@@ -1,3 +1,3 @@
1
1
  module ActiveHll
2
- VERSION = "0.1.0"
2
+ VERSION = "0.2.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: active_hll
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-01-24 00:00:00.000000000 Z
11
+ date: 2024-06-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activerecord
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - ">="
18
18
  - !ruby/object:Gem::Version
19
- version: '6'
19
+ version: '6.1'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
- version: '6'
26
+ version: '6.1'
27
27
  description:
28
28
  email: andrew@ankane.org
29
29
  executables: []
@@ -53,14 +53,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
53
53
  requirements:
54
54
  - - ">="
55
55
  - !ruby/object:Gem::Version
56
- version: '2.7'
56
+ version: '3.1'
57
57
  required_rubygems_version: !ruby/object:Gem::Requirement
58
58
  requirements:
59
59
  - - ">="
60
60
  - !ruby/object:Gem::Version
61
61
  version: '0'
62
62
  requirements: []
63
- rubygems_version: 3.4.1
63
+ rubygems_version: 3.5.11
64
64
  signing_key:
65
65
  specification_version: 4
66
66
  summary: HyperLogLog for Rails and Postgres