active_hll 0.1.1 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 20a8025b26b6bc13e6ff8a37b877130200b87ca60608a68d1d96ba167c8c1a76
4
- data.tar.gz: 4f3d16a8da8a25554ee8d7f9e5d1d7e6d6621e083528fde848e18b88daa0bbbf
3
+ metadata.gz: 49fecb9c7cb1cdcf399ce9ae00e9e637d9d3c0c3789ad02da9bef4e2977bc2f5
4
+ data.tar.gz: d02131c319a338575fc215397a95d153648bc6c5b390c453f67d0509b1a3ee6f
5
5
  SHA512:
6
- metadata.gz: 856f6b94a988a75b4809b375f9f554c781f07958a89015c6b4cfa6eaf0c67bfaf12896762f1d940ef0c78f1bf26d674fb02a4f7bcf078205fbbd7a033a7bace0
7
- data.tar.gz: c629cee7c89b0ee9b05ef65a453ee21be21f696bfd399ab7f29ae50f33aa369f6feeeb3ad41967e81329cadb1e70f78732e549c6541169e56e2f7f678fae7366
6
+ metadata.gz: acc8c253b0d5ab708e73459ed35506e6d009f3c8f709c9f82a1b44ca77da2ac0a8431767f8d6a9d15faf5a17dbf20db1d7093008cad8e118979f1ffcf93b61ef
7
+ data.tar.gz: 1375c9ac1f6d6df024dfc0ccdf9304f1245a8d5510e8e69b34bf1d51fa49f6fb2bec7cf137476492c68d1b29f3344d947ba3b1cc26e762a53a46bf0c2681e7a1
data/CHANGELOG.md CHANGED
@@ -1,3 +1,11 @@
1
+ ## 0.2.1 (2024-10-07)
2
+
3
+ - Fixed connection leasing for Active Record 7.2+
4
+
5
+ ## 0.2.0 (2024-06-24)
6
+
7
+ - Dropped support for Ruby < 3.1 and Active Record < 6.1
8
+
1
9
  ## 0.1.1 (2023-01-29)
2
10
 
3
11
  - Added experimental `hll_upsert` method
data/LICENSE.txt CHANGED
@@ -1,6 +1,6 @@
1
1
  The MIT License (MIT)
2
2
 
3
- Copyright (c) 2023 Andrew Kane
3
+ Copyright (c) 2023-2024 Andrew Kane
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
6
  of this software and associated documentation files (the "Software"), to deal
data/README.md CHANGED
@@ -4,7 +4,7 @@
4
4
 
5
5
  For fast, approximate count-distinct queries
6
6
 
7
- [![Build Status](https://github.com/ankane/active_hll/workflows/build/badge.svg?branch=master)](https://github.com/ankane/active_hll/actions)
7
+ [![Build Status](https://github.com/ankane/active_hll/actions/workflows/build.yml/badge.svg)](https://github.com/ankane/active_hll/actions)
8
8
 
9
9
  ## Installation
10
10
 
@@ -12,8 +12,8 @@ First, install the [hll extension](https://github.com/citusdata/postgresql-hll)
12
12
 
13
13
  ```sh
14
14
  cd /tmp
15
- curl -L https://github.com/citusdata/postgresql-hll/archive/refs/tags/v2.17.tar.gz | tar xz
16
- cd postgresql-hll-2.17
15
+ curl -L https://github.com/citusdata/postgresql-hll/archive/refs/tags/v2.18.tar.gz | tar xz
16
+ cd postgresql-hll-2.18
17
17
  make
18
18
  make install # may need sudo
19
19
  ```
@@ -39,7 +39,7 @@ HLLs provide an approximate count of unique values (like unique visitors). By ro
39
39
  Create a table with an `hll` column
40
40
 
41
41
  ```ruby
42
- class CreateEventRollups < ActiveRecord::Migration[7.0]
42
+ class CreateEventRollups < ActiveRecord::Migration[7.2]
43
43
  def change
44
44
  create_table :event_rollups do |t|
45
45
  t.date :time_bucket, index: {unique: true}
@@ -20,65 +20,71 @@ module ActiveHll
20
20
  # experimental
21
21
  # doesn't work with non-default parameters
22
22
  def hll_generate(values)
23
- parts = ["hll_empty()"]
23
+ Utils.with_connection(self) do |connection|
24
+ parts = ["hll_empty()"]
24
25
 
25
- values.each do |value|
26
- parts << Utils.hll_hash_sql(self, value)
27
- end
26
+ values.each do |value|
27
+ parts << Utils.hll_hash_sql(connection, value)
28
+ end
28
29
 
29
- result = connection.select_all("SELECT #{parts.join(" || ")}").rows[0][0]
30
- ActiveHll::Type.new.deserialize(result)
30
+ result = connection.select_all("SELECT #{parts.join(" || ")}").rows[0][0]
31
+ ActiveHll::Type.new.deserialize(result)
32
+ end
31
33
  end
32
34
 
33
35
  def hll_add(attributes)
34
- set_clauses =
35
- attributes.map do |attribute, values|
36
- values = [values] unless values.is_a?(Array)
37
- return 0 if values.empty?
38
-
39
- quoted_column = connection.quote_column_name(attribute)
40
- # possibly fetch parameters for the column in the future
41
- # for now, users should set a default value on the column
42
- parts = ["COALESCE(#{quoted_column}, hll_empty())"]
43
-
44
- values.each do |value|
45
- parts << Utils.hll_hash_sql(self, value)
36
+ Utils.with_connection(self) do |connection|
37
+ set_clauses =
38
+ attributes.map do |attribute, values|
39
+ values = [values] unless values.is_a?(Array)
40
+ return 0 if values.empty?
41
+
42
+ quoted_column = connection.quote_column_name(attribute)
43
+ # possibly fetch parameters for the column in the future
44
+ # for now, users should set a default value on the column
45
+ parts = ["COALESCE(#{quoted_column}, hll_empty())"]
46
+
47
+ values.each do |value|
48
+ parts << Utils.hll_hash_sql(connection, value)
49
+ end
50
+
51
+ "#{quoted_column} = #{parts.join(" || ")}"
46
52
  end
47
53
 
48
- "#{quoted_column} = #{parts.join(" || ")}"
49
- end
50
-
51
- update_all(set_clauses.join(", "))
54
+ update_all(set_clauses.join(", "))
55
+ end
52
56
  end
53
57
 
54
58
  # experimental
55
59
  def hll_upsert(attributes)
56
- hll_columns, other_columns = attributes.keys.partition { |a| columns_hash[a.to_s]&.type == :hll }
60
+ Utils.with_connection(self) do |connection|
61
+ hll_columns, other_columns = attributes.keys.partition { |a| columns_hash[a.to_s]&.type == :hll }
57
62
 
58
- # important! raise if column detection fails
59
- if hll_columns.empty?
60
- raise ArgumentError, "No hll columns"
61
- end
63
+ # important! raise if column detection fails
64
+ if hll_columns.empty?
65
+ raise ArgumentError, "No hll columns"
66
+ end
62
67
 
63
- quoted_table = connection.quote_table_name(table_name)
68
+ quoted_table = connection.quote_table_name(table_name)
64
69
 
65
- quoted_hll_columns = hll_columns.map { |k| connection.quote_column_name(k) }
66
- quoted_other_columns = other_columns.map { |k| connection.quote_column_name(k) }
67
- quoted_columns = quoted_other_columns + quoted_hll_columns
70
+ quoted_hll_columns = hll_columns.map { |k| connection.quote_column_name(k) }
71
+ quoted_other_columns = other_columns.map { |k| connection.quote_column_name(k) }
72
+ quoted_columns = quoted_other_columns + quoted_hll_columns
68
73
 
69
- hll_values =
70
- hll_columns.map do |k|
71
- vs = attributes[k]
72
- vs = [vs] unless vs.is_a?(Array)
73
- vs.map { |v| Utils.hll_hash_sql(self, v) }.join(" || ")
74
- end
75
- other_values = other_columns.map { |k| connection.quote(attributes[k]) }
74
+ hll_values =
75
+ hll_columns.map do |k|
76
+ vs = attributes[k]
77
+ vs = [vs] unless vs.is_a?(Array)
78
+ vs.map { |v| Utils.hll_hash_sql(connection, v) }.join(" || ")
79
+ end
80
+ other_values = other_columns.map { |k| connection.quote(attributes[k]) }
76
81
 
77
- insert_values = other_values + hll_values.map { |v| "hll_empty()#{v.size > 0 ? " || #{v}" : ""}" }
78
- update_values = quoted_hll_columns.zip(hll_values).map { |k, v| "#{k} = COALESCE(#{quoted_table}.#{k}, hll_empty())#{v.size > 0 ? " || #{v}" : ""}" }
82
+ insert_values = other_values + hll_values.map { |v| "hll_empty()#{v.size > 0 ? " || #{v}" : ""}" }
83
+ update_values = quoted_hll_columns.zip(hll_values).map { |k, v| "#{k} = COALESCE(#{quoted_table}.#{k}, hll_empty())#{v.size > 0 ? " || #{v}" : ""}" }
79
84
 
80
- sql = "INSERT INTO #{quoted_table} (#{quoted_columns.join(", ")}) VALUES (#{insert_values.join(", ")}) ON CONFLICT (#{quoted_other_columns.join(", ")}) DO UPDATE SET #{update_values.join(", ")}"
81
- connection.exec_insert(sql, "#{name} Upsert")
85
+ sql = "INSERT INTO #{quoted_table} (#{quoted_columns.join(", ")}) VALUES (#{insert_values.join(", ")}) ON CONFLICT (#{quoted_other_columns.join(", ")}) DO UPDATE SET #{update_values.join(", ")}"
86
+ connection.exec_insert(sql, "#{name} Upsert")
87
+ end
82
88
  end
83
89
  end
84
90
 
@@ -89,8 +95,10 @@ module ActiveHll
89
95
  end
90
96
 
91
97
  def hll_count(attribute)
92
- quoted_column = self.class.connection.quote_column_name(attribute)
93
- self.class.where(id: id).pluck("hll_cardinality(#{quoted_column})").first || 0.0
98
+ Utils.with_connection(self.class) do |connection|
99
+ quoted_column = connection.quote_column_name(attribute)
100
+ self.class.where(id: id).pluck("hll_cardinality(#{quoted_column})").first || 0.0
101
+ end
94
102
  end
95
103
  end
96
104
  end
@@ -1,7 +1,7 @@
1
1
  module ActiveHll
2
2
  module Utils
3
3
  class << self
4
- def hll_hash_sql(klass, value)
4
+ def hll_hash_sql(connection, value)
5
5
  hash_function =
6
6
  case value
7
7
  when true, false
@@ -13,40 +13,46 @@ module ActiveHll
13
13
  else
14
14
  raise ArgumentError, "Unexpected type: #{value.class.name}"
15
15
  end
16
- quoted_value = klass.connection.quote(value)
16
+ quoted_value = connection.quote(value)
17
17
  "#{hash_function}(#{quoted_value})"
18
18
  end
19
19
 
20
- def hll_calculate(relation, operation, column, default_value:)
21
- sql, relation, group_values = hll_calculate_sql(relation, operation, column)
22
- result = relation.connection.select_all(sql)
20
+ def with_connection(relation, &block)
21
+ relation.connection_pool.with_connection(&block)
22
+ end
23
23
 
24
- # typecast
25
- rows = []
26
- columns = result.columns
27
- result.rows.each do |untyped_row|
28
- rows << (result.column_types.empty? ? untyped_row : columns.each_with_index.map { |c, i| untyped_row[i] && result.column_types[c] ? result.column_types[c].deserialize(untyped_row[i]) : untyped_row[i] })
29
- end
24
+ def hll_calculate(relation, operation, column, default_value:)
25
+ Utils.with_connection(relation) do |connection|
26
+ sql, relation, group_values = hll_calculate_sql(relation, connection, operation, column)
27
+ result = connection.select_all(sql)
30
28
 
31
- result =
32
- if group_values.any?
33
- Hash[rows.map { |r| [r.size == 2 ? r[0] : r[0..-2], r[-1]] }]
34
- else
35
- rows[0] && rows[0][0]
29
+ # typecast
30
+ rows = []
31
+ columns = result.columns
32
+ result.rows.each do |untyped_row|
33
+ rows << (result.column_types.empty? ? untyped_row : columns.each_with_index.map { |c, i| untyped_row[i] && result.column_types[c] ? result.column_types[c].deserialize(untyped_row[i]) : untyped_row[i] })
36
34
  end
37
35
 
38
- result = Groupdate.process_result(relation, result, default_value: default_value) if defined?(Groupdate.process_result)
36
+ result =
37
+ if group_values.any?
38
+ Hash[rows.map { |r| [r.size == 2 ? r[0] : r[0..-2], r[-1]] }]
39
+ else
40
+ rows[0] && rows[0][0]
41
+ end
42
+
43
+ result = Groupdate.process_result(relation, result, default_value: default_value) if defined?(Groupdate.process_result)
39
44
 
40
- result
45
+ result
46
+ end
41
47
  end
42
48
 
43
- def hll_calculate_sql(relation, operation, column)
49
+ def hll_calculate_sql(relation, connection, operation, column)
44
50
  # basic version of Active Record disallow_raw_sql!
45
51
  # symbol = column (safe), Arel node = SQL (safe), other = untrusted
46
52
  # matches table.column and column
47
53
  unless column.is_a?(Symbol) || column.is_a?(Arel::Nodes::SqlLiteral)
48
54
  column = column.to_s
49
- unless /\A\w+(\.\w+)?\z/i.match(column)
55
+ unless /\A\w+(\.\w+)?\z/i.match?(column)
50
56
  raise ActiveRecord::UnknownAttributeReference, "Query method called with non-attribute argument(s): #{column.inspect}. Use Arel.sql() for known-safe values."
51
57
  end
52
58
  end
@@ -54,7 +60,7 @@ module ActiveHll
54
60
  # column resolution
55
61
  node = relation.all.send(:arel_columns, [column]).first
56
62
  node = Arel::Nodes::SqlLiteral.new(node) if node.is_a?(String)
57
- column = relation.connection.visitor.accept(node, Arel::Collectors::SQLString.new).value
63
+ column = connection.visitor.accept(node, Arel::Collectors::SQLString.new).value
58
64
 
59
65
  group_values = relation.all.group_values
60
66
 
@@ -1,3 +1,3 @@
1
1
  module ActiveHll
2
- VERSION = "0.1.1"
2
+ VERSION = "0.2.1"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: active_hll
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-01-30 00:00:00.000000000 Z
11
+ date: 2024-10-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activerecord
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - ">="
18
18
  - !ruby/object:Gem::Version
19
- version: '6'
19
+ version: '6.1'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
- version: '6'
26
+ version: '6.1'
27
27
  description:
28
28
  email: andrew@ankane.org
29
29
  executables: []
@@ -53,14 +53,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
53
53
  requirements:
54
54
  - - ">="
55
55
  - !ruby/object:Gem::Version
56
- version: '2.7'
56
+ version: '3.1'
57
57
  required_rubygems_version: !ruby/object:Gem::Requirement
58
58
  requirements:
59
59
  - - ">="
60
60
  - !ruby/object:Gem::Version
61
61
  version: '0'
62
62
  requirements: []
63
- rubygems_version: 3.4.1
63
+ rubygems_version: 3.5.16
64
64
  signing_key:
65
65
  specification_version: 4
66
66
  summary: HyperLogLog for Rails and Postgres