active_hll 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/README.md +1 -1
- data/lib/active_hll/model.rb +52 -44
- data/lib/active_hll/utils.rb +26 -20
- data/lib/active_hll/version.rb +1 -1
- metadata +3 -3
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA256:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: 49fecb9c7cb1cdcf399ce9ae00e9e637d9d3c0c3789ad02da9bef4e2977bc2f5
         | 
| 4 | 
            +
              data.tar.gz: d02131c319a338575fc215397a95d153648bc6c5b390c453f67d0509b1a3ee6f
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: acc8c253b0d5ab708e73459ed35506e6d009f3c8f709c9f82a1b44ca77da2ac0a8431767f8d6a9d15faf5a17dbf20db1d7093008cad8e118979f1ffcf93b61ef
         | 
| 7 | 
            +
              data.tar.gz: 1375c9ac1f6d6df024dfc0ccdf9304f1245a8d5510e8e69b34bf1d51fa49f6fb2bec7cf137476492c68d1b29f3344d947ba3b1cc26e762a53a46bf0c2681e7a1
         | 
    
        data/CHANGELOG.md
    CHANGED
    
    
    
        data/README.md
    CHANGED
    
    | @@ -39,7 +39,7 @@ HLLs provide an approximate count of unique values (like unique visitors). By ro | |
| 39 39 | 
             
            Create a table with an `hll` column
         | 
| 40 40 |  | 
| 41 41 | 
             
            ```ruby
         | 
| 42 | 
            -
            class CreateEventRollups < ActiveRecord::Migration[7. | 
| 42 | 
            +
            class CreateEventRollups < ActiveRecord::Migration[7.2]
         | 
| 43 43 | 
             
              def change
         | 
| 44 44 | 
             
                create_table :event_rollups do |t|
         | 
| 45 45 | 
             
                  t.date :time_bucket, index: {unique: true}
         | 
    
        data/lib/active_hll/model.rb
    CHANGED
    
    | @@ -20,65 +20,71 @@ module ActiveHll | |
| 20 20 | 
             
                  # experimental
         | 
| 21 21 | 
             
                  # doesn't work with non-default parameters
         | 
| 22 22 | 
             
                  def hll_generate(values)
         | 
| 23 | 
            -
                     | 
| 23 | 
            +
                    Utils.with_connection(self) do |connection|
         | 
| 24 | 
            +
                      parts = ["hll_empty()"]
         | 
| 24 25 |  | 
| 25 | 
            -
             | 
| 26 | 
            -
             | 
| 27 | 
            -
             | 
| 26 | 
            +
                      values.each do |value|
         | 
| 27 | 
            +
                        parts << Utils.hll_hash_sql(connection, value)
         | 
| 28 | 
            +
                      end
         | 
| 28 29 |  | 
| 29 | 
            -
             | 
| 30 | 
            -
             | 
| 30 | 
            +
                      result = connection.select_all("SELECT #{parts.join(" || ")}").rows[0][0]
         | 
| 31 | 
            +
                      ActiveHll::Type.new.deserialize(result)
         | 
| 32 | 
            +
                    end
         | 
| 31 33 | 
             
                  end
         | 
| 32 34 |  | 
| 33 35 | 
             
                  def hll_add(attributes)
         | 
| 34 | 
            -
                     | 
| 35 | 
            -
                       | 
| 36 | 
            -
                         | 
| 37 | 
            -
             | 
| 38 | 
            -
             | 
| 39 | 
            -
             | 
| 40 | 
            -
             | 
| 41 | 
            -
             | 
| 42 | 
            -
             | 
| 43 | 
            -
             | 
| 44 | 
            -
             | 
| 45 | 
            -
                           | 
| 36 | 
            +
                    Utils.with_connection(self) do |connection|
         | 
| 37 | 
            +
                      set_clauses =
         | 
| 38 | 
            +
                        attributes.map do |attribute, values|
         | 
| 39 | 
            +
                          values = [values] unless values.is_a?(Array)
         | 
| 40 | 
            +
                          return 0 if values.empty?
         | 
| 41 | 
            +
             | 
| 42 | 
            +
                          quoted_column = connection.quote_column_name(attribute)
         | 
| 43 | 
            +
                          # possibly fetch parameters for the column in the future
         | 
| 44 | 
            +
                          # for now, users should set a default value on the column
         | 
| 45 | 
            +
                          parts = ["COALESCE(#{quoted_column}, hll_empty())"]
         | 
| 46 | 
            +
             | 
| 47 | 
            +
                          values.each do |value|
         | 
| 48 | 
            +
                            parts << Utils.hll_hash_sql(connection, value)
         | 
| 49 | 
            +
                          end
         | 
| 50 | 
            +
             | 
| 51 | 
            +
                          "#{quoted_column} = #{parts.join(" || ")}"
         | 
| 46 52 | 
             
                        end
         | 
| 47 53 |  | 
| 48 | 
            -
             | 
| 49 | 
            -
             | 
| 50 | 
            -
             | 
| 51 | 
            -
                    update_all(set_clauses.join(", "))
         | 
| 54 | 
            +
                      update_all(set_clauses.join(", "))
         | 
| 55 | 
            +
                    end
         | 
| 52 56 | 
             
                  end
         | 
| 53 57 |  | 
| 54 58 | 
             
                  # experimental
         | 
| 55 59 | 
             
                  def hll_upsert(attributes)
         | 
| 56 | 
            -
                     | 
| 60 | 
            +
                    Utils.with_connection(self) do |connection|
         | 
| 61 | 
            +
                      hll_columns, other_columns = attributes.keys.partition { |a| columns_hash[a.to_s]&.type == :hll }
         | 
| 57 62 |  | 
| 58 | 
            -
             | 
| 59 | 
            -
             | 
| 60 | 
            -
             | 
| 61 | 
            -
             | 
| 63 | 
            +
                      # important! raise if column detection fails
         | 
| 64 | 
            +
                      if hll_columns.empty?
         | 
| 65 | 
            +
                        raise ArgumentError, "No hll columns"
         | 
| 66 | 
            +
                      end
         | 
| 62 67 |  | 
| 63 | 
            -
             | 
| 68 | 
            +
                      quoted_table = connection.quote_table_name(table_name)
         | 
| 64 69 |  | 
| 65 | 
            -
             | 
| 66 | 
            -
             | 
| 67 | 
            -
             | 
| 70 | 
            +
                      quoted_hll_columns = hll_columns.map { |k| connection.quote_column_name(k) }
         | 
| 71 | 
            +
                      quoted_other_columns = other_columns.map { |k| connection.quote_column_name(k) }
         | 
| 72 | 
            +
                      quoted_columns = quoted_other_columns + quoted_hll_columns
         | 
| 68 73 |  | 
| 69 | 
            -
             | 
| 70 | 
            -
             | 
| 71 | 
            -
             | 
| 72 | 
            -
             | 
| 73 | 
            -
             | 
| 74 | 
            -
             | 
| 75 | 
            -
             | 
| 74 | 
            +
                      hll_values =
         | 
| 75 | 
            +
                        hll_columns.map do |k|
         | 
| 76 | 
            +
                          vs = attributes[k]
         | 
| 77 | 
            +
                          vs = [vs] unless vs.is_a?(Array)
         | 
| 78 | 
            +
                          vs.map { |v| Utils.hll_hash_sql(connection, v) }.join(" || ")
         | 
| 79 | 
            +
                        end
         | 
| 80 | 
            +
                      other_values = other_columns.map { |k| connection.quote(attributes[k]) }
         | 
| 76 81 |  | 
| 77 | 
            -
             | 
| 78 | 
            -
             | 
| 82 | 
            +
                      insert_values = other_values + hll_values.map { |v| "hll_empty()#{v.size > 0 ? " || #{v}" : ""}" }
         | 
| 83 | 
            +
                      update_values = quoted_hll_columns.zip(hll_values).map { |k, v| "#{k} = COALESCE(#{quoted_table}.#{k}, hll_empty())#{v.size > 0 ? " || #{v}" : ""}" }
         | 
| 79 84 |  | 
| 80 | 
            -
             | 
| 81 | 
            -
             | 
| 85 | 
            +
                      sql = "INSERT INTO #{quoted_table} (#{quoted_columns.join(", ")}) VALUES (#{insert_values.join(", ")}) ON CONFLICT (#{quoted_other_columns.join(", ")}) DO UPDATE SET #{update_values.join(", ")}"
         | 
| 86 | 
            +
                      connection.exec_insert(sql, "#{name} Upsert")
         | 
| 87 | 
            +
                    end
         | 
| 82 88 | 
             
                  end
         | 
| 83 89 | 
             
                end
         | 
| 84 90 |  | 
| @@ -89,8 +95,10 @@ module ActiveHll | |
| 89 95 | 
             
                end
         | 
| 90 96 |  | 
| 91 97 | 
             
                def hll_count(attribute)
         | 
| 92 | 
            -
                   | 
| 93 | 
            -
             | 
| 98 | 
            +
                  Utils.with_connection(self.class) do |connection|
         | 
| 99 | 
            +
                    quoted_column = connection.quote_column_name(attribute)
         | 
| 100 | 
            +
                    self.class.where(id: id).pluck("hll_cardinality(#{quoted_column})").first || 0.0
         | 
| 101 | 
            +
                  end
         | 
| 94 102 | 
             
                end
         | 
| 95 103 | 
             
              end
         | 
| 96 104 | 
             
            end
         | 
    
        data/lib/active_hll/utils.rb
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            module ActiveHll
         | 
| 2 2 | 
             
              module Utils
         | 
| 3 3 | 
             
                class << self
         | 
| 4 | 
            -
                  def hll_hash_sql( | 
| 4 | 
            +
                  def hll_hash_sql(connection, value)
         | 
| 5 5 | 
             
                    hash_function =
         | 
| 6 6 | 
             
                      case value
         | 
| 7 7 | 
             
                      when true, false
         | 
| @@ -13,34 +13,40 @@ module ActiveHll | |
| 13 13 | 
             
                      else
         | 
| 14 14 | 
             
                        raise ArgumentError, "Unexpected type: #{value.class.name}"
         | 
| 15 15 | 
             
                      end
         | 
| 16 | 
            -
                    quoted_value =  | 
| 16 | 
            +
                    quoted_value = connection.quote(value)
         | 
| 17 17 | 
             
                    "#{hash_function}(#{quoted_value})"
         | 
| 18 18 | 
             
                  end
         | 
| 19 19 |  | 
| 20 | 
            -
                  def  | 
| 21 | 
            -
                     | 
| 22 | 
            -
             | 
| 20 | 
            +
                  def with_connection(relation, &block)
         | 
| 21 | 
            +
                    relation.connection_pool.with_connection(&block)
         | 
| 22 | 
            +
                  end
         | 
| 23 23 |  | 
| 24 | 
            -
             | 
| 25 | 
            -
                     | 
| 26 | 
            -
             | 
| 27 | 
            -
             | 
| 28 | 
            -
                      rows << (result.column_types.empty? ? untyped_row : columns.each_with_index.map { |c, i| untyped_row[i] && result.column_types[c] ? result.column_types[c].deserialize(untyped_row[i]) : untyped_row[i] })
         | 
| 29 | 
            -
                    end
         | 
| 24 | 
            +
                  def hll_calculate(relation, operation, column, default_value:)
         | 
| 25 | 
            +
                    Utils.with_connection(relation) do |connection|
         | 
| 26 | 
            +
                      sql, relation, group_values = hll_calculate_sql(relation, connection, operation, column)
         | 
| 27 | 
            +
                      result = connection.select_all(sql)
         | 
| 30 28 |  | 
| 31 | 
            -
             | 
| 32 | 
            -
                       | 
| 33 | 
            -
             | 
| 34 | 
            -
                       | 
| 35 | 
            -
                        rows[ | 
| 29 | 
            +
                      # typecast
         | 
| 30 | 
            +
                      rows = []
         | 
| 31 | 
            +
                      columns = result.columns
         | 
| 32 | 
            +
                      result.rows.each do |untyped_row|
         | 
| 33 | 
            +
                        rows << (result.column_types.empty? ? untyped_row : columns.each_with_index.map { |c, i| untyped_row[i] && result.column_types[c] ? result.column_types[c].deserialize(untyped_row[i]) : untyped_row[i] })
         | 
| 36 34 | 
             
                      end
         | 
| 37 35 |  | 
| 38 | 
            -
             | 
| 36 | 
            +
                      result =
         | 
| 37 | 
            +
                        if group_values.any?
         | 
| 38 | 
            +
                          Hash[rows.map { |r| [r.size == 2 ? r[0] : r[0..-2], r[-1]] }]
         | 
| 39 | 
            +
                        else
         | 
| 40 | 
            +
                          rows[0] && rows[0][0]
         | 
| 41 | 
            +
                        end
         | 
| 42 | 
            +
             | 
| 43 | 
            +
                      result = Groupdate.process_result(relation, result, default_value: default_value) if defined?(Groupdate.process_result)
         | 
| 39 44 |  | 
| 40 | 
            -
             | 
| 45 | 
            +
                      result
         | 
| 46 | 
            +
                    end
         | 
| 41 47 | 
             
                  end
         | 
| 42 48 |  | 
| 43 | 
            -
                  def hll_calculate_sql(relation, operation, column)
         | 
| 49 | 
            +
                  def hll_calculate_sql(relation, connection, operation, column)
         | 
| 44 50 | 
             
                    # basic version of Active Record disallow_raw_sql!
         | 
| 45 51 | 
             
                    # symbol = column (safe), Arel node = SQL (safe), other = untrusted
         | 
| 46 52 | 
             
                    # matches table.column and column
         | 
| @@ -54,7 +60,7 @@ module ActiveHll | |
| 54 60 | 
             
                    # column resolution
         | 
| 55 61 | 
             
                    node = relation.all.send(:arel_columns, [column]).first
         | 
| 56 62 | 
             
                    node = Arel::Nodes::SqlLiteral.new(node) if node.is_a?(String)
         | 
| 57 | 
            -
                    column =  | 
| 63 | 
            +
                    column = connection.visitor.accept(node, Arel::Collectors::SQLString.new).value
         | 
| 58 64 |  | 
| 59 65 | 
             
                    group_values = relation.all.group_values
         | 
| 60 66 |  | 
    
        data/lib/active_hll/version.rb
    CHANGED
    
    
    
        metadata
    CHANGED
    
    | @@ -1,14 +1,14 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: active_hll
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 0.2. | 
| 4 | 
            +
              version: 0.2.1
         | 
| 5 5 | 
             
            platform: ruby
         | 
| 6 6 | 
             
            authors:
         | 
| 7 7 | 
             
            - Andrew Kane
         | 
| 8 8 | 
             
            autorequire:
         | 
| 9 9 | 
             
            bindir: bin
         | 
| 10 10 | 
             
            cert_chain: []
         | 
| 11 | 
            -
            date: 2024- | 
| 11 | 
            +
            date: 2024-10-08 00:00:00.000000000 Z
         | 
| 12 12 | 
             
            dependencies:
         | 
| 13 13 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 14 14 | 
             
              name: activerecord
         | 
| @@ -60,7 +60,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement | |
| 60 60 | 
             
                - !ruby/object:Gem::Version
         | 
| 61 61 | 
             
                  version: '0'
         | 
| 62 62 | 
             
            requirements: []
         | 
| 63 | 
            -
            rubygems_version: 3.5. | 
| 63 | 
            +
            rubygems_version: 3.5.16
         | 
| 64 64 | 
             
            signing_key:
         | 
| 65 65 | 
             
            specification_version: 4
         | 
| 66 66 | 
             
            summary: HyperLogLog for Rails and Postgres
         |