active_hll 0.1.0 → 0.2.0
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/LICENSE.txt +1 -1
- data/README.md +13 -7
- data/lib/active_hll/model.rb +30 -0
- data/lib/active_hll/utils.rb +1 -1
- data/lib/active_hll/version.rb +1 -1
- metadata +6 -6
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA256:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: a357d4e04aad048c5abddf17021edde474073438414e6d85e3f48748ff1fc304
         | 
| 4 | 
            +
              data.tar.gz: '03031590842b88853c7028e5a45174ff104f674ecf86e235f64fdb7120435c23'
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: 2efc0fc07d80ae37f2063388151bfe3b0a7459d584a03b51661b1010f64b03d8e8b275e65664f859e77fb6c1fa48840ef3f7cae706f37c23afbefe6d358fe04f
         | 
| 7 | 
            +
              data.tar.gz: 29c3beb46f85e6a58ed522a4503414b4cbb2f322c5563c26a07a38e2160d84058c3c79ace3dd17165f961b76d7313b8e5c4f906b52d52106fa8d5713bccb8ffc
         | 
    
        data/CHANGELOG.md
    CHANGED
    
    
    
        data/LICENSE.txt
    CHANGED
    
    
    
        data/README.md
    CHANGED
    
    | @@ -4,7 +4,7 @@ | |
| 4 4 |  | 
| 5 5 | 
             
            For fast, approximate count-distinct queries
         | 
| 6 6 |  | 
| 7 | 
            -
            [](https://github.com/ankane/active_hll/actions)
         | 
| 8 8 |  | 
| 9 9 | 
             
            ## Installation
         | 
| 10 10 |  | 
| @@ -12,8 +12,8 @@ First, install the [hll extension](https://github.com/citusdata/postgresql-hll) | |
| 12 12 |  | 
| 13 13 | 
             
            ```sh
         | 
| 14 14 | 
             
            cd /tmp
         | 
| 15 | 
            -
            curl -L https://github.com/citusdata/postgresql-hll/archive/refs/tags/v2. | 
| 16 | 
            -
            cd postgresql-hll-2. | 
| 15 | 
            +
            curl -L https://github.com/citusdata/postgresql-hll/archive/refs/tags/v2.18.tar.gz | tar xz
         | 
| 16 | 
            +
            cd postgresql-hll-2.18
         | 
| 17 17 | 
             
            make
         | 
| 18 18 | 
             
            make install # may need sudo
         | 
| 19 19 | 
             
            ```
         | 
| @@ -39,7 +39,7 @@ HLLs provide an approximate count of unique values (like unique visitors). By ro | |
| 39 39 | 
             
            Create a table with an `hll` column
         | 
| 40 40 |  | 
| 41 41 | 
             
            ```ruby
         | 
| 42 | 
            -
            class CreateEventRollups < ActiveRecord::Migration[7. | 
| 42 | 
            +
            class CreateEventRollups < ActiveRecord::Migration[7.1]
         | 
| 43 43 | 
             
              def change
         | 
| 44 44 | 
             
                create_table :event_rollups do |t|
         | 
| 45 45 | 
             
                  t.date :time_bucket, index: {unique: true}
         | 
| @@ -53,7 +53,7 @@ You can use [batch](#batch) and [stream](#stream) approaches to build HLLs | |
| 53 53 |  | 
| 54 54 | 
             
            ### Batch
         | 
| 55 55 |  | 
| 56 | 
            -
             | 
| 56 | 
            +
            To generate HLLs from existing data, use the `hll_agg` method
         | 
| 57 57 |  | 
| 58 58 | 
             
            ```ruby
         | 
| 59 59 | 
             
            hlls = Event.group_by_day(:created_at).hll_agg(:visitor_id)
         | 
| @@ -74,12 +74,18 @@ For a large number of HLLs, use SQL to generate and upsert in a single statement | |
| 74 74 |  | 
| 75 75 | 
             
            ### Stream
         | 
| 76 76 |  | 
| 77 | 
            -
             | 
| 77 | 
            +
            To add new data to HLLs, use the `hll_add` method
         | 
| 78 78 |  | 
| 79 79 | 
             
            ```ruby
         | 
| 80 80 | 
             
            EventRollup.where(time_bucket: Date.current).hll_add(visitor_ids: ["visitor1", "visitor2"])
         | 
| 81 81 | 
             
            ```
         | 
| 82 82 |  | 
| 83 | 
            +
            or the `hll_upsert` method (experimental)
         | 
| 84 | 
            +
             | 
| 85 | 
            +
            ```ruby
         | 
| 86 | 
            +
            EventRollup.hll_upsert({time_bucket: Date.current, visitor_ids: ["visitor1", "visitor2"]})
         | 
| 87 | 
            +
            ```
         | 
| 88 | 
            +
             | 
| 83 89 | 
             
            ## Querying
         | 
| 84 90 |  | 
| 85 91 | 
             
            Get approximate unique values for a time range
         | 
| @@ -132,7 +138,7 @@ There’s not a way to remove data from an HLL, so to delete data for a specific | |
| 132 138 |  | 
| 133 139 | 
             
            ## Hosted Postgres
         | 
| 134 140 |  | 
| 135 | 
            -
            The `hll` extension is available on  | 
| 141 | 
            +
            The `hll` extension is available on a number of [hosted providers](https://github.com/ankane/active_hll/issues/4).
         | 
| 136 142 |  | 
| 137 143 | 
             
            ## History
         | 
| 138 144 |  | 
    
        data/lib/active_hll/model.rb
    CHANGED
    
    | @@ -50,6 +50,36 @@ module ActiveHll | |
| 50 50 |  | 
| 51 51 | 
             
                    update_all(set_clauses.join(", "))
         | 
| 52 52 | 
             
                  end
         | 
| 53 | 
            +
             | 
| 54 | 
            +
                  # experimental
         | 
| 55 | 
            +
                  def hll_upsert(attributes)
         | 
| 56 | 
            +
                    hll_columns, other_columns = attributes.keys.partition { |a| columns_hash[a.to_s]&.type == :hll }
         | 
| 57 | 
            +
             | 
| 58 | 
            +
                    # important! raise if column detection fails
         | 
| 59 | 
            +
                    if hll_columns.empty?
         | 
| 60 | 
            +
                      raise ArgumentError, "No hll columns"
         | 
| 61 | 
            +
                    end
         | 
| 62 | 
            +
             | 
| 63 | 
            +
                    quoted_table = connection.quote_table_name(table_name)
         | 
| 64 | 
            +
             | 
| 65 | 
            +
                    quoted_hll_columns = hll_columns.map { |k| connection.quote_column_name(k) }
         | 
| 66 | 
            +
                    quoted_other_columns = other_columns.map { |k| connection.quote_column_name(k) }
         | 
| 67 | 
            +
                    quoted_columns = quoted_other_columns + quoted_hll_columns
         | 
| 68 | 
            +
             | 
| 69 | 
            +
                    hll_values =
         | 
| 70 | 
            +
                      hll_columns.map do |k|
         | 
| 71 | 
            +
                        vs = attributes[k]
         | 
| 72 | 
            +
                        vs = [vs] unless vs.is_a?(Array)
         | 
| 73 | 
            +
                        vs.map { |v| Utils.hll_hash_sql(self, v) }.join(" || ")
         | 
| 74 | 
            +
                      end
         | 
| 75 | 
            +
                    other_values = other_columns.map { |k| connection.quote(attributes[k]) }
         | 
| 76 | 
            +
             | 
| 77 | 
            +
                    insert_values = other_values + hll_values.map { |v| "hll_empty()#{v.size > 0 ? " || #{v}" : ""}" }
         | 
| 78 | 
            +
                    update_values = quoted_hll_columns.zip(hll_values).map { |k, v| "#{k} = COALESCE(#{quoted_table}.#{k}, hll_empty())#{v.size > 0 ? " || #{v}" : ""}" }
         | 
| 79 | 
            +
             | 
| 80 | 
            +
                    sql = "INSERT INTO #{quoted_table} (#{quoted_columns.join(", ")}) VALUES (#{insert_values.join(", ")}) ON CONFLICT (#{quoted_other_columns.join(", ")}) DO UPDATE SET #{update_values.join(", ")}"
         | 
| 81 | 
            +
                    connection.exec_insert(sql, "#{name} Upsert")
         | 
| 82 | 
            +
                  end
         | 
| 53 83 | 
             
                end
         | 
| 54 84 |  | 
| 55 85 | 
             
                # doesn't update in-memory record attribute for performance
         | 
    
        data/lib/active_hll/utils.rb
    CHANGED
    
    | @@ -46,7 +46,7 @@ module ActiveHll | |
| 46 46 | 
             
                    # matches table.column and column
         | 
| 47 47 | 
             
                    unless column.is_a?(Symbol) || column.is_a?(Arel::Nodes::SqlLiteral)
         | 
| 48 48 | 
             
                      column = column.to_s
         | 
| 49 | 
            -
                      unless /\A\w+(\.\w+)?\z/i.match(column)
         | 
| 49 | 
            +
                      unless /\A\w+(\.\w+)?\z/i.match?(column)
         | 
| 50 50 | 
             
                        raise ActiveRecord::UnknownAttributeReference, "Query method called with non-attribute argument(s): #{column.inspect}. Use Arel.sql() for known-safe values."
         | 
| 51 51 | 
             
                      end
         | 
| 52 52 | 
             
                    end
         | 
    
        data/lib/active_hll/version.rb
    CHANGED
    
    
    
        metadata
    CHANGED
    
    | @@ -1,14 +1,14 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: active_hll
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 0. | 
| 4 | 
            +
              version: 0.2.0
         | 
| 5 5 | 
             
            platform: ruby
         | 
| 6 6 | 
             
            authors:
         | 
| 7 7 | 
             
            - Andrew Kane
         | 
| 8 8 | 
             
            autorequire:
         | 
| 9 9 | 
             
            bindir: bin
         | 
| 10 10 | 
             
            cert_chain: []
         | 
| 11 | 
            -
            date:  | 
| 11 | 
            +
            date: 2024-06-24 00:00:00.000000000 Z
         | 
| 12 12 | 
             
            dependencies:
         | 
| 13 13 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 14 14 | 
             
              name: activerecord
         | 
| @@ -16,14 +16,14 @@ dependencies: | |
| 16 16 | 
             
                requirements:
         | 
| 17 17 | 
             
                - - ">="
         | 
| 18 18 | 
             
                  - !ruby/object:Gem::Version
         | 
| 19 | 
            -
                    version: '6'
         | 
| 19 | 
            +
                    version: '6.1'
         | 
| 20 20 | 
             
              type: :runtime
         | 
| 21 21 | 
             
              prerelease: false
         | 
| 22 22 | 
             
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 23 23 | 
             
                requirements:
         | 
| 24 24 | 
             
                - - ">="
         | 
| 25 25 | 
             
                  - !ruby/object:Gem::Version
         | 
| 26 | 
            -
                    version: '6'
         | 
| 26 | 
            +
                    version: '6.1'
         | 
| 27 27 | 
             
            description:
         | 
| 28 28 | 
             
            email: andrew@ankane.org
         | 
| 29 29 | 
             
            executables: []
         | 
| @@ -53,14 +53,14 @@ required_ruby_version: !ruby/object:Gem::Requirement | |
| 53 53 | 
             
              requirements:
         | 
| 54 54 | 
             
              - - ">="
         | 
| 55 55 | 
             
                - !ruby/object:Gem::Version
         | 
| 56 | 
            -
                  version: ' | 
| 56 | 
            +
                  version: '3.1'
         | 
| 57 57 | 
             
            required_rubygems_version: !ruby/object:Gem::Requirement
         | 
| 58 58 | 
             
              requirements:
         | 
| 59 59 | 
             
              - - ">="
         | 
| 60 60 | 
             
                - !ruby/object:Gem::Version
         | 
| 61 61 | 
             
                  version: '0'
         | 
| 62 62 | 
             
            requirements: []
         | 
| 63 | 
            -
            rubygems_version: 3. | 
| 63 | 
            +
            rubygems_version: 3.5.11
         | 
| 64 64 | 
             
            signing_key:
         | 
| 65 65 | 
             
            specification_version: 4
         | 
| 66 66 | 
             
            summary: HyperLogLog for Rails and Postgres
         |