pg_hash_func 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 47be51c825e894ee45f7988db38544022028bb084cc82448f14409547e5eef38
4
- data.tar.gz: 48541492aa6ac7815e05580c9b76fd522b75b033d43edc76e73ccb32ef55baad
3
+ metadata.gz: 0303bfab8aabd0277360e7634888b7e5d537c89bf1dfd9ed69974d6932db62d5
4
+ data.tar.gz: 273b85ecaf3dc8763840c4a3960b6fc671f14d448e6feab14c7a3c6e1d88aec7
5
5
  SHA512:
6
- metadata.gz: 5255d4ab2d65c8ce8a5e9be8be446f40932b0ae2861d4c9794a513f8746f94381adcc3e968a06191ef3c66dc85295618ce5be70887b2658935cec12e56689c4c
7
- data.tar.gz: 7e80636f9c6a53328f3d46b0e096aba3efa44618d97b72b70b702c6f05ce721a25a7acb1f5ce953b8d3688b49b0ccb70b80a72cf3cb82733c12ff94cbdd8812e
6
+ metadata.gz: 8813ec5ac0235621d59b768be7c5ee1512d5900c8ccf370e5393abded30786c9cefca1d1becb2b3780c8c9a6506c0048502eda3cebe554ec6fe60440f250337a
7
+ data.tar.gz: 103a1377449085d683716668174c9d7b323b203bdbca4990e99c87f49a905b9927617affe7aa1381893e956394dd01bcf2d0573727a59bfa2136f194a5e56782
data/.rubocop.yml CHANGED
@@ -1,6 +1,6 @@
1
1
  # .rubocop.yml
2
2
 
3
- require:
3
+ plugins:
4
4
  - rubocop-performance
5
5
  - rubocop-rake
6
6
  - rubocop-rspec
@@ -41,10 +41,20 @@ Metrics/BlockLength:
41
41
  - "spec/**/*_spec.rb"
42
42
 
43
43
  RSpec/ExampleLength:
44
- Max: 10
44
+ Max: 25
45
+ Exclude:
46
+ - "spec/**/*_spec.rb"
45
47
 
46
48
  RSpec/MultipleExpectations:
47
49
  Max: 5
48
50
 
51
+ RSpec/MultipleMemoizedHelpers:
52
+ Max: 15
53
+ Exclude:
54
+ - "spec/**/*_spec.rb"
55
+
49
56
  Style/StringLiterals:
50
57
  EnforcedStyle: double_quotes
58
+
59
+ Metrics/ModuleLength:
60
+ Max: 200
data/Gemfile CHANGED
@@ -10,9 +10,10 @@ group :development, :test do
10
10
  gem "benchmark-ips"
11
11
  gem "bundler"
12
12
  gem "pg"
13
- gem "rake", "~> 13.0"
14
- gem "rspec", "~> 3.0"
15
- gem "rubocop", "~> 1.60" # Use a recent version
13
+ gem "pry"
14
+ gem "rake"
15
+ gem "rspec"
16
+ gem "rubocop"
16
17
  gem "rubocop-performance"
17
18
  gem "rubocop-rake"
18
19
  gem "rubocop-rspec"
data/Gemfile.lock CHANGED
@@ -1,23 +1,28 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- pg_hash_func (0.1.0)
4
+ pg_hash_func (0.1.1)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
8
8
  specs:
9
9
  ast (2.4.3)
10
10
  benchmark-ips (2.14.0)
11
- diff-lcs (1.6.1)
12
- json (2.11.3)
13
- language_server-protocol (3.17.0.4)
11
+ coderay (1.1.3)
12
+ diff-lcs (1.6.2)
13
+ json (2.12.0)
14
+ language_server-protocol (3.17.0.5)
14
15
  lint_roller (1.1.0)
16
+ method_source (1.1.0)
15
17
  parallel (1.27.0)
16
18
  parser (3.3.8.0)
17
19
  ast (~> 2.4.1)
18
20
  racc
19
21
  pg (1.5.9)
20
22
  prism (1.4.0)
23
+ pry (0.15.2)
24
+ coderay (~> 1.1)
25
+ method_source (~> 1.0)
21
26
  racc (1.8.1)
22
27
  rainbow (3.1.1)
23
28
  rake (13.2.1)
@@ -28,14 +33,14 @@ GEM
28
33
  rspec-mocks (~> 3.13.0)
29
34
  rspec-core (3.13.3)
30
35
  rspec-support (~> 3.13.0)
31
- rspec-expectations (3.13.3)
36
+ rspec-expectations (3.13.4)
32
37
  diff-lcs (>= 1.2.0, < 2.0)
33
38
  rspec-support (~> 3.13.0)
34
- rspec-mocks (3.13.2)
39
+ rspec-mocks (3.13.4)
35
40
  diff-lcs (>= 1.2.0, < 2.0)
36
41
  rspec-support (~> 3.13.0)
37
- rspec-support (3.13.2)
38
- rubocop (1.75.3)
42
+ rspec-support (3.13.3)
43
+ rubocop (1.75.6)
39
44
  json (~> 2.3)
40
45
  language_server-protocol (~> 3.17.0.2)
41
46
  lint_roller (~> 1.1.0)
@@ -73,9 +78,10 @@ DEPENDENCIES
73
78
  bundler
74
79
  pg
75
80
  pg_hash_func!
76
- rake (~> 13.0)
77
- rspec (~> 3.0)
78
- rubocop (~> 1.60)
81
+ pry
82
+ rake
83
+ rspec
84
+ rubocop
79
85
  rubocop-performance
80
86
  rubocop-rake
81
87
  rubocop-rspec
data/README.md CHANGED
@@ -3,7 +3,9 @@
3
3
  [![CI](https://github.com/shayonj/pg_hash_func/actions/workflows/ci.yml/badge.svg?branch=main)](https://github.com/shayonj/pg_hash_func/actions/workflows/ci.yml)
4
4
  [![Gem Version](https://badge.fury.io/rb/pg_hash_func.svg)](https://badge.fury.io/rb/pg_hash_func)
5
5
 
6
- Determine the target partition index for an integer key according to PostgreSQL's default hash strategy, without querying the database.
6
+ This gem allows your application to calculate the target partition for integer keys in PostgreSQL **hash-partitioned** tables _without_ querying the database. Typically, when querying a parent partitioned table, PostgreSQL consults catalog tables to route the query, incurring network latency and lookup costs. By replicating PostgreSQL's native hashing logic, this gem allows your application to bypass the parent table and directly derive the partition name/index, leading to significant performance gains in read-heavy, latency-sensitive workloads.
7
+
8
+ This gem replicates the hashing logic of PostgreSQL's `hashint8extended` (for `bigint`) and `hashint4extended` (for `integer` and `smallint`) in [src/backend/access/hash/hashfunc.c](https://github.com/postgres/postgres/blob/master/src/backend/access/hash/hashfunc.c).
7
9
 
8
10
  **Supported Types:**
9
11
 
data/benchmarks/file.rb CHANGED
@@ -1,14 +1,14 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'benchmark/ips'
4
- require 'pg'
5
- require_relative '../lib/pg_hash_func'
3
+ require "benchmark/ips"
4
+ require "pg"
5
+ require_relative "../lib/pg_hash_func"
6
6
  DB_CONFIG = {
7
- dbname: ENV['PGDATABASE'] || 'postgres',
8
- user: ENV['PGUSER'] || 'postgres',
9
- password: ENV['PGPASSWORD'],
10
- host: ENV['PGHOST'] || 'localhost',
11
- port: ENV['PGPORT'] || 5432
7
+ dbname: ENV["PGDATABASE"] || "postgres",
8
+ user: ENV["PGUSER"] || "postgres",
9
+ password: ENV.fetch("PGPASSWORD", nil),
10
+ host: ENV["PGHOST"] || "localhost",
11
+ port: ENV["PGPORT"] || 5432
12
12
  }.compact
13
13
 
14
14
  # Constants from the gem
@@ -16,37 +16,49 @@ SEED = PgHashFunc::Hasher::HASH_PARTITION_SEED
16
16
  MAGIC = PgHashFunc::Hasher::PARTITION_MAGIC_CONSTANT
17
17
  UINT64_MODULUS = PgHashFunc::Hasher::UINT64_MASK + 1 # 2^64
18
18
 
19
- TEST_DATA = [
19
+ TEST_DATA_BIGINT = [
20
20
  [1, 16],
21
21
  [-1, 16],
22
22
  [540_364, 16],
23
- [2**31 - 1, 32],
23
+ [(2**31) - 1, 32],
24
24
  [-(2**31), 32],
25
- [2**63 - 1, 64],
25
+ [(2**63) - 1, 64],
26
26
  [-(2**63), 64],
27
27
  [123_456_789_012_345, 1024],
28
28
  [9_223_372_036_854_775_807, 2048]
29
29
  ].freeze
30
30
 
31
- SQL_QUERY = <<~SQL
31
+ TEST_DATA_INT4 = [
32
+ [1, 16],
33
+ [-1, 16],
34
+ [123_456, 16],
35
+ [(2**31) - 1, 32],
36
+ [-(2**31), 32]
37
+ ].freeze
38
+
39
+ SQL_QUERY_BIGINT = <<~SQL
32
40
  SELECT ( ( ((hashint8extended($1::bigint, $2::bigint)::numeric + $3::numeric) % $5::numeric) % $4::numeric ) + $4::numeric ) % $4::numeric;
33
41
  SQL
34
42
 
43
+ SQL_QUERY_INT4 = <<~SQL
44
+ SELECT (( ( (hashint4extended($1::integer, $2::bigint)::numeric % $4::numeric ) % $3::numeric ) + $3::numeric ) % $3::numeric);
45
+ SQL
46
+
35
47
  begin
36
48
  conn = PG.connect(DB_CONFIG)
37
- puts 'Connected to PostgreSQL.'
49
+ puts "Connected to PostgreSQL."
38
50
  rescue PG::ConnectionBad => e
39
- puts 'Failed to connect to PostgreSQL. Ensure DB is running and configured correctly.'
51
+ puts "Failed to connect to PostgreSQL. Ensure DB is running and configured correctly."
40
52
  puts "Error: #{e.message}"
41
53
  exit(1)
42
54
  end
43
55
 
44
- puts 'Warming up...'
56
+ puts "Warming up..."
45
57
 
46
58
  Benchmark.ips do |x|
47
- x.report('Ruby Calculation') do
48
- TEST_DATA.each do |key, num_partitions|
49
- PgHashFunc.calculate_partition_index(
59
+ x.report("Ruby Calculation (bigint)") do
60
+ TEST_DATA_BIGINT.each do |key, num_partitions|
61
+ PgHashFunc.calculate_partition_index_bigint(
50
62
  value: key,
51
63
  num_partitions: num_partitions,
52
64
  seed: SEED,
@@ -55,9 +67,26 @@ Benchmark.ips do |x|
55
67
  end
56
68
  end
57
69
 
58
- x.report('SQL Query') do
59
- TEST_DATA.each do |key, num_partitions|
60
- result = conn.exec_params(SQL_QUERY, [key, SEED, MAGIC, num_partitions, UINT64_MODULUS])
70
+ x.report("SQL Query (bigint)") do
71
+ TEST_DATA_BIGINT.each do |key, num_partitions|
72
+ result = conn.exec_params(SQL_QUERY_BIGINT, [key, SEED, MAGIC, num_partitions, UINT64_MODULUS])
73
+ result.getvalue(0, 0).to_i
74
+ end
75
+ end
76
+
77
+ x.report("Ruby Calculation (int4)") do
78
+ TEST_DATA_INT4.each do |key, num_partitions|
79
+ PgHashFunc.calculate_partition_index_int4(
80
+ value: key,
81
+ num_partitions: num_partitions,
82
+ seed: SEED
83
+ )
84
+ end
85
+ end
86
+
87
+ x.report("SQL Query (int4)") do
88
+ TEST_DATA_INT4.each do |key, num_partitions|
89
+ result = conn.exec_params(SQL_QUERY_INT4, [key, SEED, num_partitions, UINT64_MODULUS])
61
90
  result.getvalue(0, 0).to_i
62
91
  end
63
92
  end
@@ -66,20 +95,20 @@ Benchmark.ips do |x|
66
95
  end
67
96
 
68
97
  conn.close if conn && !conn.finished?
69
- puts 'Disconnected from PostgreSQL.'
98
+ puts "Disconnected from PostgreSQL."
70
99
 
71
100
  # Connected to PostgreSQL.
72
101
  # Warming up...
73
102
  # ruby 3.4.2 (2025-02-15 revision d2930f8e7a) +PRISM [arm64-darwin24]
74
103
  # Warming up --------------------------------------
75
- # Ruby Calculation 6.755k i/100ms
76
- # SQL Query 320.000 i/100ms
104
+ # Ruby Calculation (bigint) 6.755k i/100ms
105
+ # SQL Query (bigint) 320.000 i/100ms
77
106
  # Calculating -------------------------------------
78
- # Ruby Calculation 67.103k (± 3.4%) i/s (14.90 μs/i) - 337.750k in 5.040734s
79
- # SQL Query 3.192k (± 2.6%) i/s (313.26 μs/i) - 16.000k in 5.016067s
107
+ # Ruby Calculation (bigint) 67.103k (± 3.4%) i/s (14.90 μs/i) - 337.750k in 5.040734s
108
+ # SQL Query (bigint) 3.192k (± 2.6%) i/s (313.26 μs/i) - 16.000k in 5.016067s
80
109
 
81
110
  # Comparison:
82
- # Ruby Calculation: 67102.7 i/s
83
- # SQL Query: 3192.2 i/s - 21.02x slower
111
+ # Ruby Calculation (bigint): 67102.7 i/s
112
+ # SQL Query (bigint): 3192.2 i/s - 21.02x slower
84
113
 
85
114
  # Disconnected from PostgreSQL.
@@ -120,19 +120,42 @@ module PgHashFunc
120
120
 
121
121
  hash_val = hashint8extended(value: value, seed: seed)
122
122
 
123
- result = (hash_val + magic_constant) & UINT64_MASK
124
- idx = result % num_partitions
123
+ # First, interpret the 64-bit hash as signed, matching PostgreSQL's
124
+ # behavior where the C function's uint64 return value is received in SQL
125
+ # as a signed int8.
126
+ signed_hash = hash_val >= 0x8000_0000_0000_0000 ? hash_val - (1 << 64) : hash_val
127
+
128
+ # Now add the magic constant in signed 64-bit arithmetic (two's-
129
+ # complement wrap-around). We keep the wrap-around by masking back to
130
+ # 64-bits as PostgreSQL does with uint64 arithmetic before the cast.
131
+ unsigned_sum = (signed_hash + magic_constant) & UINT64_MASK
132
+
133
+ # Cast that wrapped result back to signed 64-bit for the final modulo.
134
+ signed_sum = unsigned_sum >= 0x8000_0000_0000_0000 ? unsigned_sum - (1 << 64) : unsigned_sum
135
+
136
+ # Follow the expression that postgres uses internally:
137
+ rem = signed_sum.remainder(num_partitions)
138
+ idx = (rem + num_partitions) % num_partitions
125
139
  idx.to_i
126
140
  end
127
141
 
128
142
  # Calculates the target partition index for a given int4 value.
129
- def self.calculate_partition_index_int4(value:, seed:, magic_constant:, num_partitions:)
143
+ def self.calculate_partition_index_int4(value:, seed:, num_partitions:)
130
144
  raise ArgumentError, "Number of partitions must be positive" unless num_partitions.positive?
131
145
 
132
146
  hash_val = hashint4extended(value: value, seed: seed)
133
147
 
134
- result = (hash_val + magic_constant) & UINT64_MASK
135
- idx = result % num_partitions
148
+ signed_hash = hash_val >= 0x8000_0000_0000_0000 ? hash_val - (1 << 64) : hash_val
149
+
150
+ # PostgreSQL does *not* add the partition magic constant for int2/int4
151
+ # hash partitioning (see get_hash_partition_greatest_modulus_int4 in the
152
+ # backend). Only bigint types add the magic. Therefore we skip it here.
153
+ unsigned_sum = signed_hash & UINT64_MASK
154
+
155
+ signed_sum = unsigned_sum >= 0x8000_0000_0000_0000 ? unsigned_sum - (1 << 64) : unsigned_sum
156
+
157
+ rem = signed_sum.remainder(num_partitions)
158
+ idx = (rem + num_partitions) % num_partitions
136
159
  idx.to_i
137
160
  end
138
161
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module PgHashFunc
4
- VERSION = "0.1.0"
4
+ VERSION = "0.1.1"
5
5
  end
data/lib/pg_hash_func.rb CHANGED
@@ -36,17 +36,16 @@ module PgHashFunc
36
36
 
37
37
  # Calculates the target partition index for a given integer (int4) or smallint (int2) value based on
38
38
  # PostgreSQL's default hash partitioning strategy.
39
- # Mimics (hashint4extended(value, seed) + magic) % num_partitions using uint64 arithmetic.
40
- # Note: PostgreSQL uses the same hash function (`hashint4extended` equivalent) for both int2 and int4.
39
+ # Mimics hashint4extended(value, seed) % num_partitions using uint64 arithmetic.
40
+ # Note 1: PostgreSQL uses the same hash function (`hashint4extended` equivalent) for both int2 and int4.
41
+ # Note 2: Unlike bigint hash partitioning, PostgreSQL does NOT add the magic constant for int4/int2
42
+ # hash partitioning.
41
43
  #
42
44
  # @param value [Integer] The partitioning key value (treated as int4/int2).
43
45
  # @param num_partitions [Integer] The number of partitions for this level.
44
46
  # @param seed [Integer] The 64-bit seed. Defaults to PostgreSQL's standard HASH_PARTITION_SEED.
45
- # @param magic_constant [Integer] The magic constant. Defaults to PostgreSQL's standard PARTITION_MAGIC_CONSTANT.
46
47
  # @return [Integer] The calculated partition index (0-based).
47
- def self.calculate_partition_index_int4(value:, num_partitions:, seed: Hasher::HASH_PARTITION_SEED,
48
- magic_constant: Hasher::PARTITION_MAGIC_CONSTANT)
49
- Hasher.calculate_partition_index_int4(value: value, seed: seed, magic_constant: magic_constant,
50
- num_partitions: num_partitions)
48
+ def self.calculate_partition_index_int4(value:, num_partitions:, seed: Hasher::HASH_PARTITION_SEED)
49
+ Hasher.calculate_partition_index_int4(value: value, seed: seed, num_partitions: num_partitions)
51
50
  end
52
51
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pg_hash_func
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Shayon Mukherjee
8
8
  bindir: exe
9
9
  cert_chain: []
10
- date: 2025-04-26 00:00:00.000000000 Z
10
+ date: 2025-05-17 00:00:00.000000000 Z
11
11
  dependencies: []
12
12
  description: |
13
13
  Replicates PostgreSQL's default hash partitioning calculations.