pg_hash_func 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +12 -2
- data/Gemfile +4 -3
- data/Gemfile.lock +17 -11
- data/README.md +3 -1
- data/benchmarks/file.rb +57 -28
- data/lib/pg_hash_func/hasher.rb +28 -5
- data/lib/pg_hash_func/version.rb +1 -1
- data/lib/pg_hash_func.rb +6 -7
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0303bfab8aabd0277360e7634888b7e5d537c89bf1dfd9ed69974d6932db62d5
|
4
|
+
data.tar.gz: 273b85ecaf3dc8763840c4a3960b6fc671f14d448e6feab14c7a3c6e1d88aec7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8813ec5ac0235621d59b768be7c5ee1512d5900c8ccf370e5393abded30786c9cefca1d1becb2b3780c8c9a6506c0048502eda3cebe554ec6fe60440f250337a
|
7
|
+
data.tar.gz: 103a1377449085d683716668174c9d7b323b203bdbca4990e99c87f49a905b9927617affe7aa1381893e956394dd01bcf2d0573727a59bfa2136f194a5e56782
|
data/.rubocop.yml
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# .rubocop.yml
|
2
2
|
|
3
|
-
|
3
|
+
plugins:
|
4
4
|
- rubocop-performance
|
5
5
|
- rubocop-rake
|
6
6
|
- rubocop-rspec
|
@@ -41,10 +41,20 @@ Metrics/BlockLength:
|
|
41
41
|
- "spec/**/*_spec.rb"
|
42
42
|
|
43
43
|
RSpec/ExampleLength:
|
44
|
-
Max:
|
44
|
+
Max: 25
|
45
|
+
Exclude:
|
46
|
+
- "spec/**/*_spec.rb"
|
45
47
|
|
46
48
|
RSpec/MultipleExpectations:
|
47
49
|
Max: 5
|
48
50
|
|
51
|
+
RSpec/MultipleMemoizedHelpers:
|
52
|
+
Max: 15
|
53
|
+
Exclude:
|
54
|
+
- "spec/**/*_spec.rb"
|
55
|
+
|
49
56
|
Style/StringLiterals:
|
50
57
|
EnforcedStyle: double_quotes
|
58
|
+
|
59
|
+
Metrics/ModuleLength:
|
60
|
+
Max: 200
|
data/Gemfile
CHANGED
@@ -10,9 +10,10 @@ group :development, :test do
|
|
10
10
|
gem "benchmark-ips"
|
11
11
|
gem "bundler"
|
12
12
|
gem "pg"
|
13
|
-
gem "
|
14
|
-
gem "
|
15
|
-
gem "
|
13
|
+
gem "pry"
|
14
|
+
gem "rake"
|
15
|
+
gem "rspec"
|
16
|
+
gem "rubocop"
|
16
17
|
gem "rubocop-performance"
|
17
18
|
gem "rubocop-rake"
|
18
19
|
gem "rubocop-rspec"
|
data/Gemfile.lock
CHANGED
@@ -1,23 +1,28 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
pg_hash_func (0.1.
|
4
|
+
pg_hash_func (0.1.1)
|
5
5
|
|
6
6
|
GEM
|
7
7
|
remote: https://rubygems.org/
|
8
8
|
specs:
|
9
9
|
ast (2.4.3)
|
10
10
|
benchmark-ips (2.14.0)
|
11
|
-
|
12
|
-
|
13
|
-
|
11
|
+
coderay (1.1.3)
|
12
|
+
diff-lcs (1.6.2)
|
13
|
+
json (2.12.0)
|
14
|
+
language_server-protocol (3.17.0.5)
|
14
15
|
lint_roller (1.1.0)
|
16
|
+
method_source (1.1.0)
|
15
17
|
parallel (1.27.0)
|
16
18
|
parser (3.3.8.0)
|
17
19
|
ast (~> 2.4.1)
|
18
20
|
racc
|
19
21
|
pg (1.5.9)
|
20
22
|
prism (1.4.0)
|
23
|
+
pry (0.15.2)
|
24
|
+
coderay (~> 1.1)
|
25
|
+
method_source (~> 1.0)
|
21
26
|
racc (1.8.1)
|
22
27
|
rainbow (3.1.1)
|
23
28
|
rake (13.2.1)
|
@@ -28,14 +33,14 @@ GEM
|
|
28
33
|
rspec-mocks (~> 3.13.0)
|
29
34
|
rspec-core (3.13.3)
|
30
35
|
rspec-support (~> 3.13.0)
|
31
|
-
rspec-expectations (3.13.
|
36
|
+
rspec-expectations (3.13.4)
|
32
37
|
diff-lcs (>= 1.2.0, < 2.0)
|
33
38
|
rspec-support (~> 3.13.0)
|
34
|
-
rspec-mocks (3.13.
|
39
|
+
rspec-mocks (3.13.4)
|
35
40
|
diff-lcs (>= 1.2.0, < 2.0)
|
36
41
|
rspec-support (~> 3.13.0)
|
37
|
-
rspec-support (3.13.
|
38
|
-
rubocop (1.75.
|
42
|
+
rspec-support (3.13.3)
|
43
|
+
rubocop (1.75.6)
|
39
44
|
json (~> 2.3)
|
40
45
|
language_server-protocol (~> 3.17.0.2)
|
41
46
|
lint_roller (~> 1.1.0)
|
@@ -73,9 +78,10 @@ DEPENDENCIES
|
|
73
78
|
bundler
|
74
79
|
pg
|
75
80
|
pg_hash_func!
|
76
|
-
|
77
|
-
|
78
|
-
|
81
|
+
pry
|
82
|
+
rake
|
83
|
+
rspec
|
84
|
+
rubocop
|
79
85
|
rubocop-performance
|
80
86
|
rubocop-rake
|
81
87
|
rubocop-rspec
|
data/README.md
CHANGED
@@ -3,7 +3,9 @@
|
|
3
3
|
[](https://github.com/shayonj/pg_hash_func/actions/workflows/ci.yml)
|
4
4
|
[](https://badge.fury.io/rb/pg_hash_func)
|
5
5
|
|
6
|
-
|
6
|
+
This gem allows your application to calculate the target partition for integer keys in PostgreSQL **hash-partitioned** tables _without_ querying the database. Typically, when querying a parent partitioned table, PostgreSQL consults catalog tables to route the query, incurring network latency and lookup costs. By replicating PostgreSQL's native hashing logic, this gem allows your application to bypass the parent table and directly derive the partition name/index, leading to significant performance gains in read-heavy, latency-sensitive workloads.
|
7
|
+
|
8
|
+
This gem replicates the hashing logic of PostgreSQL's `hashint8extended` (for `bigint`) and `hashint4extended` (for `integer` and `smallint`), as implemented in [src/backend/access/hash/hashfunc.c](https://github.com/postgres/postgres/blob/master/src/backend/access/hash/hashfunc.c).
|
7
9
|
|
8
10
|
**Supported Types:**
|
9
11
|
|
data/benchmarks/file.rb
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require
|
4
|
-
require
|
5
|
-
require_relative
|
3
|
+
require "benchmark/ips"
|
4
|
+
require "pg"
|
5
|
+
require_relative "../lib/pg_hash_func"
|
6
6
|
DB_CONFIG = {
|
7
|
-
dbname: ENV[
|
8
|
-
user: ENV[
|
9
|
-
password: ENV
|
10
|
-
host: ENV[
|
11
|
-
port: ENV[
|
7
|
+
dbname: ENV["PGDATABASE"] || "postgres",
|
8
|
+
user: ENV["PGUSER"] || "postgres",
|
9
|
+
password: ENV.fetch("PGPASSWORD", nil),
|
10
|
+
host: ENV["PGHOST"] || "localhost",
|
11
|
+
port: ENV["PGPORT"] || 5432
|
12
12
|
}.compact
|
13
13
|
|
14
14
|
# Constants from the gem
|
@@ -16,37 +16,49 @@ SEED = PgHashFunc::Hasher::HASH_PARTITION_SEED
|
|
16
16
|
MAGIC = PgHashFunc::Hasher::PARTITION_MAGIC_CONSTANT
|
17
17
|
UINT64_MODULUS = PgHashFunc::Hasher::UINT64_MASK + 1 # 2^64
|
18
18
|
|
19
|
-
|
19
|
+
TEST_DATA_BIGINT = [
|
20
20
|
[1, 16],
|
21
21
|
[-1, 16],
|
22
22
|
[540_364, 16],
|
23
|
-
[2**31 - 1, 32],
|
23
|
+
[(2**31) - 1, 32],
|
24
24
|
[-(2**31), 32],
|
25
|
-
[2**63 - 1, 64],
|
25
|
+
[(2**63) - 1, 64],
|
26
26
|
[-(2**63), 64],
|
27
27
|
[123_456_789_012_345, 1024],
|
28
28
|
[9_223_372_036_854_775_807, 2048]
|
29
29
|
].freeze
|
30
30
|
|
31
|
-
|
31
|
+
TEST_DATA_INT4 = [
|
32
|
+
[1, 16],
|
33
|
+
[-1, 16],
|
34
|
+
[123_456, 16],
|
35
|
+
[(2**31) - 1, 32],
|
36
|
+
[-(2**31), 32]
|
37
|
+
].freeze
|
38
|
+
|
39
|
+
SQL_QUERY_BIGINT = <<~SQL
|
32
40
|
SELECT ( ( ((hashint8extended($1::bigint, $2::bigint)::numeric + $3::numeric) % $5::numeric) % $4::numeric ) + $4::numeric ) % $4::numeric;
|
33
41
|
SQL
|
34
42
|
|
43
|
+
SQL_QUERY_INT4 = <<~SQL
|
44
|
+
SELECT (( ( (hashint4extended($1::integer, $2::bigint)::numeric % $4::numeric ) % $3::numeric ) + $3::numeric ) % $3::numeric);
|
45
|
+
SQL
|
46
|
+
|
35
47
|
begin
|
36
48
|
conn = PG.connect(DB_CONFIG)
|
37
|
-
puts
|
49
|
+
puts "Connected to PostgreSQL."
|
38
50
|
rescue PG::ConnectionBad => e
|
39
|
-
puts
|
51
|
+
puts "Failed to connect to PostgreSQL. Ensure DB is running and configured correctly."
|
40
52
|
puts "Error: #{e.message}"
|
41
53
|
exit(1)
|
42
54
|
end
|
43
55
|
|
44
|
-
puts
|
56
|
+
puts "Warming up..."
|
45
57
|
|
46
58
|
Benchmark.ips do |x|
|
47
|
-
x.report(
|
48
|
-
|
49
|
-
PgHashFunc.
|
59
|
+
x.report("Ruby Calculation (bigint)") do
|
60
|
+
TEST_DATA_BIGINT.each do |key, num_partitions|
|
61
|
+
PgHashFunc.calculate_partition_index_bigint(
|
50
62
|
value: key,
|
51
63
|
num_partitions: num_partitions,
|
52
64
|
seed: SEED,
|
@@ -55,9 +67,26 @@ Benchmark.ips do |x|
|
|
55
67
|
end
|
56
68
|
end
|
57
69
|
|
58
|
-
x.report(
|
59
|
-
|
60
|
-
result = conn.exec_params(
|
70
|
+
x.report("SQL Query (bigint)") do
|
71
|
+
TEST_DATA_BIGINT.each do |key, num_partitions|
|
72
|
+
result = conn.exec_params(SQL_QUERY_BIGINT, [key, SEED, MAGIC, num_partitions, UINT64_MODULUS])
|
73
|
+
result.getvalue(0, 0).to_i
|
74
|
+
end
|
75
|
+
end
|
76
|
+
|
77
|
+
x.report("Ruby Calculation (int4)") do
|
78
|
+
TEST_DATA_INT4.each do |key, num_partitions|
|
79
|
+
PgHashFunc.calculate_partition_index_int4(
|
80
|
+
value: key,
|
81
|
+
num_partitions: num_partitions,
|
82
|
+
seed: SEED
|
83
|
+
)
|
84
|
+
end
|
85
|
+
end
|
86
|
+
|
87
|
+
x.report("SQL Query (int4)") do
|
88
|
+
TEST_DATA_INT4.each do |key, num_partitions|
|
89
|
+
result = conn.exec_params(SQL_QUERY_INT4, [key, SEED, num_partitions, UINT64_MODULUS])
|
61
90
|
result.getvalue(0, 0).to_i
|
62
91
|
end
|
63
92
|
end
|
@@ -66,20 +95,20 @@ Benchmark.ips do |x|
|
|
66
95
|
end
|
67
96
|
|
68
97
|
conn.close if conn && !conn.finished?
|
69
|
-
puts
|
98
|
+
puts "Disconnected from PostgreSQL."
|
70
99
|
|
71
100
|
# Connected to PostgreSQL.
|
72
101
|
# Warming up...
|
73
102
|
# ruby 3.4.2 (2025-02-15 revision d2930f8e7a) +PRISM [arm64-darwin24]
|
74
103
|
# Warming up --------------------------------------
|
75
|
-
# Ruby Calculation 6.755k i/100ms
|
76
|
-
# SQL Query 320.000 i/100ms
|
104
|
+
# Ruby Calculation (bigint) 6.755k i/100ms
|
105
|
+
# SQL Query (bigint) 320.000 i/100ms
|
77
106
|
# Calculating -------------------------------------
|
78
|
-
# Ruby Calculation 67.103k (± 3.4%) i/s (14.90 μs/i) - 337.750k in 5.040734s
|
79
|
-
# SQL Query 3.192k (± 2.6%) i/s (313.26 μs/i) - 16.000k in 5.016067s
|
107
|
+
# Ruby Calculation (bigint) 67.103k (± 3.4%) i/s (14.90 μs/i) - 337.750k in 5.040734s
|
108
|
+
# SQL Query (bigint) 3.192k (± 2.6%) i/s (313.26 μs/i) - 16.000k in 5.016067s
|
80
109
|
|
81
110
|
# Comparison:
|
82
|
-
# Ruby Calculation: 67102.7 i/s
|
83
|
-
# SQL Query: 3192.2 i/s - 21.02x slower
|
111
|
+
# Ruby Calculation (bigint): 67102.7 i/s
|
112
|
+
# SQL Query (bigint): 3192.2 i/s - 21.02x slower
|
84
113
|
|
85
114
|
# Disconnected from PostgreSQL.
|
data/lib/pg_hash_func/hasher.rb
CHANGED
@@ -120,19 +120,42 @@ module PgHashFunc
|
|
120
120
|
|
121
121
|
hash_val = hashint8extended(value: value, seed: seed)
|
122
122
|
|
123
|
-
|
124
|
-
|
123
|
+
# First, interpret the 64-bit hash as signed, matching PostgreSQL's
|
124
|
+
# behavior where the C function's uint64 return value is received in SQL
|
125
|
+
# as a signed int8.
|
126
|
+
signed_hash = hash_val >= 0x8000_0000_0000_0000 ? hash_val - (1 << 64) : hash_val
|
127
|
+
|
128
|
+
# Now add the magic constant in signed 64-bit arithmetic (two's-
|
129
|
+
# complement wrap-around). We keep the wrap-around by masking back to
|
130
|
+
# 64-bits as PostgreSQL does with uint64 arithmetic before the cast.
|
131
|
+
unsigned_sum = (signed_hash + magic_constant) & UINT64_MASK
|
132
|
+
|
133
|
+
# Cast that wrapped result back to signed 64-bit for the final modulo.
|
134
|
+
signed_sum = unsigned_sum >= 0x8000_0000_0000_0000 ? unsigned_sum - (1 << 64) : unsigned_sum
|
135
|
+
|
136
|
+
# Follow the expression that postgres uses internally:
|
137
|
+
rem = signed_sum.remainder(num_partitions)
|
138
|
+
idx = (rem + num_partitions) % num_partitions
|
125
139
|
idx.to_i
|
126
140
|
end
|
127
141
|
|
128
142
|
# Calculates the target partition index for a given int4 value.
|
129
|
-
def self.calculate_partition_index_int4(value:, seed:,
|
143
|
+
def self.calculate_partition_index_int4(value:, seed:, num_partitions:)
|
130
144
|
raise ArgumentError, "Number of partitions must be positive" unless num_partitions.positive?
|
131
145
|
|
132
146
|
hash_val = hashint4extended(value: value, seed: seed)
|
133
147
|
|
134
|
-
|
135
|
-
|
148
|
+
signed_hash = hash_val >= 0x8000_0000_0000_0000 ? hash_val - (1 << 64) : hash_val
|
149
|
+
|
150
|
+
# PostgreSQL does *not* add the partition magic constant for int2/int4
|
151
|
+
# hash partitioning (see get_hash_partition_greatest_modulus_int4 in the
|
152
|
+
# backend). Only bigint types add the magic. Therefore we skip it here.
|
153
|
+
unsigned_sum = signed_hash & UINT64_MASK
|
154
|
+
|
155
|
+
signed_sum = unsigned_sum >= 0x8000_0000_0000_0000 ? unsigned_sum - (1 << 64) : unsigned_sum
|
156
|
+
|
157
|
+
rem = signed_sum.remainder(num_partitions)
|
158
|
+
idx = (rem + num_partitions) % num_partitions
|
136
159
|
idx.to_i
|
137
160
|
end
|
138
161
|
end
|
data/lib/pg_hash_func/version.rb
CHANGED
data/lib/pg_hash_func.rb
CHANGED
@@ -36,17 +36,16 @@ module PgHashFunc
|
|
36
36
|
|
37
37
|
# Calculates the target partition index for a given integer (int4) or smallint (int2) value based on
|
38
38
|
# PostgreSQL's default hash partitioning strategy.
|
39
|
-
# Mimics
|
40
|
-
# Note: PostgreSQL uses the same hash function (`hashint4extended` equivalent) for both int2 and int4.
|
39
|
+
# Mimics hashint4extended(value, seed) % num_partitions, with the 64-bit hash interpreted as a signed int8 and the result normalized to a non-negative index.
|
40
|
+
# Note 1: PostgreSQL uses the same hash function (`hashint4extended` equivalent) for both int2 and int4.
|
41
|
+
# Note 2: Unlike bigint hash partitioning, PostgreSQL does NOT add the magic constant for int4/int2
|
42
|
+
# hash partitioning.
|
41
43
|
#
|
42
44
|
# @param value [Integer] The partitioning key value (treated as int4/int2).
|
43
45
|
# @param num_partitions [Integer] The number of partitions for this level.
|
44
46
|
# @param seed [Integer] The 64-bit seed. Defaults to PostgreSQL's standard HASH_PARTITION_SEED.
|
45
|
-
# @param magic_constant [Integer] The magic constant. Defaults to PostgreSQL's standard PARTITION_MAGIC_CONSTANT.
|
46
47
|
# @return [Integer] The calculated partition index (0-based).
|
47
|
-
def self.calculate_partition_index_int4(value:, num_partitions:, seed: Hasher::HASH_PARTITION_SEED
|
48
|
-
|
49
|
-
Hasher.calculate_partition_index_int4(value: value, seed: seed, magic_constant: magic_constant,
|
50
|
-
num_partitions: num_partitions)
|
48
|
+
def self.calculate_partition_index_int4(value:, num_partitions:, seed: Hasher::HASH_PARTITION_SEED)
|
49
|
+
Hasher.calculate_partition_index_int4(value: value, seed: seed, num_partitions: num_partitions)
|
51
50
|
end
|
52
51
|
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pg_hash_func
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Shayon Mukherjee
|
8
8
|
bindir: exe
|
9
9
|
cert_chain: []
|
10
|
-
date: 2025-
|
10
|
+
date: 2025-05-17 00:00:00.000000000 Z
|
11
11
|
dependencies: []
|
12
12
|
description: |
|
13
13
|
Replicates PostgreSQL's default hash partitioning calculations.
|