pg_hash_func 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/.rubocop.yml +50 -0
- data/.ruby-version +1 -0
- data/CODE_OF_CONDUCT.md +74 -0
- data/Gemfile +19 -0
- data/Gemfile.lock +84 -0
- data/LICENSE.txt +21 -0
- data/README.md +149 -0
- data/Rakefile +14 -0
- data/benchmarks/file.rb +85 -0
- data/lib/pg_hash_func/hasher.rb +139 -0
- data/lib/pg_hash_func/version.rb +5 -0
- data/lib/pg_hash_func.rb +52 -0
- data/pg_hash_func.gemspec +43 -0
- data/scripts/release.sh +22 -0
- metadata +64 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 47be51c825e894ee45f7988db38544022028bb084cc82448f14409547e5eef38
|
4
|
+
data.tar.gz: 48541492aa6ac7815e05580c9b76fd522b75b033d43edc76e73ccb32ef55baad
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 5255d4ab2d65c8ce8a5e9be8be446f40932b0ae2861d4c9794a513f8746f94381adcc3e968a06191ef3c66dc85295618ce5be70887b2658935cec12e56689c4c
|
7
|
+
data.tar.gz: 7e80636f9c6a53328f3d46b0e096aba3efa44618d97b72b70b702c6f05ce721a25a7acb1f5ce953b8d3688b49b0ccb70b80a72cf3cb82733c12ff94cbdd8812e
|
data/.rspec
ADDED
data/.rubocop.yml
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
# .rubocop.yml
|
2
|
+
|
3
|
+
require:
|
4
|
+
- rubocop-performance
|
5
|
+
- rubocop-rake
|
6
|
+
- rubocop-rspec
|
7
|
+
|
8
|
+
AllCops:
|
9
|
+
NewCops: enable
|
10
|
+
TargetRubyVersion: 3.0
|
11
|
+
Exclude:
|
12
|
+
- "bin/console"
|
13
|
+
- "benchmarks/**/*"
|
14
|
+
- "vendor/**/*"
|
15
|
+
- "tmp/**/*"
|
16
|
+
|
17
|
+
Style/Documentation:
|
18
|
+
Enabled: false
|
19
|
+
|
20
|
+
Naming/MethodParameterName:
|
21
|
+
MinNameLength: 2
|
22
|
+
AllowNamesEndingInNumbers: true
|
23
|
+
|
24
|
+
Layout/LineLength:
|
25
|
+
Max: 120
|
26
|
+
|
27
|
+
Metrics/MethodLength:
|
28
|
+
Max: 15
|
29
|
+
Exclude:
|
30
|
+
- "lib/pg_hash_func/hasher.rb"
|
31
|
+
|
32
|
+
Metrics/AbcSize:
|
33
|
+
Max: 20
|
34
|
+
Exclude:
|
35
|
+
- "lib/pg_hash_func/hasher.rb"
|
36
|
+
|
37
|
+
Metrics/BlockLength:
|
38
|
+
Max: 100
|
39
|
+
Exclude:
|
40
|
+
- "pg_hash_func.gemspec"
|
41
|
+
- "spec/**/*_spec.rb"
|
42
|
+
|
43
|
+
RSpec/ExampleLength:
|
44
|
+
Max: 10
|
45
|
+
|
46
|
+
RSpec/MultipleExpectations:
|
47
|
+
Max: 5
|
48
|
+
|
49
|
+
Style/StringLiterals:
|
50
|
+
EnforcedStyle: double_quotes
|
data/.ruby-version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
3.4.2
|
data/CODE_OF_CONDUCT.md
ADDED
@@ -0,0 +1,74 @@
|
|
1
|
+
# Contributor Covenant Code of Conduct
|
2
|
+
|
3
|
+
## Our Pledge
|
4
|
+
|
5
|
+
In the interest of fostering an open and welcoming environment, we as
|
6
|
+
contributors and maintainers pledge to making participation in our project and
|
7
|
+
our community a harassment-free experience for everyone, regardless of age, body
|
8
|
+
size, disability, ethnicity, gender identity and expression, level of experience,
|
9
|
+
nationality, personal appearance, race, religion, or sexual identity and
|
10
|
+
orientation.
|
11
|
+
|
12
|
+
## Our Standards
|
13
|
+
|
14
|
+
Examples of behavior that contributes to creating a positive environment
|
15
|
+
include:
|
16
|
+
|
17
|
+
* Using welcoming and inclusive language
|
18
|
+
* Being respectful of differing viewpoints and experiences
|
19
|
+
* Gracefully accepting constructive criticism
|
20
|
+
* Focusing on what is best for the community
|
21
|
+
* Showing empathy towards other community members
|
22
|
+
|
23
|
+
Examples of unacceptable behavior by participants include:
|
24
|
+
|
25
|
+
* The use of sexualized language or imagery and unwelcome sexual attention or
|
26
|
+
advances
|
27
|
+
* Trolling, insulting/derogatory comments, and personal or political attacks
|
28
|
+
* Public or private harassment
|
29
|
+
* Publishing others' private information, such as a physical or electronic
|
30
|
+
address, without explicit permission
|
31
|
+
* Other conduct which could reasonably be considered inappropriate in a
|
32
|
+
professional setting
|
33
|
+
|
34
|
+
## Our Responsibilities
|
35
|
+
|
36
|
+
Project maintainers are responsible for clarifying the standards of acceptable
|
37
|
+
behavior and are expected to take appropriate and fair corrective action in
|
38
|
+
response to any instances of unacceptable behavior.
|
39
|
+
|
40
|
+
Project maintainers have the right and responsibility to remove, edit, or
|
41
|
+
reject comments, commits, code, wiki edits, issues, and other contributions
|
42
|
+
that are not aligned to this Code of Conduct, or to ban temporarily or
|
43
|
+
permanently any contributor for other behaviors that they deem inappropriate,
|
44
|
+
threatening, offensive, or harmful.
|
45
|
+
|
46
|
+
## Scope
|
47
|
+
|
48
|
+
This Code of Conduct applies both within project spaces and in public spaces
|
49
|
+
when an individual is representing the project or its community. Examples of
|
50
|
+
representing a project or community include using an official project e-mail
|
51
|
+
address, posting via an official social media account, or acting as an appointed
|
52
|
+
representative at an online or offline event. Representation of a project may be
|
53
|
+
further defined and clarified by project maintainers.
|
54
|
+
|
55
|
+
## Enforcement
|
56
|
+
|
57
|
+
Instances of abusive, harassing, or otherwise unacceptable behavior may be
|
58
|
+
reported by contacting the project team at shayonj@gmail.com. All
|
59
|
+
complaints will be reviewed and investigated and will result in a response that
|
60
|
+
is deemed necessary and appropriate to the circumstances. The project team is
|
61
|
+
obligated to maintain confidentiality with regard to the reporter of an incident.
|
62
|
+
Further details of specific enforcement policies may be posted separately.
|
63
|
+
|
64
|
+
Project maintainers who do not follow or enforce the Code of Conduct in good
|
65
|
+
faith may face temporary or permanent repercussions as determined by other
|
66
|
+
members of the project's leadership.
|
67
|
+
|
68
|
+
## Attribution
|
69
|
+
|
70
|
+
This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
|
71
|
+
available at [http://contributor-covenant.org/version/1/4][version]
|
72
|
+
|
73
|
+
[homepage]: http://contributor-covenant.org
|
74
|
+
[version]: http://contributor-covenant.org/version/1/4/
|
data/Gemfile
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
source "https://rubygems.org"
|
4
|
+
|
5
|
+
git_source(:github) { |repo_name| "https://github.com/#{repo_name}" }
|
6
|
+
|
7
|
+
gemspec
|
8
|
+
|
9
|
+
group :development, :test do
|
10
|
+
gem "benchmark-ips"
|
11
|
+
gem "bundler"
|
12
|
+
gem "pg"
|
13
|
+
gem "rake", "~> 13.0"
|
14
|
+
gem "rspec", "~> 3.0"
|
15
|
+
gem "rubocop", "~> 1.60" # Use a recent version
|
16
|
+
gem "rubocop-performance"
|
17
|
+
gem "rubocop-rake"
|
18
|
+
gem "rubocop-rspec"
|
19
|
+
end
|
data/Gemfile.lock
ADDED
@@ -0,0 +1,84 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
pg_hash_func (0.1.0)
|
5
|
+
|
6
|
+
GEM
|
7
|
+
remote: https://rubygems.org/
|
8
|
+
specs:
|
9
|
+
ast (2.4.3)
|
10
|
+
benchmark-ips (2.14.0)
|
11
|
+
diff-lcs (1.6.1)
|
12
|
+
json (2.11.3)
|
13
|
+
language_server-protocol (3.17.0.4)
|
14
|
+
lint_roller (1.1.0)
|
15
|
+
parallel (1.27.0)
|
16
|
+
parser (3.3.8.0)
|
17
|
+
ast (~> 2.4.1)
|
18
|
+
racc
|
19
|
+
pg (1.5.9)
|
20
|
+
prism (1.4.0)
|
21
|
+
racc (1.8.1)
|
22
|
+
rainbow (3.1.1)
|
23
|
+
rake (13.2.1)
|
24
|
+
regexp_parser (2.10.0)
|
25
|
+
rspec (3.13.0)
|
26
|
+
rspec-core (~> 3.13.0)
|
27
|
+
rspec-expectations (~> 3.13.0)
|
28
|
+
rspec-mocks (~> 3.13.0)
|
29
|
+
rspec-core (3.13.3)
|
30
|
+
rspec-support (~> 3.13.0)
|
31
|
+
rspec-expectations (3.13.3)
|
32
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
33
|
+
rspec-support (~> 3.13.0)
|
34
|
+
rspec-mocks (3.13.2)
|
35
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
36
|
+
rspec-support (~> 3.13.0)
|
37
|
+
rspec-support (3.13.2)
|
38
|
+
rubocop (1.75.3)
|
39
|
+
json (~> 2.3)
|
40
|
+
language_server-protocol (~> 3.17.0.2)
|
41
|
+
lint_roller (~> 1.1.0)
|
42
|
+
parallel (~> 1.10)
|
43
|
+
parser (>= 3.3.0.2)
|
44
|
+
rainbow (>= 2.2.2, < 4.0)
|
45
|
+
regexp_parser (>= 2.9.3, < 3.0)
|
46
|
+
rubocop-ast (>= 1.44.0, < 2.0)
|
47
|
+
ruby-progressbar (~> 1.7)
|
48
|
+
unicode-display_width (>= 2.4.0, < 4.0)
|
49
|
+
rubocop-ast (1.44.1)
|
50
|
+
parser (>= 3.3.7.2)
|
51
|
+
prism (~> 1.4)
|
52
|
+
rubocop-performance (1.25.0)
|
53
|
+
lint_roller (~> 1.1)
|
54
|
+
rubocop (>= 1.75.0, < 2.0)
|
55
|
+
rubocop-ast (>= 1.38.0, < 2.0)
|
56
|
+
rubocop-rake (0.7.1)
|
57
|
+
lint_roller (~> 1.1)
|
58
|
+
rubocop (>= 1.72.1)
|
59
|
+
rubocop-rspec (3.6.0)
|
60
|
+
lint_roller (~> 1.1)
|
61
|
+
rubocop (~> 1.72, >= 1.72.1)
|
62
|
+
ruby-progressbar (1.13.0)
|
63
|
+
unicode-display_width (3.1.4)
|
64
|
+
unicode-emoji (~> 4.0, >= 4.0.4)
|
65
|
+
unicode-emoji (4.0.4)
|
66
|
+
|
67
|
+
PLATFORMS
|
68
|
+
arm64-darwin-24
|
69
|
+
ruby
|
70
|
+
|
71
|
+
DEPENDENCIES
|
72
|
+
benchmark-ips
|
73
|
+
bundler
|
74
|
+
pg
|
75
|
+
pg_hash_func!
|
76
|
+
rake (~> 13.0)
|
77
|
+
rspec (~> 3.0)
|
78
|
+
rubocop (~> 1.60)
|
79
|
+
rubocop-performance
|
80
|
+
rubocop-rake
|
81
|
+
rubocop-rspec
|
82
|
+
|
83
|
+
BUNDLED WITH
|
84
|
+
2.6.8
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2025 Shayon Mukherjee
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,149 @@
|
|
1
|
+
# pg_hash_func
|
2
|
+
|
3
|
+
[![CI](https://github.com/shayonj/pg_hash_func/actions/workflows/ci.yml/badge.svg)](https://github.com/shayonj/pg_hash_func/actions/workflows/ci.yml)
|
4
|
+
[![Gem Version](https://badge.fury.io/rb/pg_hash_func.svg)](https://badge.fury.io/rb/pg_hash_func)
|
5
|
+
|
6
|
+
Determine the target partition index for an integer key according to PostgreSQL's default hash strategy, without querying the database.
|
7
|
+
|
8
|
+
**Supported Types:**
|
9
|
+
|
10
|
+
- **`bigint` (`int8`)**: Use `PgHashFunc.calculate_partition_index_bigint`.
|
11
|
+
- **`integer` (`int4`)** and **`smallint` (`int2`)**: Use `PgHashFunc.calculate_partition_index_int4`. (PostgreSQL uses the same underlying hash function for both `int4` and `int2`.)
|
12
|
+
|
13
|
+
**Limitations:**
|
14
|
+
|
15
|
+
- Only replicates the default `hash` partitioning strategy.
|
16
|
+
- Only supports integer-based keys (`bigint`, `integer`, `smallint`).
|
17
|
+
- Does not support hashing other data types (text, dates, floats, etc.).
|
18
|
+
- Does not support other partitioning strategies (list, range).
|
19
|
+
- Assumes PostgreSQL's standard internal seed and magic constants by default.
|
20
|
+
|
21
|
+
**Compatibility:**
|
22
|
+
|
23
|
+
- Ruby `>= 3.0.0`
|
24
|
+
- PostgreSQL `>= 11` (tested up to 16)
|
25
|
+
|
26
|
+
Note: PRs and support are very much welcome.
|
27
|
+
|
28
|
+
## Installation
|
29
|
+
|
30
|
+
Add this line to your application's Gemfile:
|
31
|
+
|
32
|
+
```ruby
|
33
|
+
gem 'pg_hash_func'
|
34
|
+
```
|
35
|
+
|
36
|
+
And then execute:
|
37
|
+
|
38
|
+
$ bundle install
|
39
|
+
|
40
|
+
Or install it yourself as:
|
41
|
+
|
42
|
+
$ gem install pg_hash_func
|
43
|
+
|
44
|
+
## Usage
|
45
|
+
|
46
|
+
**Example 1: Partitioning by bigint (e.g., User ID)**
|
47
|
+
|
48
|
+
```ruby
|
49
|
+
TABLE_PREFIX_BIGINT = "events"
|
50
|
+
USER_ID = 123_456_789_012_345 # bigint value
|
51
|
+
NUM_PARTITIONS_BIGINT = 16
|
52
|
+
index_bigint = PgHashFunc.calculate_partition_index_bigint(
|
53
|
+
value: USER_ID,
|
54
|
+
num_partitions: NUM_PARTITIONS_BIGINT
|
55
|
+
)
|
56
|
+
|
57
|
+
# Construct the partition table name
|
58
|
+
partition_name_bigint = [TABLE_PREFIX_BIGINT, index_bigint].join("_")
|
59
|
+
|
60
|
+
puts "User #{USER_ID} (bigint) belongs to partition: #{partition_name_bigint}"
|
61
|
+
# => User 123456789012345 (bigint) belongs to partition: events_14
|
62
|
+
```
|
63
|
+
|
64
|
+
**Example 2: Partitioning by integer (e.g., Tenant ID)**
|
65
|
+
|
66
|
+
```ruby
|
67
|
+
TABLE_PREFIX_INT = "tenant_data"
|
68
|
+
TENANT_ID = 987_654 # An integer value (fits in int4/int2)
|
69
|
+
NUM_PARTITIONS_INT = 32
|
70
|
+
|
71
|
+
# Calculate the index using the int4 function
|
72
|
+
# This also works correctly if TENANT_ID was a smallint
|
73
|
+
index_int = PgHashFunc.calculate_partition_index_int4(
|
74
|
+
value: TENANT_ID,
|
75
|
+
num_partitions: NUM_PARTITIONS_INT
|
76
|
+
)
|
77
|
+
|
78
|
+
partition_name_int = [TABLE_PREFIX_INT, index_int].join("_")
|
79
|
+
|
80
|
+
puts "Tenant #{TENANT_ID} (int) belongs to partition: #{partition_name_int}"
|
81
|
+
# => "tenant_data_22"
|
82
|
+
```
|
83
|
+
|
84
|
+
**Example 3: Two-Level Partitioning (bigint then int4)**
|
85
|
+
|
86
|
+
```ruby
|
87
|
+
TABLE_PREFIX_MULTI = "user_settings"
|
88
|
+
CUSTOMER_ID = 555_444_333_222_111 # bigint
|
89
|
+
SETTING_TYPE = 101 # integer
|
90
|
+
NUM_PARTITIONS_L1 = 64 # For CUSTOMER_ID
|
91
|
+
NUM_PARTITIONS_L2 = 8 # For SETTING_TYPE
|
92
|
+
|
93
|
+
# Calculate index for each level separately using the correct function
|
94
|
+
index_l1 = PgHashFunc.calculate_partition_index_bigint(value: CUSTOMER_ID, num_partitions: NUM_PARTITIONS_L1)
|
95
|
+
index_l2 = PgHashFunc.calculate_partition_index_int4(value: SETTING_TYPE, num_partitions: NUM_PARTITIONS_L2)
|
96
|
+
|
97
|
+
partition_name_multi = [TABLE_PREFIX_MULTI, index_l1, index_l2].join("_")
|
98
|
+
|
99
|
+
puts "Settings for Customer=#{CUSTOMER_ID}, Type=#{SETTING_TYPE} belong to: #{partition_name_multi}"
|
100
|
+
# => Settings for Customer=555444333222111, Type=101 belong to: user_settings_44_0
|
101
|
+
```
|
102
|
+
|
103
|
+
**Raw Hash Function**
|
104
|
+
|
105
|
+
Access the underlying PostgreSQL `hashint8extended` function directly. Primarily useful for debugging or specific integration scenarios.
|
106
|
+
|
107
|
+
```ruby
|
108
|
+
USER_ID = 123_456_789_012_345 # From Example 1
|
109
|
+
|
110
|
+
raw_hash_bigint = PgHashFunc.hashint8extended(value: USER_ID)
|
111
|
+
|
112
|
+
puts "Raw bigint hash for #{USER_ID}: #{raw_hash_bigint}"
|
113
|
+
# => Raw bigint hash for 123456789012345: 1245190300417211467
|
114
|
+
```
|
115
|
+
|
116
|
+
## Development
|
117
|
+
|
118
|
+
After checking out the repo, run `bin/setup` to install dependencies. Then, run `bundle exec rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
119
|
+
|
120
|
+
To install this gem onto your local machine, run `bundle exec rake install`.
|
121
|
+
|
122
|
+
## Releasing
|
123
|
+
|
124
|
+
1. Update the `VERSION` constant in `lib/pg_hash_func/version.rb`.
|
125
|
+
2. Commit the changes.
|
126
|
+
3. Run the release script, providing the version number:
|
127
|
+
```bash
|
128
|
+
scripts/release.sh <VERSION>
|
129
|
+
# e.g., scripts/release.sh 0.1.0
|
130
|
+
```
|
131
|
+
This script will:
|
132
|
+
- Build the gem.
|
133
|
+
- Push the gem to RubyGems.
|
134
|
+
- Create a git tag (e.g., `v0.1.0`).
|
135
|
+
- Push the tag to GitHub.
|
136
|
+
- Clean up the local gem file.
|
137
|
+
4. Create a release on GitHub using the tag created.
|
138
|
+
|
139
|
+
## Contributing
|
140
|
+
|
141
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/shayonj/pg_hash_func.
|
142
|
+
|
143
|
+
## License
|
144
|
+
|
145
|
+
The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
|
146
|
+
|
147
|
+
## Code of Conduct
|
148
|
+
|
149
|
+
Everyone interacting in the PgHashFunc project's codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/shayonj/pg_hash_func/blob/master/CODE_OF_CONDUCT.md).
|
data/Rakefile
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "bundler/gem_tasks"
|
4
|
+
require "rspec/core/rake_task"
|
5
|
+
require "rubocop/rake_task"
|
6
|
+
|
7
|
+
# Run specs
|
8
|
+
RSpec::Core::RakeTask.new(:spec)
|
9
|
+
|
10
|
+
# Run RuboCop
|
11
|
+
RuboCop::RakeTask.new(:rubocop)
|
12
|
+
|
13
|
+
# Default task: run specs and RuboCop
|
14
|
+
task default: %i[spec rubocop]
|
data/benchmarks/file.rb
ADDED
@@ -0,0 +1,85 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'benchmark/ips'
|
4
|
+
require 'pg'
|
5
|
+
require_relative '../lib/pg_hash_func'
|
6
|
+
DB_CONFIG = {
|
7
|
+
dbname: ENV['PGDATABASE'] || 'postgres',
|
8
|
+
user: ENV['PGUSER'] || 'postgres',
|
9
|
+
password: ENV['PGPASSWORD'],
|
10
|
+
host: ENV['PGHOST'] || 'localhost',
|
11
|
+
port: ENV['PGPORT'] || 5432
|
12
|
+
}.compact
|
13
|
+
|
14
|
+
# Constants from the gem
|
15
|
+
SEED = PgHashFunc::Hasher::HASH_PARTITION_SEED
|
16
|
+
MAGIC = PgHashFunc::Hasher::PARTITION_MAGIC_CONSTANT
|
17
|
+
UINT64_MODULUS = PgHashFunc::Hasher::UINT64_MASK + 1 # 2^64
|
18
|
+
|
19
|
+
TEST_DATA = [
|
20
|
+
[1, 16],
|
21
|
+
[-1, 16],
|
22
|
+
[540_364, 16],
|
23
|
+
[2**31 - 1, 32],
|
24
|
+
[-(2**31), 32],
|
25
|
+
[2**63 - 1, 64],
|
26
|
+
[-(2**63), 64],
|
27
|
+
[123_456_789_012_345, 1024],
|
28
|
+
[9_223_372_036_854_775_807, 2048]
|
29
|
+
].freeze
|
30
|
+
|
31
|
+
SQL_QUERY = <<~SQL
|
32
|
+
SELECT ( ( ((hashint8extended($1::bigint, $2::bigint)::numeric + $3::numeric) % $5::numeric) % $4::numeric ) + $4::numeric ) % $4::numeric;
|
33
|
+
SQL
|
34
|
+
|
35
|
+
begin
|
36
|
+
conn = PG.connect(DB_CONFIG)
|
37
|
+
puts 'Connected to PostgreSQL.'
|
38
|
+
rescue PG::ConnectionBad => e
|
39
|
+
puts 'Failed to connect to PostgreSQL. Ensure DB is running and configured correctly.'
|
40
|
+
puts "Error: #{e.message}"
|
41
|
+
exit(1)
|
42
|
+
end
|
43
|
+
|
44
|
+
puts 'Warming up...'
|
45
|
+
|
46
|
+
Benchmark.ips do |x|
|
47
|
+
# Pure-Ruby path: compute the partition index for every [key, num_partitions]
# pair in TEST_DATA using the gem, with the same seed and magic constant that
# are passed to the SQL query in the 'SQL Query' report.
x.report('Ruby Calculation') do
  TEST_DATA.each do |key, num_partitions|
    # The gem does not define `PgHashFunc.calculate_partition_index`; the
    # bigint variant is the counterpart of the `hashint8extended(...::bigint)`
    # expression used by SQL_QUERY, so both reports measure the same work.
    PgHashFunc.calculate_partition_index_bigint(
      value: key,
      num_partitions: num_partitions,
      seed: SEED,
      magic_constant: MAGIC
    )
  end
end
|
57
|
+
|
58
|
+
x.report('SQL Query') do
|
59
|
+
TEST_DATA.each do |key, num_partitions|
|
60
|
+
result = conn.exec_params(SQL_QUERY, [key, SEED, MAGIC, num_partitions, UINT64_MODULUS])
|
61
|
+
result.getvalue(0, 0).to_i
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
x.compare!
|
66
|
+
end
|
67
|
+
|
68
|
+
conn.close if conn && !conn.finished?
|
69
|
+
puts 'Disconnected from PostgreSQL.'
|
70
|
+
|
71
|
+
# Connected to PostgreSQL.
|
72
|
+
# Warming up...
|
73
|
+
# ruby 3.4.2 (2025-02-15 revision d2930f8e7a) +PRISM [arm64-darwin24]
|
74
|
+
# Warming up --------------------------------------
|
75
|
+
# Ruby Calculation 6.755k i/100ms
|
76
|
+
# SQL Query 320.000 i/100ms
|
77
|
+
# Calculating -------------------------------------
|
78
|
+
# Ruby Calculation 67.103k (± 3.4%) i/s (14.90 μs/i) - 337.750k in 5.040734s
|
79
|
+
# SQL Query 3.192k (± 2.6%) i/s (313.26 μs/i) - 16.000k in 5.016067s
|
80
|
+
|
81
|
+
# Comparison:
|
82
|
+
# Ruby Calculation: 67102.7 i/s
|
83
|
+
# SQL Query: 3192.2 i/s - 21.02x slower
|
84
|
+
|
85
|
+
# Disconnected from PostgreSQL.
|
data/lib/pg_hash_func/hasher.rb
ADDED
@@ -0,0 +1,139 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Based on PostgreSQL's src/common/hashfn.c (Bob Jenkins' lookup3 hash)
|
4
|
+
# and src/backend/access/hash/hashfunc.c
|
5
|
+
|
6
|
+
# Namespace for PgHashFunc implementation details.
|
7
|
+
module PgHashFunc
|
8
|
+
# Internal implementation of PostgreSQL hashing logic.
|
9
|
+
module Hasher
|
10
|
+
# Constants derived from PostgreSQL source/behavior
|
11
|
+
HASH_PARTITION_SEED = 0x7A5B22367996DCFD
|
12
|
+
PARTITION_MAGIC_CONSTANT = 0x4992394d24f64163
|
13
|
+
|
14
|
+
UINT32_MASK = 0xFFFFFFFF
|
15
|
+
UINT64_MASK = 0xFFFFFFFFFFFFFFFF
|
16
|
+
|
17
|
+
# Corresponds to rot(x, k) -> pg_rotate_left32(x, k)
|
18
|
+
def self.rot(value, rotation_bits)
|
19
|
+
value &= UINT32_MASK
|
20
|
+
(((value << rotation_bits) | (value >> (32 - rotation_bits))) & UINT32_MASK)
|
21
|
+
end
|
22
|
+
|
23
|
+
# Corresponds to mix(a, b, c) macro in hashfn.c
|
24
|
+
def self.mix(state)
|
25
|
+
a, b, c = state
|
26
|
+
a = (a - c) & UINT32_MASK
|
27
|
+
a ^= rot(c, 4)
|
28
|
+
c = (c + b) & UINT32_MASK
|
29
|
+
b = (b - a) & UINT32_MASK
|
30
|
+
b ^= rot(a, 6)
|
31
|
+
a = (a + c) & UINT32_MASK
|
32
|
+
c = (c - b) & UINT32_MASK
|
33
|
+
c ^= rot(b, 8)
|
34
|
+
b = (b + a) & UINT32_MASK
|
35
|
+
a = (a - c) & UINT32_MASK
|
36
|
+
a ^= rot(c, 16)
|
37
|
+
c = (c + b) & UINT32_MASK
|
38
|
+
b = (b - a) & UINT32_MASK
|
39
|
+
b ^= rot(a, 19)
|
40
|
+
a = (a + c) & UINT32_MASK
|
41
|
+
c = (c - b) & UINT32_MASK
|
42
|
+
c ^= rot(b, 4)
|
43
|
+
b = (b + a) & UINT32_MASK
|
44
|
+
[a, b, c]
|
45
|
+
end
|
46
|
+
|
47
|
+
# Corresponds to final(a, b, c) macro in hashfn.c
|
48
|
+
def self.final(state)
|
49
|
+
a, b, c = state
|
50
|
+
c ^= b
|
51
|
+
c = (c - rot(b, 14)) & UINT32_MASK
|
52
|
+
a ^= c
|
53
|
+
a = (a - rot(c, 11)) & UINT32_MASK
|
54
|
+
b ^= a
|
55
|
+
b = (b - rot(a, 25)) & UINT32_MASK
|
56
|
+
c ^= b
|
57
|
+
c = (c - rot(b, 16)) & UINT32_MASK
|
58
|
+
a ^= c
|
59
|
+
a = (a - rot(c, 4)) & UINT32_MASK
|
60
|
+
b ^= a
|
61
|
+
b = (b - rot(a, 14)) & UINT32_MASK
|
62
|
+
c ^= b
|
63
|
+
c = (c - rot(b, 24)) & UINT32_MASK
|
64
|
+
[a, b, c]
|
65
|
+
end
|
66
|
+
|
67
|
+
# Corresponds to hash_bytes_uint32_extended(uint32 k, uint64 seed)
|
68
|
+
# This implementation is based on analysis of specific PostgreSQL code paths
|
69
|
+
# related to partitioning, and may differ slightly from a general lookup3 implementation.
|
70
|
+
def self.hash_uint32_extended(key_value, seed)
|
71
|
+
key_value &= UINT32_MASK
|
72
|
+
seed &= UINT64_MASK
|
73
|
+
|
74
|
+
initval = 0x9e3779b9 + 4 + 3_923_095
|
75
|
+
a = b = c = initval & UINT32_MASK
|
76
|
+
|
77
|
+
# Perturb state with seed parts and mix if seed is non-zero
|
78
|
+
if seed != 0
|
79
|
+
a = (a + (seed >> 32)) & UINT32_MASK
|
80
|
+
b = (b + (seed & UINT32_MASK)) & UINT32_MASK
|
81
|
+
a, b, c = mix([a, b, c])
|
82
|
+
end
|
83
|
+
|
84
|
+
a = (a + key_value) & UINT32_MASK
|
85
|
+
_, b, c = final([a, b, c])
|
86
|
+
|
87
|
+
(((b.to_i << 32) | c.to_i) & UINT64_MASK)
|
88
|
+
end
|
89
|
+
|
90
|
+
# Corresponds to hashint8extended(int64 val, uint64 seed) logic
|
91
|
+
def self.hashint8extended(value:, seed:)
|
92
|
+
val = value.to_i
|
93
|
+
seed &= UINT64_MASK
|
94
|
+
|
95
|
+
val_masked64 = val & UINT64_MASK
|
96
|
+
lohalf = (val_masked64 & UINT32_MASK)
|
97
|
+
hihalf = ((val_masked64 >> 32) & UINT32_MASK)
|
98
|
+
|
99
|
+
val_int64 = val_masked64 > 0x7FFFFFFFFFFFFFFF ? (val_masked64 - (1 << 64)) : val_masked64
|
100
|
+
is_positive_or_zero_int64 = (val_int64 >= 0)
|
101
|
+
|
102
|
+
lohalf ^= if is_positive_or_zero_int64
|
103
|
+
hihalf
|
104
|
+
else
|
105
|
+
(~hihalf & UINT32_MASK)
|
106
|
+
end
|
107
|
+
|
108
|
+
hash_uint32_extended(lohalf, seed)
|
109
|
+
end
|
110
|
+
|
111
|
+
# Corresponds to hashint4extended(int32 val, uint64 seed) logic
|
112
|
+
def self.hashint4extended(value:, seed:)
|
113
|
+
val32 = value.to_i & UINT32_MASK
|
114
|
+
hash_uint32_extended(val32, seed & UINT64_MASK)
|
115
|
+
end
|
116
|
+
|
117
|
+
# Calculates the target partition index for a given bigint value.
|
118
|
+
def self.calculate_partition_index_bigint(value:, seed:, magic_constant:, num_partitions:)
|
119
|
+
raise ArgumentError, "Number of partitions must be positive" unless num_partitions.positive?
|
120
|
+
|
121
|
+
hash_val = hashint8extended(value: value, seed: seed)
|
122
|
+
|
123
|
+
result = (hash_val + magic_constant) & UINT64_MASK
|
124
|
+
idx = result % num_partitions
|
125
|
+
idx.to_i
|
126
|
+
end
|
127
|
+
|
128
|
+
# Calculates the target partition index for a given int4 value.
|
129
|
+
def self.calculate_partition_index_int4(value:, seed:, magic_constant:, num_partitions:)
|
130
|
+
raise ArgumentError, "Number of partitions must be positive" unless num_partitions.positive?
|
131
|
+
|
132
|
+
hash_val = hashint4extended(value: value, seed: seed)
|
133
|
+
|
134
|
+
result = (hash_val + magic_constant) & UINT64_MASK
|
135
|
+
idx = result % num_partitions
|
136
|
+
idx.to_i
|
137
|
+
end
|
138
|
+
end
|
139
|
+
end
|
data/lib/pg_hash_func.rb
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "pg_hash_func/version"
|
4
|
+
require_relative "pg_hash_func/hasher"
|
5
|
+
|
6
|
+
# Module providing functions to replicate PostgreSQL's hash partitioning logic for integer keys (bigint, integer, smallint).
|
7
|
+
module PgHashFunc
|
8
|
+
class Error < StandardError; end
|
9
|
+
|
10
|
+
# Provides functions to replicate PostgreSQL's bigint hash partitioning logic.
|
11
|
+
|
12
|
+
# Expose the raw hash function (`hashint8extended`) if needed.
|
13
|
+
# This is the core PostgreSQL hash function for bigint values.
|
14
|
+
#
|
15
|
+
# @param value [Integer] The integer value to hash.
|
16
|
+
# @param seed [Integer] The 64-bit seed. Defaults to PostgreSQL's standard HASH_PARTITION_SEED.
|
17
|
+
# @return [Integer] The 64-bit hash result (as a Ruby Integer).
|
18
|
+
def self.hashint8extended(value:, seed: Hasher::HASH_PARTITION_SEED)
|
19
|
+
Hasher.hashint8extended(value: value, seed: seed)
|
20
|
+
end
|
21
|
+
|
22
|
+
# Calculates the target partition index for a given bigint (int8) value based on
|
23
|
+
# PostgreSQL's default hash partitioning strategy.
|
24
|
+
# Mimics (hashint8extended(value, seed) + magic) % num_partitions using uint64 arithmetic.
|
25
|
+
#
|
26
|
+
# @param value [Integer] The partitioning key value (treated as bigint).
|
27
|
+
# @param num_partitions [Integer] The number of partitions for this level.
|
28
|
+
# @param seed [Integer] The 64-bit seed. Defaults to PostgreSQL's standard HASH_PARTITION_SEED.
|
29
|
+
# @param magic_constant [Integer] The magic constant. Defaults to PostgreSQL's standard PARTITION_MAGIC_CONSTANT.
|
30
|
+
# @return [Integer] The calculated partition index (0-based).
|
31
|
+
def self.calculate_partition_index_bigint(value:, num_partitions:, seed: Hasher::HASH_PARTITION_SEED,
|
32
|
+
magic_constant: Hasher::PARTITION_MAGIC_CONSTANT)
|
33
|
+
Hasher.calculate_partition_index_bigint(value: value, seed: seed, magic_constant: magic_constant,
|
34
|
+
num_partitions: num_partitions)
|
35
|
+
end
|
36
|
+
|
37
|
+
# Calculates the target partition index for a given integer (int4) or smallint (int2) value based on
|
38
|
+
# PostgreSQL's default hash partitioning strategy.
|
39
|
+
# Mimics (hashint4extended(value, seed) + magic) % num_partitions using uint64 arithmetic.
|
40
|
+
# Note: PostgreSQL uses the same hash function (`hashint4extended` equivalent) for both int2 and int4.
|
41
|
+
#
|
42
|
+
# @param value [Integer] The partitioning key value (treated as int4/int2).
|
43
|
+
# @param num_partitions [Integer] The number of partitions for this level.
|
44
|
+
# @param seed [Integer] The 64-bit seed. Defaults to PostgreSQL's standard HASH_PARTITION_SEED.
|
45
|
+
# @param magic_constant [Integer] The magic constant. Defaults to PostgreSQL's standard PARTITION_MAGIC_CONSTANT.
|
46
|
+
# @return [Integer] The calculated partition index (0-based).
|
47
|
+
def self.calculate_partition_index_int4(value:, num_partitions:, seed: Hasher::HASH_PARTITION_SEED,
|
48
|
+
magic_constant: Hasher::PARTITION_MAGIC_CONSTANT)
|
49
|
+
Hasher.calculate_partition_index_int4(value: value, seed: seed, magic_constant: magic_constant,
|
50
|
+
num_partitions: num_partitions)
|
51
|
+
end
|
52
|
+
end
|
data/pg_hash_func.gemspec
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "lib/pg_hash_func/version"
|
4
|
+
|
5
|
+
Gem::Specification.new do |spec|
|
6
|
+
spec.name = "pg_hash_func"
|
7
|
+
spec.version = PgHashFunc::VERSION
|
8
|
+
spec.authors = ["Shayon Mukherjee"]
|
9
|
+
spec.email = ["shayonj@gmail.com"]
|
10
|
+
|
11
|
+
spec.summary = "Determine the target partition index for an integer key according " \
|
12
|
+
"to PostgreSQL's default hash strategy, without querying the database."
|
13
|
+
spec.description = <<~DESC
|
14
|
+
Replicates PostgreSQL's default hash partitioning calculations.
|
15
|
+
Specifically targets the logic within `hashint8extended` (for bigint)
|
16
|
+
and `hashint4extended` (for integer/smallint) from PostgreSQL's
|
17
|
+
`src/backend/access/hash/hashfunc.c`.
|
18
|
+
DESC
|
19
|
+
spec.homepage = "https://github.com/shayonj/pg_hash_func"
|
20
|
+
spec.license = "MIT"
|
21
|
+
spec.required_ruby_version = ">= 3.0.0"
|
22
|
+
|
23
|
+
if spec.respond_to?(:metadata)
|
24
|
+
spec.metadata["homepage_uri"] = spec.homepage
|
25
|
+
spec.metadata["source_code_uri"] = "https://github.com/shayonj/pg_hash_func"
|
26
|
+
spec.metadata["changelog_uri"] = "https://github.com/shayonj/pg_hash_func/blob/main/CHANGELOG.md"
|
27
|
+
else
|
28
|
+
raise "RubyGems 2.0 or newer is required to protect against " \
|
29
|
+
"public gem pushes."
|
30
|
+
end
|
31
|
+
|
32
|
+
# Specify which files should be added to the gem when it is released.
|
33
|
+
# The `git ls-files -z` loads the files in the RubyGem that have been added into git.
|
34
|
+
spec.files = Dir.chdir(File.expand_path(__dir__)) do
|
35
|
+
`git ls-files -z`.split("\x0").reject do |f|
|
36
|
+
f.match(%r{\A(?:(?:bin|test|spec|features)/|\.(?:git|travis|circleci)|appveyor)})
|
37
|
+
end
|
38
|
+
end
|
39
|
+
spec.bindir = "exe"
|
40
|
+
spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
|
41
|
+
spec.require_paths = ["lib"]
|
42
|
+
spec.metadata["rubygems_mfa_required"] = "true"
|
43
|
+
end
|
data/scripts/release.sh
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
#!/bin/bash
|
2
|
+
|
3
|
+
set -euo pipefail
|
4
|
+
|
5
|
+
export VERSION=$1
|
6
|
+
echo "VERSION: ${VERSION}"
|
7
|
+
|
8
|
+
echo "=== Building Gem ===="
|
9
|
+
gem build pg_hash_func.gemspec
|
10
|
+
|
11
|
+
echo "=== Pushing gem ===="
|
12
|
+
gem push pg_hash_func-"$VERSION".gem
|
13
|
+
|
14
|
+
echo "=== Sleeping for 15s ===="
|
15
|
+
sleep 15
|
16
|
+
|
17
|
+
echo "=== Pushing tags to github ===="
|
18
|
+
git tag v"$VERSION"
|
19
|
+
git push origin --tags
|
20
|
+
|
21
|
+
echo "=== Cleaning up ===="
|
22
|
+
rm pg_hash_func-"$VERSION".gem
|
metadata
ADDED
@@ -0,0 +1,64 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: pg_hash_func
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Shayon Mukherjee
|
8
|
+
bindir: exe
|
9
|
+
cert_chain: []
|
10
|
+
date: 2025-04-26 00:00:00.000000000 Z
|
11
|
+
dependencies: []
|
12
|
+
description: |
|
13
|
+
Replicates PostgreSQL's default hash partitioning calculations.
|
14
|
+
Specifically targets the logic within `hashint8extended` (for bigint)
|
15
|
+
and `hashint4extended` (for integer/smallint) from PostgreSQL's
|
16
|
+
`src/backend/access/hash/hashfunc.c`.
|
17
|
+
email:
|
18
|
+
- shayonj@gmail.com
|
19
|
+
executables: []
|
20
|
+
extensions: []
|
21
|
+
extra_rdoc_files: []
|
22
|
+
files:
|
23
|
+
- ".rspec"
|
24
|
+
- ".rubocop.yml"
|
25
|
+
- ".ruby-version"
|
26
|
+
- CODE_OF_CONDUCT.md
|
27
|
+
- Gemfile
|
28
|
+
- Gemfile.lock
|
29
|
+
- LICENSE.txt
|
30
|
+
- README.md
|
31
|
+
- Rakefile
|
32
|
+
- benchmarks/file.rb
|
33
|
+
- lib/pg_hash_func.rb
|
34
|
+
- lib/pg_hash_func/hasher.rb
|
35
|
+
- lib/pg_hash_func/version.rb
|
36
|
+
- pg_hash_func.gemspec
|
37
|
+
- scripts/release.sh
|
38
|
+
homepage: https://github.com/shayonj/pg_hash_func
|
39
|
+
licenses:
|
40
|
+
- MIT
|
41
|
+
metadata:
|
42
|
+
homepage_uri: https://github.com/shayonj/pg_hash_func
|
43
|
+
source_code_uri: https://github.com/shayonj/pg_hash_func
|
44
|
+
changelog_uri: https://github.com/shayonj/pg_hash_func/blob/main/CHANGELOG.md
|
45
|
+
rubygems_mfa_required: 'true'
|
46
|
+
rdoc_options: []
|
47
|
+
require_paths:
|
48
|
+
- lib
|
49
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
50
|
+
requirements:
|
51
|
+
- - ">="
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: 3.0.0
|
54
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
55
|
+
requirements:
|
56
|
+
- - ">="
|
57
|
+
- !ruby/object:Gem::Version
|
58
|
+
version: '0'
|
59
|
+
requirements: []
|
60
|
+
rubygems_version: 3.6.6
|
61
|
+
specification_version: 4
|
62
|
+
summary: Determine the target partition index for an integer key according to PostgreSQL's
|
63
|
+
default hash strategy, without querying the database.
|
64
|
+
test_files: []
|