pii_cipher 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,131 @@
1
+ use magnus::{define_module, function, prelude::*, Error};
2
+ use hmac::{Hmac, Mac};
3
+ use sha2::Sha256;
4
+
5
+ type HmacSha256 = Hmac<Sha256>;
6
+
7
+ // Generate an array of HMAC-SHA256 hashes, one per sliding n-gram window of `n`
8
+ // characters across `text`. `n` defaults to 3 on the Ruby side (trigrams), but
9
+ // is configurable per attribute via `use_pii_cipher(..., gram_size:)`.
10
+ //
11
+ // Case-folding / normalization is handled on the Ruby side before this is
12
+ // called, so the same transformation is applied consistently on writes and
13
+ // queries. This function hashes exactly what it is given.
14
+ fn generate_ngram_hashes(text: String, secret_key: String, n: usize) -> Vec<String> {
15
+ let mut hashes = Vec::new();
16
+
17
+ // A window size of 0 is meaningless; return nothing rather than panic.
18
+ if n == 0 {
19
+ return hashes;
20
+ }
21
+
22
+ let chars: Vec<char> = text.chars().collect();
23
+
24
+ // If the value is shorter than the window there are no full n-grams to
25
+ // slide, so fall back to hashing the whole value. This keeps short values
26
+ // (and short exact-equal search terms) matchable against each other.
27
+ if chars.len() < n {
28
+ hashes.push(hash_string(&text, &secret_key));
29
+ return hashes;
30
+ }
31
+
32
+ // Slide a window of `n` characters across the string.
33
+ for i in 0..=(chars.len() - n) {
34
+ let gram: String = chars[i..i + n].iter().collect();
35
+ hashes.push(hash_string(&gram, &secret_key));
36
+ }
37
+
38
+ hashes
39
+ }
40
+
41
+ // Helper function to create an HMAC-SHA256 hash, hex-encoded (lowercase).
42
+ fn hash_string(data: &str, key: &str) -> String {
43
+ let mut mac =
44
+ HmacSha256::new_from_slice(key.as_bytes()).expect("HMAC can take key of any size");
45
+ mac.update(data.as_bytes());
46
+ let result = mac.finalize();
47
+ format!("{:x}", result.into_bytes())
48
+ }
49
+
50
+ // Returns a single HMAC hash of the full value — used for exact-match blind indexing.
51
+ fn generate_blind_index(text: String, secret_key: String) -> String {
52
+ hash_string(&text, &secret_key)
53
+ }
54
+
55
+ // The Ruby initialization point
56
+ #[magnus::init]
57
+ fn init() -> Result<(), Error> {
58
+ let module = define_module("PiiCipher")?;
59
+ module.define_singleton_method("generate_ngram_hashes", function!(generate_ngram_hashes, 3))?;
60
+ module.define_singleton_method("generate_blind_index", function!(generate_blind_index, 2))?;
61
+ Ok(())
62
+ }
63
+
64
#[cfg(test)]
mod tests {
    use super::*;

    const KEY: &str = "test-secret-key";

    // Blind index of `text` under the shared test key.
    fn blind(text: &str) -> String {
        generate_blind_index(text.to_string(), KEY.to_string())
    }

    // N-gram hashes of `text` under the shared test key.
    fn grams(text: &str, n: usize) -> Vec<String> {
        generate_ngram_hashes(text.to_string(), KEY.to_string(), n)
    }

    #[test]
    fn blind_index_is_deterministic() {
        let first = blind("alice@example.com");
        let second = blind("alice@example.com");
        assert_eq!(first, second);
    }

    #[test]
    fn blind_index_is_key_sensitive() {
        let under_other_key = generate_blind_index("alice".to_string(), "other-key".to_string());
        assert_ne!(blind("alice"), under_other_key);
    }

    #[test]
    fn blind_index_is_hex_sha256_length() {
        // A 32-byte digest renders as two hex characters per byte.
        assert_eq!(blind("x").len(), 64);
    }

    #[test]
    fn trigrams_produce_one_hash_per_window() {
        // Windows over the 5 chars of "Smith": Smi, mit, ith.
        assert_eq!(grams("Smith", 3).len(), 3);
    }

    #[test]
    fn ngram_size_is_configurable() {
        // Windows over "Smith" at width 4: Smit, mith.
        assert_eq!(grams("Smith", 4).len(), 2);
    }

    #[test]
    fn value_shorter_than_window_hashes_whole_value() {
        // Exactly one entry, equal to the exact-match index of the full value.
        assert_eq!(grams("ab", 3), vec![blind("ab")]);
    }

    #[test]
    fn zero_window_returns_empty() {
        assert_eq!(grams("anything", 0).len(), 0);
    }

    #[test]
    fn ngram_hashes_match_blind_index_of_each_window() {
        let hashes = grams("abcd", 3);
        assert_eq!(hashes[0], blind("abc"));
        assert_eq!(hashes[1], blind("bcd"));
    }

    #[test]
    fn handles_multibyte_characters_by_scalar() {
        // Three scalar values (each multibyte in UTF-8) form a single trigram.
        assert_eq!(grams("áéí", 3).len(), 1);
    }
}
@@ -0,0 +1,92 @@
1
+ # frozen_string_literal: true
2
+
3
+ # lib/pii_cipher/active_record_ext.rb
4
+ require "active_support/concern"
5
+
6
module PiiCipher
  # Window size used for partial (n-gram) blind indexes when none is given.
  DEFAULT_GRAM_SIZE = 3

  # Adds the `use_pii_cipher` class macro and the before-save hook that keeps
  # the blind-index columns in step with their source attributes.
  module ActiveRecordExt
    extend ActiveSupport::Concern

    class_methods do
      # Marks one or more attributes as searchable encrypted PII.
      #
      #   use_pii_cipher :email                        # partial trigram search
      #   use_pii_cipher :ssn, partial: false          # exact-match search
      #   use_pii_cipher :name, gram_size: 4           # 4-gram partial search
      #   use_pii_cipher :email, case_sensitive: true  # do not downcase
      #
      # Options:
      #   partial:        true  -> n-gram hash array written to `<attr>_bidx_array`
      #                   false -> single hash written to `<attr>_bidx`
      #   gram_size:      window size for partial search (default: 3). Only
      #                   validated and used when partial is true.
      #   case_sensitive: false (default) downcases values before hashing, so
      #                   searches are case-insensitive. The same setting must
      #                   apply to stored indexes and queries; changing it
      #                   invalidates existing indexes.
      #
      # Raises ArgumentError when partial is true and gram_size is not a
      # positive Integer.
      def use_pii_cipher(*attributes, partial: true, gram_size: PiiCipher::DEFAULT_GRAM_SIZE,
                         case_sensitive: false)
        usable_gram_size = gram_size.is_a?(Integer) && gram_size >= 1
        if partial && !usable_gram_size
          raise ArgumentError, "gram_size must be a positive integer (got #{gram_size.inspect})"
        end

        # Per-class registry of indexed attributes. It is always replaced with a
        # merged copy (never mutated in place) so repeated calls accumulate and
        # a subclass does not write into a hash shared with its parent.
        class_attribute :pii_cipher_configs unless respond_to?(:pii_cipher_configs)
        self.pii_cipher_configs ||= {}

        declared = attributes.to_h do |attribute|
          [attribute.to_sym, { partial: partial, gram_size: gram_size, case_sensitive: case_sensitive }]
        end
        self.pii_cipher_configs = pii_cipher_configs.merge(declared)

        # The callback and the query patch only need installing once per model.
        return if defined?(@_pii_cipher_configured) && @_pii_cipher_configured

        before_save :generate_pii_ciphers!
        PiiCipher.install_query_patch!
        @_pii_cipher_configured = true
      end
    end

    private

    # before_save hook. For every configured attribute, reads the current
    # attribute value through its reader and writes the matching blind index:
    # an n-gram hash array for partial attributes, a single hash otherwise.
    # Blank values clear the index so a removed value stops matching.
    def generate_pii_ciphers!
      secret = PiiCipher.secret_key

      self.class.pii_cipher_configs.each do |column, config|
        plaintext = send(column)
        index_writer = config[:partial] ? "#{column}_bidx_array=" : "#{column}_bidx="

        if plaintext.blank?
          send(index_writer, nil)
          next
        end

        normalized = PiiCipher.normalize(plaintext, config)
        index =
          if config[:partial]
            PiiCipher.generate_ngram_hashes(normalized, secret, config[:gram_size])
          else
            PiiCipher.generate_blind_index(normalized, secret)
          end
        send(index_writer, index)
      end
    end
  end
end
@@ -0,0 +1,73 @@
1
+ # frozen_string_literal: true
2
+
3
+ # lib/pii_cipher/query_interceptor.rb
4
+
5
module PiiCipher
  # Prepended onto ActiveRecord::Relation so that hash-form `where` conditions
  # on attributes declared with `use_pii_cipher` are rewritten to search the
  # blind-index columns. Patching the relation (rather than the model class)
  # also covers class-level calls, scopes, and chains such as
  # `Model.active.where(email: "alice")`.
  #
  # Only hash-form `where` on declared attributes is rewritten. String/array
  # conditions, and models that don't use PiiCipher, go straight to `super`.
  module RelationExt
    # Rewrites the PiiCipher-managed keys of a hash condition and chains the
    # rest through the normal `where`.
    #
    # Supported values for a managed key:
    #   nil    -> rows whose blind index is NULL (i.e. no stored value)
    #   scalar -> normalized, hashed, and matched against the blind index
    #   Array  -> matches rows satisfying ANY element (each element is handled
    #             as above; an empty array matches nothing)
    #
    # Returns an ActiveRecord::Relation.
    def where(*args, &block)
      opts = args.first
      configs = pii_cipher_configs_for_relation
      return super unless configs && opts.is_a?(Hash)

      encrypted_keys = opts.keys.select { |k| configs.key?(k.to_sym) }
      return super if encrypted_keys.empty?

      secret = PiiCipher.secret_key
      # Dup so we never mutate the caller's hash (e.g. `where(params)`).
      remaining = opts.dup

      relation = encrypted_keys.inject(self) do |scope, key|
        pii_cipher_condition(scope, key, remaining.delete(key), configs[key.to_sym], secret)
      end

      # Chain any remaining standard columns (e.g. status: "active").
      remaining.any? ? relation.where(remaining) : relation
    end

    private

    # The PiiCipher config for this relation's model, or nil if it doesn't use
    # PiiCipher. Guarded so prepending to the shared Relation class is a no-op
    # for every other model.
    def pii_cipher_configs_for_relation
      k = klass
      return nil unless k.respond_to?(:pii_cipher_configs)

      k.pii_cipher_configs
    end

    # Adds the blind-index condition for one managed attribute to `relation`.
    def pii_cipher_condition(relation, key, raw_term, config, secret)
      if config[:partial]
        pii_cipher_partial_condition(relation, "#{key}_bidx_array", raw_term, config, secret)
      else
        relation.where("#{key}_bidx" => pii_cipher_exact_value(raw_term, config, secret))
      end
    end

    # Exact-match lookup value: nil stays nil (IS NULL), an Array is hashed
    # element by element (IN list), anything else becomes a single hash.
    def pii_cipher_exact_value(raw_term, config, secret)
      hash_one = lambda do |term|
        term.nil? ? nil : PiiCipher.generate_blind_index(PiiCipher.normalize(term, config), secret)
      end

      raw_term.is_a?(Array) ? raw_term.map(&hash_one) : hash_one.call(raw_term)
    end

    # Partial-match condition on the jsonb n-gram array column.
    def pii_cipher_partial_condition(relation, column, raw_term, config, secret)
      # nil means "records with no value": match the cleared (NULL) index.
      return relation.where(column => nil) if raw_term.nil?

      unless raw_term.is_a?(Array)
        return relation.where("#{column} @> ?::jsonb", pii_cipher_ngram_json(raw_term, config, secret))
      end

      # An empty list can match nothing, mirroring `where(column: [])`.
      return relation.where("1=0") if raw_term.empty?

      # One containment test per term, OR-ed together; nil terms match NULL.
      binds = []
      clauses = raw_term.map do |term|
        next "#{column} IS NULL" if term.nil?

        binds << pii_cipher_ngram_json(term, config, secret)
        "#{column} @> ?::jsonb"
      end
      relation.where(clauses.join(" OR "), *binds)
    end

    # JSON array of n-gram hashes for a single (non-nil) search term.
    def pii_cipher_ngram_json(term, config, secret)
      value = PiiCipher.normalize(term, config)
      PiiCipher.generate_ngram_hashes(value, secret, config[:gram_size]).to_json
    end
  end
end
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
+ # lib/pii_cipher/railtie.rb
4
+ require "rails/railtie"
5
+
6
module PiiCipher
  # Wires PiiCipher into a Rails application during boot.
  class Railtie < Rails::Railtie
    initializer "pii_cipher.initialize" do
      # Wait for the :active_record load hook, then mix in the extension that
      # provides the `use_pii_cipher` macro.
      ActiveSupport.on_load(:active_record) { include PiiCipher::ActiveRecordExt }
    end
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
module PiiCipher
  # Gem version string.
  VERSION = "0.1.0"
end
data/lib/pii_cipher.rb ADDED
@@ -0,0 +1,51 @@
1
+ # frozen_string_literal: true
2
+
3
+ # lib/pii_cipher.rb
4
+ require_relative "pii_cipher/version"
5
+
6
+ # 1. Load the compiled Rust extension. It defines:
7
+ # PiiCipher.generate_ngram_hashes(text, secret, n) -> [hash, ...]
8
+ # PiiCipher.generate_blind_index(text, secret) -> hash
9
+ require_relative "pii_cipher/pii_cipher"
10
+
11
+ # 2. Load our Ruby logic
12
+ require_relative "pii_cipher/active_record_ext"
13
+ require_relative "pii_cipher/query_interceptor"
14
+
15
module PiiCipher
  # Raised when the HMAC secret key is not configured.
  class MissingSecretKeyError < StandardError; end

  class << self
    # The HMAC secret used for all blind indexes, read from the
    # `PII_SECRET_KEY` environment variable. Changing it invalidates every
    # existing blind index.
    #
    # Raises MissingSecretKeyError when the variable is unset, empty, or only
    # whitespace. A blank value is rejected because it would make every blind
    # index an HMAC under an empty key.
    def secret_key
      key = ENV["PII_SECRET_KEY"]
      return key unless key.nil? || key.strip.empty?

      raise MissingSecretKeyError,
            "PII_SECRET_KEY is not set. PiiCipher needs it to generate blind indexes."
    end

    # Applies the per-attribute normalization (currently just case-folding)
    # that must be identical on writes and queries.
    #
    # value  - anything; converted with `to_s`.
    # config - attribute config hash; a truthy `:case_sensitive` keeps the
    #          string as-is, otherwise it is downcased.
    #
    # Returns a String.
    def normalize(value, config)
      text = value.to_s
      return text if config[:case_sensitive]

      text.downcase
    end

    # Prepends the query patch onto ActiveRecord::Relation at most once.
    # Does nothing (and stays un-installed) while ActiveRecord::Relation is not
    # yet defined, so a later call can still install it.
    #
    # Returns true when the patch was installed by this call, nil otherwise.
    def install_query_patch!
      return if @query_patch_installed
      return unless defined?(ActiveRecord::Relation)

      ActiveRecord::Relation.prepend(PiiCipher::RelationExt)
      @query_patch_installed = true
    end
  end
end
49
+
50
+ # 3. Load the Railtie ONLY if Rails is present in the user's app
51
+ require_relative "pii_cipher/railtie" if defined?(Rails)
data/mise.toml ADDED
@@ -0,0 +1,2 @@
1
+ [tools]
2
+ ruby = "3.3"
@@ -0,0 +1,16 @@
1
+ module PiiCipher
2
+ VERSION: String
3
+ DEFAULT_GRAM_SIZE: Integer
4
+
5
+ class MissingSecretKeyError < StandardError
6
+ end
7
+
8
+ # Implemented in the Rust extension.
9
+ def self.generate_ngram_hashes: (String text, String secret_key, Integer n) -> Array[String]
10
+ def self.generate_blind_index: (String text, String secret_key) -> String
11
+
12
+ # Implemented in Ruby (lib/pii_cipher.rb).
13
+ def self.secret_key: () -> String
14
+ def self.normalize: (untyped value, Hash[Symbol, untyped] config) -> String
15
+ def self.install_query_patch!: () -> void
16
+ end
metadata ADDED
@@ -0,0 +1,115 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: pii_cipher
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Selva Chezhian
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2026-06-29 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rb_sys
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: 0.9.128
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: 0.9.128
27
+ - !ruby/object:Gem::Dependency
28
+ name: activerecord
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '7.1'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '7.1'
41
+ - !ruby/object:Gem::Dependency
42
+ name: railties
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '7.1'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '7.1'
55
+ description: |
56
+ PiiCipher lets you search encrypted PII columns in ActiveRecord without ever
57
+ storing or querying plaintext. It generates HMAC-SHA256 blind indexes alongside
58
+ your ciphertext — trigram arrays for partial (substring) searches and single
59
+ hashes for exact-match lookups. The hash functions run in a native Rust
60
+ extension for performance. Query interception is transparent: call `where`
61
+ as normal and PiiCipher rewrites the query against the blind index automatically.
62
+ email:
63
+ - selvachezhian.labam@gmail.com
64
+ executables: []
65
+ extensions:
66
+ - ext/pii_cipher/extconf.rb
67
+ extra_rdoc_files: []
68
+ files:
69
+ - CHANGELOG.md
70
+ - CODE_OF_CONDUCT.md
71
+ - Cargo.lock
72
+ - Cargo.toml
73
+ - LICENSE.txt
74
+ - README.md
75
+ - Rakefile
76
+ - benchmarks/run.rb
77
+ - ext/pii_cipher/Cargo.toml
78
+ - ext/pii_cipher/build.rs
79
+ - ext/pii_cipher/extconf.rb
80
+ - ext/pii_cipher/src/lib.rs
81
+ - lib/pii_cipher.rb
82
+ - lib/pii_cipher/active_record_ext.rb
83
+ - lib/pii_cipher/query_interceptor.rb
84
+ - lib/pii_cipher/railtie.rb
85
+ - lib/pii_cipher/version.rb
86
+ - mise.toml
87
+ - sig/pii_cipher.rbs
88
+ homepage: https://github.com/selvachezhian/pii_cipher
89
+ licenses:
90
+ - MIT
91
+ metadata:
92
+ allowed_push_host: https://rubygems.org
93
+ homepage_uri: https://github.com/selvachezhian/pii_cipher
94
+ source_code_uri: https://github.com/selvachezhian/pii_cipher
95
+ changelog_uri: https://github.com/selvachezhian/pii_cipher/blob/main/CHANGELOG.md
96
+ post_install_message:
97
+ rdoc_options: []
98
+ require_paths:
99
+ - lib
100
+ required_ruby_version: !ruby/object:Gem::Requirement
101
+ requirements:
102
+ - - ">="
103
+ - !ruby/object:Gem::Version
104
+ version: 3.1.0
105
+ required_rubygems_version: !ruby/object:Gem::Requirement
106
+ requirements:
107
+ - - ">="
108
+ - !ruby/object:Gem::Version
109
+ version: '0'
110
+ requirements: []
111
+ rubygems_version: 3.5.22
112
+ signing_key:
113
+ specification_version: 4
114
+ summary: Searchable blind indexing for PII fields in Rails, powered by a Rust extension.
115
+ test_files: []