lex-privatecore 0.1.5 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +1 -0
- data/lex-privatecore.gemspec +2 -0
- data/lib/legion/extensions/privatecore/actors/audit_prune.rb +1 -1
- data/lib/legion/extensions/privatecore/client.rb +3 -0
- data/lib/legion/extensions/privatecore/helpers/boundary.rb +122 -18
- data/lib/legion/extensions/privatecore/helpers/ner_client.rb +113 -0
- data/lib/legion/extensions/privatecore/helpers/patterns.rb +174 -0
- data/lib/legion/extensions/privatecore/helpers/redactor.rb +112 -0
- data/lib/legion/extensions/privatecore/runners/embedding_guard.rb +3 -3
- data/lib/legion/extensions/privatecore/runners/privatecore.rb +40 -15
- data/lib/legion/extensions/privatecore/version.rb +1 -1
- data/lib/legion/extensions/privatecore.rb +4 -1
- data/spec/legion/extensions/privatecore/helpers/boundary_spec.rb +43 -155
- data/spec/legion/extensions/privatecore/helpers/ner_client_spec.rb +109 -0
- data/spec/legion/extensions/privatecore/helpers/patterns_spec.rb +251 -0
- data/spec/legion/extensions/privatecore/helpers/redactor_spec.rb +137 -0
- data/spec/legion/extensions/privatecore/runners/privatecore_spec.rb +48 -0
- metadata +21 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 0a5caecc722556ab9de44e192040ce1445eb96741e40e3ae18a6132af59a9d6a
|
|
4
|
+
data.tar.gz: '09f8d7852b8cd3f75f081fa7aa17a2fbd76156ed8a058ad942a0e982ffc8a2a0'
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: b26c19b98e4adf30b439d6ef01c4f920b51ef200c94d83e50e789dac6b65519d89e9765b7ad6b38c8239c307f78e9f00384285cbccb4a65bdf953b526a106952
|
|
7
|
+
data.tar.gz: f38425334a86061f4e7e21ce91bcb77298c547ee414df1aeca80a2cde5e4dc8d929d01f69aed2dca6d69b919724cef9945573d4c493011bd4d5863ea7b8c62ce
|
data/Gemfile
CHANGED
data/lex-privatecore.gemspec
CHANGED
|
@@ -6,7 +6,7 @@ module Legion
|
|
|
6
6
|
module Extensions
|
|
7
7
|
module Privatecore
|
|
8
8
|
module Actor
|
|
9
|
-
class AuditPrune < Legion::Extensions::Actors::Every
|
|
9
|
+
class AuditPrune < Legion::Extensions::Actors::Every # rubocop:disable Legion/Extension/EveryActorRequiresTime
|
|
10
10
|
def runner_class
|
|
11
11
|
Legion::Extensions::Privatecore::Runners::Privatecore
|
|
12
12
|
end
|
|
@@ -1,5 +1,8 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require 'legion/extensions/privatecore/helpers/patterns'
|
|
4
|
+
require 'legion/extensions/privatecore/helpers/redactor'
|
|
5
|
+
require 'legion/extensions/privatecore/helpers/ner_client'
|
|
3
6
|
require 'legion/extensions/privatecore/helpers/boundary'
|
|
4
7
|
require 'legion/extensions/privatecore/helpers/erasure'
|
|
5
8
|
require 'legion/extensions/privatecore/helpers/similarity'
|
|
@@ -5,15 +5,6 @@ module Legion
|
|
|
5
5
|
module Privatecore
|
|
6
6
|
module Helpers
|
|
7
7
|
module Boundary
|
|
8
|
-
# PII patterns to strip before boundary crossing
|
|
9
|
-
PII_PATTERNS = {
|
|
10
|
-
email: /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b/,
|
|
11
|
-
phone: /\b\d{3}[-.]?\d{3}[-.]?\d{4}\b/,
|
|
12
|
-
ssn: /\b\d{3}-\d{2}-\d{4}\b/,
|
|
13
|
-
ip: /\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b/
|
|
14
|
-
}.freeze
|
|
15
|
-
|
|
16
|
-
# Probe detection patterns (attempts to extract private data)
|
|
17
8
|
PROBE_PATTERNS = [
|
|
18
9
|
/what (?:does|did) .+ tell you/i,
|
|
19
10
|
/share .+ private/i,
|
|
@@ -25,16 +16,48 @@ module Legion
|
|
|
25
16
|
REDACTION_MARKER = '[REDACTED]'
|
|
26
17
|
MAX_AUDIT_LOG_SIZE = 1000
|
|
27
18
|
|
|
19
|
+
DEFAULT_ENABLED = %i[email phone ssn ip].freeze
|
|
20
|
+
DEFAULT_MODE = :redact
|
|
21
|
+
|
|
28
22
|
module_function
|
|
29
23
|
|
|
30
|
-
def strip_pii(text)
|
|
31
|
-
return text unless text.is_a?(String)
|
|
24
|
+
# Detects PII in +text+ and returns a redaction result hash.
#
# Non-String input is passed through untouched with +source: :none+.
# Otherwise the regex detections (optionally merged with NER results) are
# handed to Redactor.redact, and the result is extended with +:source+
# and +:mapping_key+.
#
# @param text [Object] value to scan; only Strings are processed
# @param mode [Symbol, nil] redaction mode override (falls back to settings, then DEFAULT_MODE)
# @param service_url [String, nil] NER service URL override
# @return [Hash] the Redactor.redact result plus :source and :mapping_key
def strip_pii(text, mode: nil, service_url: nil)
  unless text.is_a?(String)
    return { cleaned: text, mapping: {}, detections: [], source: :none }
  end

  redaction_mode = resolve_setting(mode, :redaction, :mode) || DEFAULT_MODE
  found = Patterns.detect(
    text,
    enabled: resolve_setting(nil, :patterns, :enabled) || DEFAULT_ENABLED,
    validation: resolve_setting(nil, :patterns, :validation) || {}
  )
  # apply_ner mutates +found+ in place and reports whether NER fell back.
  fell_back = apply_ner(found, text, service_url)

  redacted = Redactor.redact(text, detections: found, mode: redaction_mode)
  redacted.merge(
    source: determine_source(found, fell_back),
    mapping_key: persist_mapping_if_configured(redacted[:mapping])
  )
end
|
|
40
|
+
|
|
41
|
+
# Reports whether +text+ contains any detectable PII.
#
# Regex patterns are checked first; the NER service is consulted only when
# the regexes find nothing and either +service_url+ is given or NER is
# enabled in settings.
#
# @param text [Object] value to scan; non-Strings always yield false
# @param service_url [String, nil] NER service URL override
# @return [Boolean]
def contains_pii?(text, service_url: nil)
  return false unless text.is_a?(String)

  regex_hits = Patterns.detect(
    text,
    enabled: resolve_setting(nil, :patterns, :enabled) || DEFAULT_ENABLED,
    validation: resolve_setting(nil, :patterns, :validation) || {}
  )
  return true unless regex_hits.empty?
  return false unless service_url || ner_enabled?

  outcome = run_ner(text, service_url)
  # A fallback result is a Hash wrapping the detections; otherwise it is the list itself.
  ner_hits = outcome.is_a?(Hash) && outcome[:fallback] ? outcome[:detections] : outcome
  !ner_hits.empty?
end
|
|
39
62
|
|
|
40
63
|
def detect_probe(text)
|
|
@@ -43,10 +66,91 @@ module Legion
|
|
|
43
66
|
PROBE_PATTERNS.any? { |p| p.match?(text) }
|
|
44
67
|
end
|
|
45
68
|
|
|
46
|
-
def
|
|
47
|
-
return false unless
|
|
69
|
+
# Merges NER detections into +detections+ (mutating it in place).
#
# Does nothing when no +service_url+ is given and NER is not enabled.
#
# @param detections [Array<Hash>] regex detections; replaced with the merged list
# @param text [String] text to analyse
# @param service_url [String, nil] NER service URL override
# @return [Boolean] true when run_ner returned a fallback result, false otherwise
def apply_ner(detections, text, service_url)
  return false unless service_url || ner_enabled?

  outcome = run_ner(text, service_url)
  degraded = outcome.is_a?(Hash) && outcome[:fallback] ? true : false
  extra = degraded ? outcome[:detections] : outcome
  detections.replace(merge_detections(detections, extra))
  degraded
end
|
|
82
|
+
|
|
83
|
+
# Labels where the final detections came from.
#
# @param detections [Array<Hash>] final detection list
# @param ner_fallback [Boolean] whether the NER call fell back
# @return [Symbol] :none, :regex_fallback, :ner_and_regex, :ner or :regex
def determine_source(detections, ner_fallback)
  return :none if detections.empty?
  return :regex_fallback if ner_fallback

  # Anything not explicitly tagged :ner counts as a regex detection.
  from_ner, from_regex = detections.partition { |d| d[:source] == :ner }
  if from_ner.empty?
    :regex
  elsif from_regex.empty?
    :ner
  else
    :ner_and_regex
  end
end
|
|
99
|
+
|
|
100
|
+
# Persists a non-empty placeholder mapping when the
# privatecore.redaction.cache_mappings setting is exactly +true+.
#
# @param mapping [Hash] placeholder => original value
# @return [String, nil] the key returned by Redactor.persist_mapping, or nil when nothing is persisted
def persist_mapping_if_configured(mapping)
  caching_on = !mapping.empty? && resolve_setting(nil, :redaction, :cache_mappings) == true
  return nil unless caching_on

  ttl_seconds = resolve_setting(nil, :redaction, :cache_ttl) || 3600
  Redactor.persist_mapping(mapping: mapping, key: nil, ttl: ttl_seconds)
end
|
|
107
|
+
|
|
108
|
+
# Picks an explicit override, or looks the value up under :privatecore in
# Legion::Settings when that constant is defined.
#
# @param override [Object, nil] caller-supplied value; any non-nil value wins (including false)
# @param keys [Array<Symbol>] path below :privatecore
# @return [Object, nil]
def resolve_setting(override, *keys)
  if !override.nil?
    override
  elsif defined?(Legion::Settings)
    Legion::Settings.dig(:privatecore, *keys)
  end
end
|
|
114
|
+
|
|
115
|
+
# True only when Legion::Settings is defined and
# privatecore.ner.enabled is exactly +true+.
#
# @return [Boolean]
def ner_enabled?
  defined?(Legion::Settings) ? Legion::Settings.dig(:privatecore, :ner, :enabled) == true : false
end
|
|
120
|
+
|
|
121
|
+
# Calls the NER service for +text+.
#
# Returns an empty list without contacting the service when no URL is
# available, or when the URL is not https:// and privatecore.ner.allow_http
# is not exactly +true+.
#
# @param text [String] text to analyse
# @param service_url [String, nil] URL override; falls back to privatecore.ner.service_url
# @return [Array<Hash>, Hash] whatever NerClient.analyze returns, or []
def run_ner(text, service_url)
  endpoint = service_url || resolve_setting(nil, :ner, :service_url)
  return [] unless endpoint

  plain_http_ok = resolve_setting(nil, :ner, :allow_http) == true
  return [] if !plain_http_ok && !endpoint.start_with?('https://')

  wait_seconds = resolve_setting(nil, :ner, :timeout) || 5
  policy = resolve_setting(nil, :ner, :fallback) || :transparent
  NerClient.analyze(
    text: text,
    connection: NerClient.build_connection(service_url: endpoint, timeout: wait_seconds),
    fallback: policy,
    timeout: wait_seconds
  )
end
|
|
133
|
+
|
|
134
|
+
# Combines regex and NER detections into one start-ordered list of
# non-overlapping spans.
#
# If either list is empty the other is returned unchanged. Otherwise regex
# detections are tagged +source: :regex+, everything is ordered by start
# (longer span first on ties) and overlaps are resolved:
# * a detection fully inside the previous span is dropped;
# * a detection that partially overlaps the previous span is coalesced with
#   it into the union of both ranges, so no character flagged by either
#   detector is left outside the merged span. The longer detection supplies
#   type/category/source metadata.
#
# @param regex_detections [Array<Hash>] detections with :start/:end/:match
# @param ner_detections [Array<Hash>] detections with :start/:end/:match
# @return [Array<Hash>]
def merge_detections(regex_detections, ner_detections)
  return regex_detections if ner_detections.empty?
  return ner_detections if regex_detections.empty?

  tagged = regex_detections.map { |d| d.merge(source: :regex) } + ner_detections
  ordered = tagged.sort_by { |d| [d[:start], -(d[:end] - d[:start])] }

  ordered.each_with_object([]) do |detection, merged|
    prev = merged.last
    if prev.nil? || detection[:start] >= prev[:end]
      merged << detection
    elsif detection[:end] > prev[:end]
      merged[-1] = union_detection(prev, detection)
    end
    # Otherwise the detection lies entirely inside +prev+, which already covers it.
  end
end

# Builds one detection spanning both +prev+ and +detection+, where
# +detection+ starts inside +prev+ and ends after it.
def union_detection(prev, detection)
  longer = (detection[:end] - detection[:start]) > (prev[:end] - prev[:start]) ? detection : prev
  shared = prev[:end] - detection[:start]
  combined = if prev[:match].is_a?(String) && detection[:match].is_a?(String)
               # Append only the part of the later match that extends past +prev+.
               prev[:match] + detection[:match][shared..].to_s
             else
               longer[:match]
             end
  longer.merge(start: prev[:start], end: detection[:end], match: combined)
end
|
|
51
155
|
end
|
|
52
156
|
end
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'faraday'
|
|
4
|
+
require 'json'
|
|
5
|
+
|
|
6
|
+
module Legion
|
|
7
|
+
module Extensions
|
|
8
|
+
module Privatecore
|
|
9
|
+
module Helpers
|
|
10
|
+
module NerClient
|
|
11
|
+
class NerServiceUnavailable < StandardError; end
|
|
12
|
+
|
|
13
|
+
ENTITY_MAP = {
|
|
14
|
+
'EMAIL_ADDRESS' => :email,
|
|
15
|
+
'PHONE_NUMBER' => :phone,
|
|
16
|
+
'US_SSN' => :ssn,
|
|
17
|
+
'IP_ADDRESS' => :ip,
|
|
18
|
+
'CREDIT_CARD' => :credit_card,
|
|
19
|
+
'DATE_TIME' => :dob,
|
|
20
|
+
'MEDICAL_LICENSE' => :mrn,
|
|
21
|
+
'PERSON' => :person_name,
|
|
22
|
+
'ORGANIZATION' => :organization,
|
|
23
|
+
'LOCATION' => :location,
|
|
24
|
+
'IBAN_CODE' => :iban,
|
|
25
|
+
'US_PASSPORT' => :passport,
|
|
26
|
+
'US_DRIVER_LICENSE' => :drivers_license,
|
|
27
|
+
'CRYPTO' => :crypto,
|
|
28
|
+
'NRP' => :national_id
|
|
29
|
+
}.freeze
|
|
30
|
+
|
|
31
|
+
NER_CATEGORIES = {
|
|
32
|
+
person_name: :personal,
|
|
33
|
+
organization: :entity,
|
|
34
|
+
location: :location,
|
|
35
|
+
national_id: :government_id,
|
|
36
|
+
crypto: :crypto
|
|
37
|
+
}.freeze
|
|
38
|
+
|
|
39
|
+
module_function
|
|
40
|
+
|
|
41
|
+
# POSTs +text+ to the NER service as JSON and converts the reply into
# detection hashes.
#
# Faraday errors and JSON parse errors are routed to handle_fallback.
#
# @param text [String] text to analyse
# @param connection [#post] HTTP connection (see build_connection)
# @param fallback [Symbol] policy passed to handle_fallback on failure
# @param timeout [Numeric] per-request timeout
# @return [Array<Hash>, Hash] parsed detections, or the handle_fallback result
def analyze(text:, connection:, fallback: :transparent, timeout: 5)
  reply = connection.post do |request|
    request.headers['Content-Type'] = 'application/json'
    request.body = ::JSON.generate(text: text, language: 'en')
    request.options.timeout = timeout
  end

  parse_response(reply, text)
rescue Faraday::Error, ::JSON::ParserError => e
  handle_fallback(fallback, e)
end
|
|
52
|
+
|
|
53
|
+
# Probes the service's /health endpoint.
#
# @param connection [#get] HTTP connection
# @return [Boolean] true on HTTP 200; false on any other status or on a Faraday error (which is logged)
def available?(connection:)
  connection.get('/health').status == 200
rescue Faraday::Error => e
  Legion::Logging.warn "[privatecore] NER health check failed: #{e.message}" # rubocop:disable Legion/HelperMigration/DirectLogging
  false
end
|
|
60
|
+
|
|
61
|
+
# Creates a Faraday connection to +service_url+ that uses +timeout+ for
# both the read and the open timeout, on Faraday's default adapter.
#
# @param service_url [String] base URL of the NER service
# @param timeout [Numeric] timeout applied to both options
# @return [Faraday::Connection]
def build_connection(service_url:, timeout: 5)
  require 'faraday'
  Faraday.new(url: service_url) do |builder|
    request_options = builder.options
    request_options.timeout = timeout
    request_options.open_timeout = timeout
    builder.adapter Faraday.default_adapter
  end
end
|
|
69
|
+
|
|
70
|
+
# Converts an NER HTTP reply into detection hashes.
#
# Only HTTP 200 replies whose body is a JSON array are used. Entries are
# skipped when they are not JSON objects, when their entity type is not in
# ENTITY_MAP, or when their start/end offsets are not integers describing
# a non-empty range that begins inside +text+. An end offset past the end
# of +text+ is clamped to the text length. This prevents a malformed reply
# from producing detections with nil offsets or raising an error the
# caller does not rescue.
#
# @param response [#status, #body] HTTP response
# @param text [String] the text that was analysed (used to extract :match)
# @return [Array<Hash>] detections tagged +source: :ner+
# @raise [JSON::ParserError] when the body is not valid JSON
def parse_response(response, text)
  return [] unless response.status == 200

  entities = ::JSON.parse(response.body)
  return [] unless entities.is_a?(Array)

  entities.filter_map do |entity|
    next unless entity.is_a?(Hash)

    type = ENTITY_MAP[entity['entity_type']]
    first = entity['start']
    last = entity['end']
    next unless type && first.is_a?(Integer) && last.is_a?(Integer)
    next unless first >= 0 && first < last && first < text.length

    last = [last, text.length].min
    # Prefer the NER-specific category, then the regex pattern table (when
    # it is loaded), then :unknown.
    category = NER_CATEGORIES[type]
    category ||= Patterns::PATTERNS.dig(type, :category) if defined?(Patterns::PATTERNS)

    {
      type: type,
      category: category || :unknown,
      start: first,
      end: last,
      match: text[first...last],
      score: entity['score'],
      source: :ner
    }
  end
end
|
|
98
|
+
|
|
99
|
+
# Decides what an NER failure turns into.
#
# The policy is accepted as a Symbol or a String ("strict" and :strict are
# equivalent), so a policy read from string-valued configuration is not
# silently treated as the default branch.
#
# @param fallback [Symbol, String, nil] :transparent, :strict or anything else
# @param error [Exception] the failure being handled
# @return [Hash, Array] +{ fallback: true, detections: [] }+ for :transparent, +[]+ for unknown policies
# @raise [NerServiceUnavailable] for :strict
def handle_fallback(fallback, error)
  policy = fallback.respond_to?(:to_sym) ? fallback.to_sym : fallback

  case policy
  when :transparent
    { fallback: true, detections: [] }
  when :strict
    raise NerServiceUnavailable, "NER service unavailable: #{error.message}"
  else
    []
  end
end
|
|
109
|
+
end
|
|
110
|
+
end
|
|
111
|
+
end
|
|
112
|
+
end
|
|
113
|
+
end
|
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Legion
|
|
4
|
+
module Extensions
|
|
5
|
+
module Privatecore
|
|
6
|
+
module Helpers
|
|
7
|
+
module Patterns
|
|
8
|
+
PATTERNS = {
|
|
9
|
+
email: { regex: /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/,
|
|
10
|
+
category: :contact },
|
|
11
|
+
phone: { regex: /\b\d{3}[-.]?\d{3}[-.]?\d{4}\b/,
|
|
12
|
+
category: :contact },
|
|
13
|
+
ssn: { regex: /\b\d{3}-\d{2}-\d{4}\b/,
|
|
14
|
+
category: :government_id },
|
|
15
|
+
ip: { regex: /\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b/,
|
|
16
|
+
category: :network },
|
|
17
|
+
credit_card: { regex: /\b(?:\d[ -]*?){13,19}\b/,
|
|
18
|
+
category: :financial, checksum: :luhn },
|
|
19
|
+
dob: { regex: %r{(?:DOB|date of birth)\s*:\s*(\d{1,4}[-/]\d{1,2}[-/]\d{1,4})}i,
|
|
20
|
+
category: :personal },
|
|
21
|
+
mrn: { regex: /(?:MRN|medical record)\s*:\s*(\d{5,15})/i,
|
|
22
|
+
category: :medical },
|
|
23
|
+
passport: { regex: /\b[A-Z]\d{8}\b/,
|
|
24
|
+
category: :government_id },
|
|
25
|
+
iban: { regex: /\b[A-Z]{2}\d{2}[A-Z0-9]{11,30}\b/,
|
|
26
|
+
category: :financial, checksum: :iban },
|
|
27
|
+
drivers_license: { regex: /\b[A-Z]\d{3}-?\d{4}-?\d{4}\b/,
|
|
28
|
+
category: :government_id },
|
|
29
|
+
url: { regex: %r{https?://[^\s<>"{}|\\^`\[\]]+},
|
|
30
|
+
category: :network },
|
|
31
|
+
btc_address: { regex: /\b[13][a-km-zA-HJ-NP-Z1-9]{25,34}\b/,
|
|
32
|
+
category: :crypto, checksum: :base58check },
|
|
33
|
+
eth_address: { regex: /\b0x[0-9a-fA-F]{40}\b/,
|
|
34
|
+
category: :crypto },
|
|
35
|
+
itin: { regex: /\b9\d{2}-[7-9]\d-\d{4}\b/,
|
|
36
|
+
category: :government_id },
|
|
37
|
+
aadhaar: { regex: /\b[2-9]\d{3}\s?\d{4}\s?\d{4}\b/,
|
|
38
|
+
category: :government_id, checksum: :verhoeff },
|
|
39
|
+
api_key: { regex: /\b(?:sk|pk|rk)_(?:live|test)_[A-Za-z0-9]{20,}\b/,
|
|
40
|
+
category: :credential },
|
|
41
|
+
bearer_token: { regex: %r{Bearer\s+[A-Za-z0-9\-._~+/]+=*},
|
|
42
|
+
category: :credential },
|
|
43
|
+
aws_key: { regex: /\bAKIA[0-9A-Z]{16}\b/,
|
|
44
|
+
category: :credential }
|
|
45
|
+
}.freeze
|
|
46
|
+
|
|
47
|
+
module_function
|
|
48
|
+
|
|
49
|
+
CHECKSUM_VALIDATORS = {
|
|
50
|
+
luhn: ->(digits) { luhn_valid?(digits) },
|
|
51
|
+
iban: ->(text) { iban_valid?(text) },
|
|
52
|
+
verhoeff: ->(digits) { verhoeff_valid?(digits) },
|
|
53
|
+
base58check: ->(addr) { base58check_valid?(addr) }
|
|
54
|
+
}.freeze
|
|
55
|
+
|
|
56
|
+
# Scans +text+ with every enabled pattern in PATTERNS.
#
# When a pattern has capture groups, the first non-nil group is reported
# (value and offsets); otherwise the whole match is. Matches for a type
# whose +validation+ entry is :checksum are kept only if validate_checksum
# accepts them.
#
# @param text [Object] value to scan; non-Strings yield []
# @param enabled [#include?] pattern types to run
# @param validation [Hash] per-type validation mode
# @return [Array<Hash>] hashes with :type, :category, :start, :end, :match
def detect(text, enabled:, validation:)
  return [] unless text.is_a?(String)

  PATTERNS.each_with_object([]) do |(type, meta), found|
    next unless enabled.include?(type)

    text.scan(meta[:regex]) do
      hit = Regexp.last_match
      # Group 0 is the whole match; groups 1.. are captures.
      group = (1...hit.size).find { |idx| !hit[idx].nil? } || 0
      value = hit[group]
      next if validation[type] == :checksum && !validate_checksum(type, value)

      found << {
        type: type,
        category: meta[:category],
        start: hit.begin(group),
        end: hit.end(group),
        match: value
      }
    end
  end
end
|
|
81
|
+
|
|
82
|
+
# Runs the checksum validator configured for +type+, if any.
#
# Whitespace and hyphens are stripped from +match+ before validation.
# Types without a pattern entry, without a :checksum key, or whose
# checksum has no registered validator are treated as valid.
#
# @param type [Symbol] pattern type
# @param match [String] matched text
# @return [Boolean]
def validate_checksum(type, match)
  algorithm = PATTERNS[type] && PATTERNS[type][:checksum]
  checker = algorithm && CHECKSUM_VALIDATORS[algorithm]
  return true unless checker

  checker.call(match.gsub(/[\s-]/, ''))
end
|
|
92
|
+
|
|
93
|
+
# Checks a digit string against the Luhn checksum.
#
# The input must consist solely of ASCII digits; empty or non-numeric
# input is rejected instead of being evaluated as a run of zeros (which
# would pass the checksum).
#
# @param number [String] digits only, separators already removed
# @return [Boolean]
def luhn_valid?(number)
  digits = number.to_s
  return false unless digits.match?(/\A\d+\z/)

  # Walk right-to-left, doubling every second digit and folding two-digit
  # products back to one digit by subtracting 9.
  total = digits.reverse.each_char.with_index.sum do |char, position|
    value = char.to_i
    value *= 2 if position.odd?
    value > 9 ? value - 9 : value
  end
  (total % 10).zero?
end
|
|
103
|
+
|
|
104
|
+
# Validates an IBAN with the mod-97 check.
#
# The input is upper-cased and must match the shape used by the :iban
# pattern (two letters, two digits, 11-30 alphanumerics); anything else is
# rejected rather than raising on short or malformed strings.
#
# @param iban [String] IBAN without spaces
# @return [Boolean]
def iban_valid?(iban)
  candidate = iban.to_s.upcase
  return false unless candidate.match?(/\A[A-Z]{2}\d{2}[A-Z0-9]{11,30}\z/)

  # Move country code + check digits to the end, then map A..Z to 10..35.
  rearranged = candidate[4..] + candidate[0, 4]
  numeric = rearranged.gsub(/[A-Z]/) { |letter| (letter.ord - 55).to_s }
  (numeric.to_i % 97) == 1
end
|
|
109
|
+
|
|
110
|
+
VERHOEFF_D = [
|
|
111
|
+
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [1, 2, 3, 4, 0, 6, 7, 8, 9, 5],
|
|
112
|
+
[2, 3, 4, 0, 1, 7, 8, 9, 5, 6], [3, 4, 0, 1, 2, 8, 9, 5, 6, 7],
|
|
113
|
+
[4, 0, 1, 2, 3, 9, 5, 6, 7, 8], [5, 9, 8, 7, 6, 0, 4, 3, 2, 1],
|
|
114
|
+
[6, 5, 9, 8, 7, 1, 0, 4, 3, 2], [7, 6, 5, 9, 8, 2, 1, 0, 4, 3],
|
|
115
|
+
[8, 7, 6, 5, 9, 3, 2, 1, 0, 4], [9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
|
|
116
|
+
].freeze
|
|
117
|
+
|
|
118
|
+
VERHOEFF_P = [
|
|
119
|
+
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [1, 5, 7, 6, 2, 8, 3, 0, 9, 4],
|
|
120
|
+
[5, 8, 0, 3, 7, 9, 6, 1, 4, 2], [8, 9, 1, 6, 0, 4, 3, 5, 2, 7],
|
|
121
|
+
[9, 4, 5, 3, 1, 2, 6, 8, 7, 0], [4, 2, 8, 6, 5, 7, 3, 9, 0, 1],
|
|
122
|
+
[2, 7, 9, 3, 8, 0, 6, 4, 1, 5], [7, 0, 4, 6, 9, 1, 3, 2, 5, 8]
|
|
123
|
+
].freeze
|
|
124
|
+
|
|
125
|
+
# Checks a digit string against the Verhoeff checksum using the
# VERHOEFF_D (multiplication) and VERHOEFF_P (permutation) tables.
#
# Empty or non-numeric input is rejected; without this guard an empty
# string leaves the running checksum at 0 and is reported valid.
#
# @param number [String] digits only, separators already removed
# @return [Boolean]
def verhoeff_valid?(number)
  digits = number.to_s
  return false unless digits.match?(/\A\d+\z/)

  checksum = digits.reverse.each_char.with_index.reduce(0) do |acc, (char, position)|
    VERHOEFF_D[acc][VERHOEFF_P[position % 8][char.to_i]]
  end
  checksum.zero?
end
|
|
131
|
+
|
|
132
|
+
BASE58_ALPHABET = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'
|
|
133
|
+
|
|
134
|
+
# Verifies the Base58Check checksum of an address beginning with 1 or 3.
#
# The address is decoded to bytes, must come out as exactly 25 bytes
# (1 version + 20 payload + 4 checksum), and the last four bytes must
# equal the first four bytes of SHA-256(SHA-256(first 21 bytes)).
#
# @param address [String] candidate address
# @return [Boolean]
def base58check_valid?(address)
  return false unless address.match?(/\A[13][a-km-zA-HJ-NP-Z1-9]{25,34}\z/)

  # Base58 string -> integer.
  value = 0
  address.each_char do |symbol|
    position = BASE58_ALPHABET.index(symbol)
    return false if position.nil?

    value = (value * 58) + position
  end

  # Integer -> big-endian bytes; each leading '1' stands for a leading zero byte.
  decoded = value.zero? ? [] : value.digits(256).reverse
  zero_prefix = address[/\A1*/].length
  raw = Array.new(zero_prefix, 0) + decoded
  return false unless raw.size == 25

  require 'digest'
  versioned_payload = raw[0...-4].pack('C*')
  expected = Digest::SHA256.digest(Digest::SHA256.digest(versioned_payload)).bytes.first(4)
  expected == raw[-4..]
end
|
|
170
|
+
end
|
|
171
|
+
end
|
|
172
|
+
end
|
|
173
|
+
end
|
|
174
|
+
end
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'securerandom'
|
|
4
|
+
|
|
5
|
+
module Legion
|
|
6
|
+
module Extensions
|
|
7
|
+
module Privatecore
|
|
8
|
+
module Helpers
|
|
9
|
+
module Redactor
|
|
10
|
+
REDACTION_MARKER = '[REDACTED]'
|
|
11
|
+
|
|
12
|
+
module_function
|
|
13
|
+
|
|
14
|
+
# Replaces every detected span in +text+ according to +mode+.
#
# Detections may overlap or repeat (several patterns can match the same
# characters). They are first reduced to non-overlapping spans, because
# replacing overlapping ranges one after another would apply stale offsets
# to an already modified string and eat neighbouring text. Spans are then
# replaced from the end of the text towards the start so earlier offsets
# stay valid.
#
# @param text [Object] value to redact; non-Strings are returned untouched
# @param detections [Array<Hash>, nil] hashes with :start, :end, :type, :match
# @param mode [Symbol] redaction mode forwarded to build_replacement
# @return [Hash] +{ cleaned:, mapping:, detections: }+; :detections is the list as passed in
def redact(text, detections:, mode:)
  unchanged = { cleaned: text, mapping: {}, detections: detections }
  return unchanged unless text.is_a?(String)
  return unchanged if detections.nil? || detections.empty?

  mapping = {}
  type_counters = Hash.new(0)
  cleaned = text.dup

  non_overlapping_spans(text, detections).reverse_each do |span|
    cleaned[span[:start]...span[:end]] = build_replacement(span, mode, type_counters, mapping)
  end

  { cleaned: cleaned, mapping: mapping, detections: detections }
end

# Reduces +detections+ to start-ordered, non-overlapping spans inside +text+.
#
# Detections without integer offsets, with an empty or negative range, or
# starting past the end of the text are ignored. Overlapping detections are
# coalesced into the union of their ranges (keeping the earliest
# detection's metadata) and :match is re-read from +text+ for any span
# whose range was widened or clamped.
def non_overlapping_spans(text, detections)
  limit = text.length
  usable = detections.select do |d|
    d[:start].is_a?(Integer) && d[:end].is_a?(Integer) &&
      d[:start] >= 0 && d[:start] < limit && d[:end] > d[:start]
  end

  usable.sort_by { |d| [d[:start], -d[:end]] }.each_with_object([]) do |d, spans|
    finish = [d[:end], limit].min
    prev = spans.last
    if prev.nil? || d[:start] >= prev[:end]
      spans << (finish == d[:end] ? d : d.merge(end: finish, match: text[d[:start]...finish]))
    elsif finish > prev[:end]
      spans[-1] = prev.merge(end: finish, match: text[prev[:start]...finish])
    end
  end
end
|
|
31
|
+
|
|
32
|
+
# Produces the replacement text for one detection.
#
# +mode+ is accepted as a Symbol or a String, so a mode read from
# string-valued configuration selects the intended strategy instead of
# silently falling through to the plain marker.
#
# * :placeholder - numbered tag such as "[EMAIL_1]", recorded in +mapping+
# * :mask        - mask_value of the matched text
# * :synthetic   - generated stand-in value, recorded in +mapping+
# * anything else - REDACTION_MARKER
#
# @param detection [Hash] needs :type and :match
# @param mode [Symbol, String, nil] redaction strategy
# @param type_counters [Hash] per-type counter, mutated for :placeholder
# @param mapping [Hash] replacement => original, mutated for :placeholder and :synthetic
# @return [String]
def build_replacement(detection, mode, type_counters, mapping)
  strategy = mode.respond_to?(:to_sym) ? mode.to_sym : mode

  case strategy
  when :placeholder
    kind = detection[:type]
    type_counters[kind] += 1
    tag = "[#{kind.upcase}_#{type_counters[kind]}]"
    mapping[tag] = detection[:match]
    tag
  when :mask
    mask_value(detection[:match])
  when :synthetic
    fake = generate_synthetic(detection[:type], detection[:match])
    mapping[fake] = detection[:match]
    fake
  else
    REDACTION_MARKER
  end
end
|
|
49
|
+
|
|
50
|
+
# Replaces every letter and digit of +original+ with "*", leaving
# punctuation and whitespace in place so the value's shape stays visible.
#
# The POSIX alnum class is used so that non-ASCII letters and digits
# (e.g. accented characters in names) are masked as well.
#
# @param original [String, nil] value to mask
# @return [String]
def mask_value(original)
  original.to_s.gsub(/[[:alnum:]]/, '*')
end
|
|
53
|
+
|
|
54
|
+
# Generates a random stand-in value shaped like the given PII type.
#
# Types without a dedicated generator get random hex whose length is
# +original+'s length rounded up to an even number (minimum 2).
#
# @param type [Symbol] detection type
# @param original [String] the matched value (used only for the default branch)
# @return [String]
def generate_synthetic(type, original)
  case type
  when :ssn, :itin
    [rand(100..999), rand(10..99), rand(1000..9999)].join('-')
  when :phone
    [rand(200..999), rand(200..999), rand(1000..9999)].join('-')
  when :email
    "user#{rand(1000..9999)}@example.net"
  when :credit_card
    generate_luhn_number(16)
  when :ip
    [rand(1..254), rand(0..255), rand(0..255), rand(1..254)].join('.')
  when :aadhaar
    [rand(2000..9999), rand(1000..9999), rand(1000..9999)].join(' ')
  when :passport
    "#{('A'..'Z').to_a.sample}#{rand(10_000_000..99_999_999)}"
  when :aws_key
    "AKIA#{Array.new(16) { (('0'..'9').to_a + ('A'..'Z').to_a).sample }.join}"
  else
    byte_count = [((original.length + 1) / 2), 1].max
    SecureRandom.hex(byte_count)
  end
end
|
|
76
|
+
|
|
77
|
+
# Generates a random digit string of +length+ digits whose last digit is
# the Luhn check digit for the preceding ones.
#
# @param length [Integer] total number of digits, check digit included
# @return [String]
def generate_luhn_number(length)
  payload = Array.new(length - 1) { rand(0..9) }

  # Starting from the right-most payload digit, double every other digit
  # and fold two-digit products back to a single digit.
  total = payload.reverse_each.with_index.sum do |digit, position|
    weighted = position.even? ? digit * 2 : digit
    weighted > 9 ? weighted - 9 : weighted
  end

  [*payload, (10 - (total % 10)) % 10].join
end
|
|
88
|
+
|
|
89
|
+
# Puts the original values back in place of their placeholders.
#
# All placeholders are substituted in a single pass with the block form of
# gsub, so that
# * backslash sequences inside an original value (e.g. "\\1") are inserted
#   literally instead of being interpreted as back-references, and
# * text that was just restored is never scanned again for other
#   placeholders.
# Longer placeholders are tried first so one that is a prefix of another
# cannot shadow it.
#
# @param text [String, nil] redacted text
# @param mapping [Hash, nil] placeholder => original value
# @return [String, nil] restored copy; +text+ itself when there is nothing to restore or it is not a String
def restore(text:, mapping:)
  return text if mapping.nil? || mapping.empty?
  return text unless text.is_a?(String)

  lookup = mapping.to_h { |placeholder, original| [placeholder.to_s, original.to_s] }
  lookup.delete('') # an empty placeholder would match at every position
  return text.dup if lookup.empty?

  matcher = Regexp.union(lookup.keys.sort_by { |placeholder| -placeholder.length })
  text.gsub(matcher) { |placeholder| lookup[placeholder] }
end
|
|
96
|
+
|
|
97
|
+
# Stores +mapping+ under "privatecore:mapping:<key>" in Legion::Cache when
# that constant is defined.
#
# @param mapping [Hash] placeholder => original value
# @param key [String, nil] cache key suffix; a random UUID is generated when nil
# @param ttl [Integer] value passed to the cache as +ttl:+
# @return [String] the key suffix used (returned even when no cache is defined)
def persist_mapping(mapping:, key:, ttl:)
  cache_key = key || SecureRandom.uuid
  if defined?(Legion::Cache)
    Legion::Cache.set("privatecore:mapping:#{cache_key}", mapping, ttl: ttl) # rubocop:disable Legion/HelperMigration/DirectCache
  end
  cache_key
end
|
|
102
|
+
|
|
103
|
+
# Reads the mapping stored under "privatecore:mapping:<key>".
#
# @param key [String] key suffix returned by persist_mapping
# @return [Object, nil] the cached value, or nil when Legion::Cache is not defined
def retrieve_mapping(key:)
  Legion::Cache.get("privatecore:mapping:#{key}") if defined?(Legion::Cache) # rubocop:disable Legion/HelperMigration/DirectCache
end
|
|
108
|
+
end
|
|
109
|
+
end
|
|
110
|
+
end
|
|
111
|
+
end
|
|
112
|
+
end
|
|
@@ -4,7 +4,7 @@ module Legion
|
|
|
4
4
|
module Extensions
|
|
5
5
|
module Privatecore
|
|
6
6
|
module Runners
|
|
7
|
-
module EmbeddingGuard
|
|
7
|
+
module EmbeddingGuard # rubocop:disable Legion/Extension/RunnerIncludeHelpers
|
|
8
8
|
DEFAULT_ADVERSARIAL_PATTERNS = [
|
|
9
9
|
'ignore previous instructions',
|
|
10
10
|
'you are now',
|
|
@@ -61,7 +61,7 @@ module Legion
|
|
|
61
61
|
private
|
|
62
62
|
|
|
63
63
|
def resolve_threshold(override)
|
|
64
|
-
return override unless override.nil?
|
|
64
|
+
return override unless override.nil? # rubocop:disable Legion/Extension/RunnerReturnHash
|
|
65
65
|
|
|
66
66
|
if defined?(Legion::Settings)
|
|
67
67
|
Legion::Settings.dig(:privatecore, :embedding_guard, :threshold) || 0.85
|
|
@@ -71,7 +71,7 @@ module Legion
|
|
|
71
71
|
end
|
|
72
72
|
|
|
73
73
|
def embed(text)
|
|
74
|
-
Legion::LLM.embed(text)
|
|
74
|
+
Legion::LLM.embed(text) # rubocop:disable Legion/HelperMigration/DirectLlm
|
|
75
75
|
rescue StandardError => e
|
|
76
76
|
log.debug "[privatecore] embed error: #{e.message}"
|
|
77
77
|
nil
|