top_secret 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/README.md +10 -0
- data/lib/top_secret/text.rb +32 -6
- data/lib/top_secret/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: c6f07bbe9856d60d42dc7b7e334c734b6613db3601461d70821ac1933d094961
|
|
4
|
+
data.tar.gz: 630437ccebb58c67b2e6b85adf3f32043aed2bd87c5a438c0152ceff6187fb27
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: f973d27a7df49c7e30b3d715ebb1bd0cc6a9e42803bbf023bb24ac91f74814a418f088aa03ea32e4159b0e3650cbc4d62a705c67c494490046b78f8cec85a155
|
|
7
|
+
data.tar.gz: 5aedf48c31dd3132ef8a7151f26dbfe09e6ef4e005839f5b0e0e09ffdd9fd21956cd814b328381bc36da252552f5736835d1ee6f7dfe831795d9bbf7016975eb
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,12 @@
|
|
|
1
1
|
## [Unreleased]
|
|
2
2
|
|
|
3
|
+
## [0.4.0] - 2025-10-31
|
|
4
|
+
|
|
5
|
+
### Added
|
|
6
|
+
|
|
7
|
+
- Added automatic caching of MITIE NER model to improve performance by avoiding expensive reinitialization
|
|
8
|
+
- Added `TopSecret::Text.clear_model_cache!` method to clear the cached model when needed
|
|
9
|
+
|
|
3
10
|
## [0.3.0] - 2025-09-19
|
|
4
11
|
|
|
5
12
|
### Added
|
data/README.md
CHANGED
|
@@ -568,6 +568,16 @@ TopSecret.configure do |config|
|
|
|
568
568
|
end
|
|
569
569
|
```
|
|
570
570
|
|
|
571
|
+
### Model caching
|
|
572
|
+
|
|
573
|
+
The MITIE NER model is automatically cached after the first initialization to avoid expensive reloading. All `TopSecret::Text` instances share the same cached model, significantly improving performance.
|
|
574
|
+
|
|
575
|
+
If you need to clear the cache (e.g., after changing the model path), use:
|
|
576
|
+
|
|
577
|
+
```ruby
|
|
578
|
+
TopSecret::Text.clear_model_cache!
|
|
579
|
+
```
|
|
580
|
+
|
|
571
581
|
### Disabling NER filtering
|
|
572
582
|
|
|
573
583
|
For improved performance or when the MITIE model file cannot be deployed, you can disable NER-based filtering entirely. This will disable people and location detection but retain all regex-based filters (credit cards, emails, phone numbers, SSNs):
|
data/lib/top_secret/text.rb
CHANGED
|
@@ -10,6 +10,36 @@ require_relative "text/global_mapping"
|
|
|
10
10
|
module TopSecret
|
|
11
11
|
# Processes text to identify and redact sensitive information using configured filters.
|
|
12
12
|
class Text
|
|
13
|
+
@mutex = Mutex.new
|
|
14
|
+
|
|
15
|
+
class << self
|
|
16
|
+
# Returns a cached MITIE model instance to avoid expensive reinitialization
|
|
17
|
+
#
|
|
18
|
+
# @return [Mitie::NER, NullModel] The cached model instance
|
|
19
|
+
def shared_model
|
|
20
|
+
return @shared_model if @shared_model
|
|
21
|
+
|
|
22
|
+
@mutex.synchronize do
|
|
23
|
+
return @shared_model if @shared_model
|
|
24
|
+
|
|
25
|
+
@shared_model = if TopSecret.model_path
|
|
26
|
+
Mitie::NER.new(TopSecret.model_path)
|
|
27
|
+
else
|
|
28
|
+
NullModel.new
|
|
29
|
+
end
|
|
30
|
+
end
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
# Clears the cached model, forcing reinitialization on next access
|
|
34
|
+
#
|
|
35
|
+
# @return [void]
|
|
36
|
+
def clear_model_cache!
|
|
37
|
+
@mutex.synchronize do
|
|
38
|
+
@shared_model = nil
|
|
39
|
+
end
|
|
40
|
+
end
|
|
41
|
+
end
|
|
42
|
+
|
|
13
43
|
# @param input [String] The original text to be filtered
|
|
14
44
|
# @param filters [Hash, nil] Optional set of filters to override the defaults
|
|
15
45
|
# @param custom_filters [Array] Additional custom filters to apply
|
|
@@ -224,15 +254,11 @@ module TopSecret
|
|
|
224
254
|
end
|
|
225
255
|
|
|
226
256
|
# Creates the default model based on configuration.
|
|
227
|
-
# Returns
|
|
257
|
+
# Returns the cached shared model to avoid expensive reinitialization.
|
|
228
258
|
#
|
|
229
259
|
# @return [Mitie::NER, NullModel] The model instance to use for NER processing
|
|
230
260
|
def default_model
|
|
231
|
-
if TopSecret.model_path
|
232
|
-
Mitie::NER.new(TopSecret.model_path)
|
|
233
|
-
else
|
|
234
|
-
NullModel.new
|
|
235
|
-
end
|
|
261
|
+
Text.shared_model
|
|
236
262
|
end
|
|
237
263
|
end
|
|
238
264
|
end
|
data/lib/top_secret/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: top_secret
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.3.0
|
|
4
|
+
version: 0.4.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Steve Polito
|
|
@@ -94,7 +94,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
94
94
|
- !ruby/object:Gem::Version
|
|
95
95
|
version: '0'
|
|
96
96
|
requirements: []
|
|
97
|
-
rubygems_version: 3.
|
|
97
|
+
rubygems_version: 3.7.2
|
|
98
98
|
specification_version: 4
|
|
99
99
|
summary: Filter sensitive information from free text.
|
|
100
100
|
test_files: []
|