red-candle 1.2.0 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/lib/candle/ner.rb +11 -0
- data/lib/candle/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 15a070c7424707802e4e82d00ef5532691d76d7627d28ab0a4b5f0ac7522471f
|
4
|
+
data.tar.gz: 112d038f09eda1a6b1751935057bfefbc2355d7b4962c85461de192e3c667980
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cf43744df320c1d69773dad4713bb41e6fa9bd0f75359d3651aa53e429ac48615705682a61e793487d4a09a1f0d5a4aa28df18b3375de63916d9d1b91b2c98b2
|
7
|
+
data.tar.gz: 81fbfe62ba6135b22b34cfeb8ea99b39b12b3dbb2e4c56ac76a758903554ff9f1911132f0be86e8ca130077d6e44997cfcf61a96d48452c8e13995acf76c7e88
|
data/README.md
CHANGED
@@ -699,7 +699,7 @@ Perfect for specialized domains:
|
|
699
699
|
```ruby
|
700
700
|
# Biomedical entities
|
701
701
|
gene_patterns = [
|
702
|
-
/\b[A-Z][A-Z0-9]{2,}\b/,
|
702
|
+
/\b[A-Z][A-Z0-9]{2,10}\b/, # TP53, BRCA1, EGFR (bounded for safety)
|
703
703
|
/\bCD\d+\b/, # CD4, CD8, CD34
|
704
704
|
/\b[A-Z]+\d[A-Z]\d*\b/ # RAD51C, PALB2
|
705
705
|
]
|
data/lib/candle/ner.rb
CHANGED
@@ -1,5 +1,8 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
# Pattern validation available but not forced
|
4
|
+
# require_relative 'pattern_validator' # Uncomment if needed
|
5
|
+
|
3
6
|
module Candle
|
4
7
|
# Named Entity Recognition (NER) for token classification
|
5
8
|
#
|
@@ -189,6 +192,14 @@ module Candle
|
|
189
192
|
def recognize(text, tokenizer = nil)
|
190
193
|
entities = []
|
191
194
|
|
195
|
+
# Limit text length to prevent ReDoS on very long strings
|
196
|
+
# This is especially important for Ruby < 3.2
|
197
|
+
max_length = 1_000_000 # 1MB of text
|
198
|
+
if text.length > max_length
|
199
|
+
warn "PatternEntityRecognizer: Text truncated from #{text.length} to #{max_length} chars for safety"
|
200
|
+
text = text[0...max_length]
|
201
|
+
end
|
202
|
+
|
192
203
|
@patterns.each do |pattern|
|
193
204
|
regex = pattern.is_a?(Regexp) ? pattern : Regexp.new(pattern)
|
194
205
|
|
data/lib/candle/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: red-candle
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.2.0
|
4
|
+
version: 1.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Christopher Petersen
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2025-08-
|
12
|
+
date: 2025-08-11 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rb_sys
|