rbbt-text 1.3.2 → 1.3.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/rbbt/ner/token_trieNER.rb +2 -2
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d25c6d473e1ee0a8ba79af357571181539b6e18e6b8d11e85fcca037069be3bf
|
4
|
+
data.tar.gz: dbc3621f7fbc0ab5569b9f98a527c20cbc4192c6db211504a904364452518caf
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7a6568d91518fa0c4aedd748fe2b7c2db745a2997efb03c00993ebc24f6682422d209aa266912bcaca32c2033b6babbf9b14db2bf39973b4a33a69fa9ed07eca
|
7
|
+
data.tar.gz: 2c87eeccbb22e90c87611024429918e4a3fbdcf8212d6b6538e063d6e0116a457a6c855bafad4ac4621bfb7ae91ff18d2cea0cbcf56dfdff79c2ec88666cbf18
|
data/lib/rbbt/ner/token_trieNER.rb
CHANGED
@@ -42,7 +42,7 @@ class TokenTrieNER < NER
|
|
42
42
|
end
|
43
43
|
|
44
44
|
def self.tokenize(text, extend_to_token = true, split_at = nil, no_clean = false, stem = false, start = 0)
|
45
|
-
split_at = /\s|(\(|\)|[-."'
|
45
|
+
split_at = /\s|(\(|\)|[-."':,;])/ if split_at.nil?
|
46
46
|
|
47
47
|
tokens = []
|
48
48
|
while matchdata = text.match(split_at)
|
@@ -308,7 +308,7 @@ class TokenTrieNER < NER
|
|
308
308
|
|
309
309
|
tokens.extend EnumeratedArray
|
310
310
|
tokens.pos = 0
|
311
|
-
|
311
|
+
|
312
312
|
matches = []
|
313
313
|
while tokens.left?
|
314
314
|
new_matches = TokenTrieNER.find(@index, tokens, longest_match, slack)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-text
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.3.2
|
4
|
+
version: 1.3.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-06-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rbbt-util
|