rbbt-text 1.2.0 → 1.3.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/rbbt/bow/bow.rb +5 -2
- data/lib/rbbt/bow/dictionary.rb +27 -23
- data/lib/rbbt/document.rb +55 -0
- data/lib/rbbt/document/annotation.rb +45 -0
- data/lib/rbbt/document/corpus.rb +63 -0
- data/lib/rbbt/document/corpus/pubmed.rb +33 -0
- data/lib/rbbt/ner/NER.rb +3 -3
- data/lib/rbbt/ner/abner.rb +1 -1
- data/lib/rbbt/ner/banner.rb +1 -1
- data/lib/rbbt/ner/brat.rb +1 -1
- data/lib/rbbt/ner/chemical_tagger.rb +1 -2
- data/lib/rbbt/ner/g_norm_plus.rb +26 -3
- data/lib/rbbt/ner/linnaeus.rb +3 -3
- data/lib/rbbt/ner/ngram_prefix_dictionary.rb +3 -3
- data/lib/rbbt/ner/oscar3.rb +1 -2
- data/lib/rbbt/ner/oscar4.rb +3 -3
- data/lib/rbbt/ner/patterns.rb +5 -5
- data/lib/rbbt/ner/regexpNER.rb +1 -2
- data/lib/rbbt/ner/token_trieNER.rb +35 -22
- data/lib/rbbt/nlp/genia/sentence_splitter.rb +3 -2
- data/lib/rbbt/nlp/nlp.rb +5 -5
- data/lib/rbbt/nlp/open_nlp/sentence_splitter.rb +37 -36
- data/lib/rbbt/nlp/spaCy.rb +52 -0
- data/lib/rbbt/segment.rb +179 -0
- data/lib/rbbt/segment/annotation.rb +58 -0
- data/lib/rbbt/segment/encoding.rb +18 -0
- data/lib/rbbt/{text/segment → segment}/named_entity.rb +14 -11
- data/lib/rbbt/segment/overlaps.rb +63 -0
- data/lib/rbbt/segment/range_index.rb +35 -0
- data/lib/rbbt/segment/relationship.rb +7 -0
- data/lib/rbbt/{text/segment → segment}/segmented.rb +1 -1
- data/lib/rbbt/segment/token.rb +23 -0
- data/lib/rbbt/{text/segment → segment}/transformed.rb +12 -10
- data/lib/rbbt/segment/tsv.rb +41 -0
- data/share/install/software/Linnaeus +1 -1
- data/share/install/software/OpenNLP +1 -1
- data/test/rbbt/document/corpus/test_pubmed.rb +15 -0
- data/test/rbbt/document/test_annotation.rb +140 -0
- data/test/rbbt/document/test_corpus.rb +33 -0
- data/test/rbbt/ner/test_finder.rb +3 -3
- data/test/rbbt/ner/test_g_norm_plus.rb +20 -2
- data/test/rbbt/ner/test_patterns.rb +9 -9
- data/test/rbbt/ner/test_regexpNER.rb +14 -14
- data/test/rbbt/ner/test_rnorm.rb +3 -4
- data/test/rbbt/ner/test_token_trieNER.rb +1 -0
- data/test/rbbt/nlp/genia/test_sentence_splitter.rb +37 -3
- data/test/rbbt/nlp/open_nlp/test_sentence_splitter.rb +20 -4
- data/test/rbbt/segment/test_annotation.rb +39 -0
- data/test/rbbt/segment/test_corpus.rb +36 -0
- data/test/rbbt/segment/test_encoding.rb +24 -0
- data/test/rbbt/{text/segment → segment}/test_named_entity.rb +15 -11
- data/test/rbbt/segment/test_overlaps.rb +69 -0
- data/test/rbbt/segment/test_range_index.rb +42 -0
- data/test/rbbt/{text/segment → segment}/test_transformed.rb +105 -51
- data/test/rbbt/test_document.rb +14 -0
- data/test/rbbt/test_segment.rb +182 -0
- data/test/test_helper.rb +5 -3
- data/test/test_spaCy.rb +32 -0
- metadata +44 -32
- data/lib/rbbt/text/corpus.rb +0 -106
- data/lib/rbbt/text/corpus/document.rb +0 -383
- data/lib/rbbt/text/corpus/document_repo.rb +0 -68
- data/lib/rbbt/text/corpus/sources/pmid.rb +0 -34
- data/lib/rbbt/text/document.rb +0 -39
- data/lib/rbbt/text/segment.rb +0 -363
- data/lib/rbbt/text/segment/docid.rb +0 -46
- data/lib/rbbt/text/segment/relationship.rb +0 -24
- data/lib/rbbt/text/segment/token.rb +0 -49
- data/test/rbbt/text/corpus/sources/test_pmid.rb +0 -33
- data/test/rbbt/text/corpus/test_document.rb +0 -82
- data/test/rbbt/text/segment/test_relationship.rb +0 -0
- data/test/rbbt/text/segment/test_segmented.rb +0 -23
- data/test/rbbt/text/test_corpus.rb +0 -34
- data/test/rbbt/text/test_document.rb +0 -58
- data/test/rbbt/text/test_segment.rb +0 -100
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 496288d7d3ff1215ded1fd210192d5887a6a071eea5f322295a669a5d648d77b
|
4
|
+
data.tar.gz: 47996496009cbcdaab38a9dc9bf6efbbe7fc0145f315b0a48bfab0f543742f94
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 36e7415ad06207066844a30001c8541865f066d1e83a4a2ddc5182c54b704cd3d442cbccce219bd2114717a83656d07558c42725eca75597fea239b6e13244ab
|
7
|
+
data.tar.gz: 988eff4d242d0425910b96fac4188df079c8c53c3abea2825cc97d5af5118841680705fa33461a5b4cfa7b8d6b32a486465e44b75f20fad324e4623c6c8083d8
|
data/lib/rbbt/bow/bow.rb
CHANGED
@@ -69,6 +69,11 @@ module BagOfWords
|
|
69
69
|
count = bigrams ? count(bigrams(text)) : count(words(text))
|
70
70
|
count.values_at(*terms)
|
71
71
|
end
|
72
|
+
|
73
|
+
def self.weighted_features(text, weights)
|
74
|
+
features = features(text, weights.keys)
|
75
|
+
features.zip(weights.values).collect{|f,w| f * w }
|
76
|
+
end
|
72
77
|
end
|
73
78
|
|
74
79
|
class String
|
@@ -82,5 +87,3 @@ class String
|
|
82
87
|
BagOfWords.bigrams(self)
|
83
88
|
end
|
84
89
|
end
|
85
|
-
|
86
|
-
|
data/lib/rbbt/bow/dictionary.rb
CHANGED
@@ -74,28 +74,32 @@ class Dictionary::TF_IDF
|
|
74
74
|
end
|
75
75
|
|
76
76
|
def best(options = {})
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
77
|
+
key = Misc.obj2digest(options)
|
78
|
+
@best ||= {}
|
79
|
+
@best[key] ||= begin
|
80
|
+
high, low, limit = {
|
81
|
+
:low => 0,
|
82
|
+
:high => 1,
|
83
|
+
}.merge(options).
|
84
|
+
values_at(:high, :low, :limit)
|
85
|
+
|
86
|
+
num_docs = @num_docs.to_f
|
87
|
+
best = df.select{|term, value|
|
88
|
+
value >= low && value <= high
|
89
|
+
}.collect{|p|
|
90
|
+
term = p.first
|
91
|
+
df_value = p.last
|
92
|
+
[term,
|
93
|
+
@terms[term].to_f / num_docs * Math::log(1.0/df_value)
|
94
|
+
]
|
95
|
+
}
|
96
|
+
|
97
|
+
if limit
|
98
|
+
Hash[*best.sort{|a,b| b[1] <=> a[1]}.slice(0, limit-1).flatten]
|
99
|
+
else
|
100
|
+
Hash[*best.flatten]
|
101
|
+
end
|
102
|
+
end
|
99
103
|
end
|
100
104
|
|
101
105
|
def weights(options = {})
|
@@ -173,7 +177,7 @@ class Dictionary::KL
|
|
173
177
|
best[term] = pos * Math::log(pos / neg) + neg * Math::log(neg / pos)
|
174
178
|
}
|
175
179
|
if limit
|
176
|
-
Hash[*best.sort{|a,b| b[1] <=> a[1]}.slice(0, limit).flatten]
|
180
|
+
Hash[*best.sort{|a,b| b[1] <=> a[1]}.slice(0, limit-1).flatten]
|
177
181
|
else
|
178
182
|
best
|
179
183
|
end
|
data/lib/rbbt/document.rb
ADDED
@@ -0,0 +1,55 @@
|
|
1
|
+
require 'rbbt-util'
|
2
|
+
require 'rbbt/entity'
|
3
|
+
|
4
|
+
module DocID
|
5
|
+
extend Entity
|
6
|
+
self.annotation :corpus
|
7
|
+
|
8
|
+
class << self
|
9
|
+
attr_accessor :default_corpus
|
10
|
+
end
|
11
|
+
|
12
|
+
def corpus
|
13
|
+
annotation_values[:corpus] || DocID.default_corpus
|
14
|
+
end
|
15
|
+
|
16
|
+
property :to do |type|
|
17
|
+
namespace, code = self.split(":")
|
18
|
+
DocID.setup([namespace, code, "title"] * ":", :corpus => corpus)
|
19
|
+
end
|
20
|
+
|
21
|
+
property :document => :both do
|
22
|
+
if Array === self
|
23
|
+
namespace, id, type = nil, nil, nil
|
24
|
+
docs = self.collect do |docid|
|
25
|
+
text = self.corpus[docid]
|
26
|
+
namespace, id, type = docid.split(":")
|
27
|
+
text
|
28
|
+
end
|
29
|
+
Document.setup(docs, :corpus => corpus)
|
30
|
+
else
|
31
|
+
text = self.corpus[self]
|
32
|
+
namespace, id, type = self.split(":")
|
33
|
+
Document.setup(text, :namespace => namespace, :code => id, :type => type, :corpus => corpus)
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
module Document
|
39
|
+
extend Entity
|
40
|
+
self.annotation :namespace, :code, :type, :corpus
|
41
|
+
|
42
|
+
property :docid do |corpus=nil|
|
43
|
+
digest = Misc.digest(self)
|
44
|
+
corpus = self.corpus if corpus.nil?
|
45
|
+
|
46
|
+
DocID.setup([namespace, code, type, digest] * ":", :corpus => corpus)
|
47
|
+
end
|
48
|
+
|
49
|
+
property :to do |type|
|
50
|
+
docid.to(type).document
|
51
|
+
end
|
52
|
+
|
53
|
+
alias id docid
|
54
|
+
end
|
55
|
+
|
data/lib/rbbt/document/annotation.rb
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
require 'rbbt/segment'
|
2
|
+
require 'rbbt/segment/annotation'
|
3
|
+
|
4
|
+
module Document
|
5
|
+
def self.define(type, &block)
|
6
|
+
send :property, type do
|
7
|
+
segments = self.instance_exec &block
|
8
|
+
|
9
|
+
Segment.align(self, segments) unless segments.empty? || (Segment === segments && segments.offset) || (Segment === segments.first && segments.first.offset)
|
10
|
+
|
11
|
+
segments.each do |segment|
|
12
|
+
SegmentAnnotation.setup(segment, :type => type.to_s) unless SegmentAnnotation === segment && segment.type
|
13
|
+
end
|
14
|
+
|
15
|
+
docid = self.docid
|
16
|
+
segments.each{|s| s.docid = docid if s.docid.nil? }
|
17
|
+
|
18
|
+
segments
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
def self.define_multiple(type, &block)
|
23
|
+
send :property, type => :multiple do |list|
|
24
|
+
doc_segments = self.instance_exec list, &block
|
25
|
+
|
26
|
+
doc_segments = doc_segments.chunked_values_at(list) if Hash === doc_segments
|
27
|
+
|
28
|
+
doc_segments.each_with_index do |segments,i|
|
29
|
+
next if segments.nil?
|
30
|
+
document = list[i]
|
31
|
+
Segment.align(document, segments) unless segments.nil? || segments.empty? || (Segment === segments && segments.offset) || (Segment === segments.first && segments.first.offset)
|
32
|
+
|
33
|
+
segments.each do |segment|
|
34
|
+
SegmentAnnotation.setup(segment, :type => type.to_s) unless SegmentAnnotation === segment && segment.type
|
35
|
+
end
|
36
|
+
|
37
|
+
docid = document.docid
|
38
|
+
|
39
|
+
segments.each{|s| s.docid = docid if s.docid.nil? }
|
40
|
+
|
41
|
+
segments
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
data/lib/rbbt/document/corpus.rb
ADDED
@@ -0,0 +1,63 @@
|
|
1
|
+
require 'rbbt-util'
|
2
|
+
|
3
|
+
module Document::Corpus
|
4
|
+
|
5
|
+
def self.setup(corpus)
|
6
|
+
corpus.extend Document::Corpus unless Document::Corpus === corpus
|
7
|
+
corpus.extend Persist::TSVAdapter unless Persist::TSVAdapter === corpus
|
8
|
+
corpus
|
9
|
+
end
|
10
|
+
|
11
|
+
def add_document(document)
|
12
|
+
docid = document.docid
|
13
|
+
return self[docid] if self.include?(docid)
|
14
|
+
self.write_and_close do
|
15
|
+
self[docid] = document
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
def docids(prefix)
|
20
|
+
prefix += ":" unless prefix == :all || prefix[-1] == ":"
|
21
|
+
docids = self.read_and_close do
|
22
|
+
prefix == :all ? self.keys : self.prefix(prefix)
|
23
|
+
end
|
24
|
+
DocID.setup(docids, :corpus => self)
|
25
|
+
end
|
26
|
+
|
27
|
+
def documents(prefix)
|
28
|
+
self.docids(prefix).document
|
29
|
+
end
|
30
|
+
|
31
|
+
def [](*args)
|
32
|
+
docid, *rest = args
|
33
|
+
|
34
|
+
res = self.read_and_close do
|
35
|
+
super(*args)
|
36
|
+
end
|
37
|
+
|
38
|
+
res.force_encoding(Encoding.default_external) if res
|
39
|
+
return res if args.length > 1
|
40
|
+
|
41
|
+
namespace, id, type = docid.split(":")
|
42
|
+
|
43
|
+
if res.nil?
|
44
|
+
if Document::Corpus.claims.include?(namespace.to_s)
|
45
|
+
res = self.instance_exec(id, type, &Document::Corpus.claims[namespace.to_s])
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
res.force_encoding(Encoding.default_external) if res
|
50
|
+
Document.setup(res, namespace, id, type, self) unless res.nil?
|
51
|
+
|
52
|
+
res
|
53
|
+
end
|
54
|
+
|
55
|
+
class << self
|
56
|
+
attr_accessor :claims
|
57
|
+
def claim(namespace, &block)
|
58
|
+
@claims = {}
|
59
|
+
@claims[namespace.to_s] = block
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
63
|
+
end
|
data/lib/rbbt/document/corpus/pubmed.rb
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
require 'rbbt/sources/pubmed'
|
2
|
+
|
3
|
+
module Document::Corpus
|
4
|
+
def add_pmid(pmid, type = nil)
|
5
|
+
pmids = Array === pmid ? pmid : [pmid]
|
6
|
+
type = nil if String === type and type.empty?
|
7
|
+
|
8
|
+
res = PubMed.get_article(pmids).collect do |pmid, article|
|
9
|
+
document = if type.nil? || type.to_sym == :abstract
|
10
|
+
Document.setup(article.abstract || "", "PMID", pmid, :abstract, self, :corpus => self)
|
11
|
+
elsif type.to_sym == :title
|
12
|
+
Document.setup(article.title, :PMID, pmid, :title, self)
|
13
|
+
else
|
14
|
+
raise "No FullText available for #{ pmid }" if article.full_text.nil?
|
15
|
+
Document.setup(article.full_text, :PMID, pmid, :fulltext, self, :corpus => self)
|
16
|
+
end
|
17
|
+
Log.debug "Loading pmid #{pmid}"
|
18
|
+
add_document(document)
|
19
|
+
end
|
20
|
+
|
21
|
+
Document.setup(res)
|
22
|
+
end
|
23
|
+
|
24
|
+
def add_pubmed_query(query, max = 3000, type = nil)
|
25
|
+
pmids = PubMed.query(query, max)
|
26
|
+
add_pmid(pmids, type)
|
27
|
+
end
|
28
|
+
|
29
|
+
self.claim "PMID" do |id, type|
|
30
|
+
Log.debug "Claiming #{id}"
|
31
|
+
self.add_pmid(id, type).first
|
32
|
+
end
|
33
|
+
end
|
data/lib/rbbt/ner/NER.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
|
-
require 'rbbt/
|
2
|
-
require 'rbbt/
|
3
|
-
require 'rbbt/
|
1
|
+
require 'rbbt/segment'
|
2
|
+
require 'rbbt/segment/named_entity'
|
3
|
+
require 'rbbt/segment/segmented'
|
4
4
|
|
5
5
|
class NER
|
6
6
|
def entities(text, protect = false, *args)
|
data/lib/rbbt/ner/abner.rb
CHANGED
data/lib/rbbt/ner/banner.rb
CHANGED
data/lib/rbbt/ner/brat.rb
CHANGED
data/lib/rbbt/ner/chemical_tagger.rb
CHANGED
@@ -1,6 +1,5 @@
|
|
1
1
|
require 'rbbt'
|
2
2
|
require 'rjb'
|
3
|
-
require 'rbbt/text/segment'
|
4
3
|
require 'rbbt/ner/NER'
|
5
4
|
require 'rbbt/util/log'
|
6
5
|
|
@@ -8,7 +7,7 @@ class ChemicalTagger < NER
|
|
8
7
|
Rbbt.claim Rbbt.software.opt.ChemicalTagger, :install, Rbbt.share.install.software.ChemicalTagger.find
|
9
8
|
|
10
9
|
def self.init
|
11
|
-
ENV["CLASSPATH"] = ENV["CLASSPATH"].split(":").reverse * ":"
|
10
|
+
ENV["CLASSPATH"] = [ENV["CLASSPATH"].split(":"), Rbbt.software.opt.ChemicalTagger.produce.glob("*.jar").first].reverse * ":"
|
12
11
|
Rjb::load(nil, jvmargs = ['-Xms1G','-Xmx2G']) unless Rjb.loaded?
|
13
12
|
@@RbbtChemicalTagger ||= Rjb::import('RbbtChemicalTagger')
|
14
13
|
end
|
data/lib/rbbt/ner/g_norm_plus.rb
CHANGED
@@ -1,4 +1,6 @@
|
|
1
1
|
require 'rbbt-util'
|
2
|
+
require 'rbbt/segment'
|
3
|
+
require 'rbbt/segment/named_entity'
|
2
4
|
module GNormPlus
|
3
5
|
|
4
6
|
Rbbt.claim Rbbt.software.opt.GNormPlus, :install do
|
@@ -35,8 +37,8 @@ module GNormPlus
|
|
35
37
|
HomologeneID = False
|
36
38
|
Normalization2Protein = False
|
37
39
|
ShowUnNormalizedMention = False
|
40
|
+
IgnoreNER = False
|
38
41
|
DeleteTmp = True
|
39
|
-
IgnoreNER = True
|
40
42
|
EOF
|
41
43
|
|
42
44
|
def self.process(texts)
|
@@ -53,11 +55,16 @@ EOF
|
|
53
55
|
Open.mkdir 'tmp'
|
54
56
|
|
55
57
|
texts.each do |name,text|
|
58
|
+
text = Misc.fixutf8(text)
|
59
|
+
|
60
|
+
text = text.gsub('|', '#').gsub("\n", " ").gsub(/\t/,' ')
|
61
|
+
|
56
62
|
Open.write("input/#{name}.txt") do |f|
|
57
|
-
f.puts "#{name}|a|" << text
|
63
|
+
f.puts "#{name}|a|" << text
|
58
64
|
f.puts
|
59
65
|
end
|
60
66
|
end
|
67
|
+
|
61
68
|
Open.write('config', CONFIG)
|
62
69
|
CMD.cmd_log("java -Xmx20G -Xms20G -jar '#{Rbbt.software.opt.GNormPlus.produce.find}/GNormPlus.jar' 'input' 'output' 'config'")
|
63
70
|
|
@@ -69,7 +76,7 @@ EOF
|
|
69
76
|
tsv = TSV.setup({}, :key_field => key_field, :fields => ["Entities"], :type => :flat)
|
70
77
|
Dir.glob("output/*.txt").each do |file|
|
71
78
|
name = File.basename(file).sub(".txt",'')
|
72
|
-
entities = Open.read(file).split("\n")[1..-1].collect{|l| l.gsub(':', '
|
79
|
+
entities = Open.read(file).split("\n")[1..-1].collect{|l| l.gsub(':', '·').split("\t")[1..-1] * ":"}
|
73
80
|
tsv[name] = entities
|
74
81
|
end
|
75
82
|
|
@@ -79,6 +86,22 @@ EOF
|
|
79
86
|
end
|
80
87
|
end
|
81
88
|
end
|
89
|
+
|
90
|
+
def self.entities(texts)
|
91
|
+
res = {}
|
92
|
+
process(texts).each do |name, entities|
|
93
|
+
|
94
|
+
segments = entities.collect do |entity|
|
95
|
+
start, eend, literal, type, code = entity.split(":")
|
96
|
+
literal.gsub!('·',':')
|
97
|
+
|
98
|
+
NamedEntity.setup(literal, :offset => start.to_i, :entity_type => type, :code => code)
|
99
|
+
end
|
100
|
+
|
101
|
+
res[name] = segments
|
102
|
+
end
|
103
|
+
res
|
104
|
+
end
|
82
105
|
end
|
83
106
|
|
84
107
|
if __FILE__ == $0
|
data/lib/rbbt/ner/linnaeus.rb
CHANGED
@@ -1,12 +1,12 @@
|
|
1
1
|
require 'rjb'
|
2
2
|
require 'rbbt'
|
3
|
-
require 'rbbt/
|
3
|
+
require 'rbbt/segment/named_entity'
|
4
4
|
|
5
5
|
module Linnaeus
|
6
6
|
|
7
7
|
Rbbt.claim Rbbt.software.opt.Linnaeus, :install, Rbbt.share.install.software.Linnaeus.find
|
8
8
|
|
9
|
-
ARGS = ["--properties", Rbbt.software.opt.Linnaeus["species-proxy/properties.conf"].find]
|
9
|
+
ARGS = ["--properties", Rbbt.software.opt.Linnaeus.produce["species-proxy/properties.conf"].find]
|
10
10
|
|
11
11
|
|
12
12
|
Rjb::load(nil, jvmargs = ['-Xms2G','-Xmx2G']) unless Rjb.loaded?
|
@@ -31,7 +31,7 @@ module Linnaeus
|
|
31
31
|
init unless defined? @@Matcher
|
32
32
|
|
33
33
|
@@Matcher.match(text).toArray().collect do |mention|
|
34
|
-
NamedEntity.setup(mention.text(), mention.start(), "Organism", mention.ids(), mention.probabilities())
|
34
|
+
NamedEntity.setup(mention.text(), :offset => mention.start(), :entity_type => "Organism", :code => mention.ids(), :score => mention.probabilities())
|
35
35
|
end
|
36
36
|
end
|
37
37
|
end
|
data/lib/rbbt/ner/ngram_prefix_dictionary.rb
CHANGED
@@ -1,8 +1,8 @@
|
|
1
1
|
require 'rbbt'
|
2
2
|
require 'rbbt/util/misc'
|
3
3
|
require 'rbbt/tsv'
|
4
|
-
require 'rbbt/
|
5
|
-
require 'rbbt/
|
4
|
+
require 'rbbt/segment'
|
5
|
+
require 'rbbt/segment/token'
|
6
6
|
require 'rbbt/ner/NER'
|
7
7
|
require 'inline'
|
8
8
|
|
@@ -150,7 +150,7 @@ VALUE fast_start_with(VALUE str, VALUE cmp, int offset)
|
|
150
150
|
|
151
151
|
def match(text)
|
152
152
|
matches = NGramPrefixDictionary.match(index, (case_insensitive ? text.downcase : text)).collect{|name, code, offset|
|
153
|
-
NamedEntity.setup(name, offset, type, code)
|
153
|
+
NamedEntity.setup(name, :offset => offset, :entity_type => type, :code => code)
|
154
154
|
}
|
155
155
|
|
156
156
|
if case_insensitive
|