rbbt-text 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/rbbt/bow/bow.rb +87 -0
- data/lib/rbbt/bow/dictionary.rb +187 -0
- data/lib/rbbt/bow/misc.rb +7 -0
- data/lib/rbbt/ner/regexpNER.rb +61 -0
- data/test/rbbt/bow/test_bow.rb +30 -0
- data/test/rbbt/bow/test_dictionary.rb +91 -0
- data/test/rbbt/bow/test_misc.rb +9 -0
- data/test/rbbt/ner/test_regexpNER.rb +32 -0
- data/test/test_helper.rb +4 -0
- metadata +92 -0
data/lib/rbbt/bow/bow.rb
ADDED
@@ -0,0 +1,87 @@
|
|
1
|
+
require 'rbbt'
|
2
|
+
require 'rbbt/bow/misc'
|
3
|
+
require 'stemmer'
|
4
|
+
|
5
|
+
# This module provides methods to extract a bag of words (or bag of bigrams)
|
6
|
+
# representation for strings of text, and to produce a vector representations
|
7
|
+
# of that bag of words for a given list of terms. This BOW representations of
|
8
|
+
# the texts is usually first used to build a Dictionary, and then, with the
|
9
|
+
# best selection of terms as determined by the Dictionary::TF_IDF.best of
|
10
|
+
# Dictionary::KL.best methods, determine the vector representations for that
|
11
|
+
# text.
|
12
|
+
# This module provides methods to extract a bag of words (or bag of bigrams)
# representation for strings of text, and to produce a vector representation
# of that bag of words for a given list of terms. This BOW representation of
# the texts is usually first used to build a Dictionary, and then, with the
# best selection of terms as determined by the Dictionary::TF_IDF.best or
# Dictionary::KL.best methods, determine the vector representations for that
# text.
module BagOfWords
  # Divide the input string into an array of words (sequences of \w
  # characters). Words are downcased and stemmed, then filtered to drop
  # stopwords, words shorter than 3 characters, and words without any
  # lowercase letter. The stopword list is the global $stopwords variable
  # (loaded via 'rbbt/util/misc' wordlists).
  def self.words(text)
    return [] if text.nil?
    raise "Stopword list not loaded. Have you installed the wordlists? (rbbt_config prepare wordlists)" if $stopwords.nil?

    tokens = text.scan(/\w+/).collect { |token| token.downcase.stem }
    tokens.select do |token|
      !$stopwords.include?(token) &&
        token.length > 2 &&
        token =~ /[a-z]/
    end
  end

  # Return the words of the text followed by every bigram (pair of
  # consecutive words joined by a single space).
  def self.bigrams(text)
    tokens = words(text)
    pairs = tokens.each_cons(2).collect { |first, second| "#{first} #{second}" }

    tokens + pairs
  end

  # Given an array of terms return a hash with the number of appearances of
  # each term. Unseen terms default to 0.
  def self.count(terms)
    counts = Hash.new(0)
    terms.each { |term| counts[term] += 1 }
    counts
  end

  # Given a string of text find all the words (or, when +bigrams+ is true,
  # words plus bigrams) and return a hash with their counts.
  def self.terms(text, bigrams = true)
    bigrams ? count(bigrams(text)) : count(words(text))
  end

  # Given a string of text and a list of terms, which may or may not contain
  # bigrams, return an array with one entry per term holding the number of
  # occurrences of that term in the text. When +bigrams+ is not given it is
  # inferred from whether any term contains a space.
  def self.features(text, terms, bigrams = nil)
    bigrams ||= terms.any? { |term| term =~ / / }
    counts = bigrams ? count(bigrams(text)) : count(words(text))
    counts.values_at(*terms)
  end
end
|
74
|
+
|
75
|
+
# Convenience shortcuts on String that delegate to BagOfWords.
class String
  # Shortcut for BagOfWords.words(self)
  def words
    BagOfWords.words self
  end

  # Shortcut for BagOfWords.bigrams(self)
  def bigrams
    BagOfWords.bigrams self
  end
end
|
86
|
+
|
87
|
+
|
@@ -0,0 +1,187 @@
|
|
1
|
+
# Plain term-count accumulator: feed it one term => count hash per document
# and it keeps the running totals per term.
class Dictionary
  # Hash of term => accumulated count; unseen terms default to 0.
  attr_reader :terms

  def initialize
    @terms = Hash.new(0)
  end

  # Merge a document's term-count hash into the running totals.
  # (The block parameter is accepted for interface compatibility but unused.)
  def add(terms, &block)
    terms.each do |term, count|
      @terms[term] += count
    end
  end
end
|
13
|
+
|
14
|
+
class Dictionary
  # TF-IDF dictionary: feed it one term-count hash per document with #add,
  # then query term frequency, document frequency, inverse document
  # frequency, and the tf*idf score per term.
  class TF_IDF
    attr_reader :terms, :docs, :total_terms, :num_docs

    # Options:
    #   :limit - once the vocabulary grows past this size, terms not already
    #            seen are ignored by #add (defaults to 500_000)
    def initialize(options = {})
      @term_limit = { :limit => 500_000 }.merge(options)[:limit]

      @terms = Hash.new(0)
      @docs = Hash.new(0)
      @num_docs = 0
      @total_terms = 0
    end

    # Register the term counts of one document.
    def add(terms)
      if @term_limit && @terms.length > @term_limit
        # vocabulary is full: keep only already-known terms
        # NOTE(review): delete_if mutates the caller's hash, as the original did
        terms = terms.delete_if { |term, count| !@terms.include? term }
      end

      terms.each do |term, count|
        @terms[term] += count
        @total_terms += count
        @docs[term] += 1
      end
      @num_docs += 1
    end

    # Document frequency: fraction of documents that contain each term.
    def df
      @docs.each_with_object(Hash.new(0)) do |(term, count), acc|
        acc[term] = count.to_f / @num_docs
      end
    end

    # Term frequency: fraction of all term occurrences that belong to each term.
    def tf
      @terms.each_with_object(Hash.new(0)) do |(term, count), acc|
        acc[term] = count.to_f / @total_terms
      end
    end

    # Inverse document frequency: log(num_docs / documents-containing-term).
    def idf
      total = @num_docs.to_f
      @docs.each_with_object(Hash.new(0)) do |(term, count), acc|
        acc[term] = Math::log(total / count)
      end
    end

    # tf * idf score per term.
    def tf_idf
      total = @num_docs.to_f
      @docs.each_with_object(Hash.new(0)) do |(term, count), acc|
        acc[term] = @terms[term].to_f / @total_terms * Math::log(total / count)
      end
    end

    # Score terms whose document frequency lies in [low, hi] and return the
    # top :limit of them (all of them when :limit is not given) as a hash of
    # term => score.
    def best(options = {})
      hi, low, limit = { :low => 0, :hi => 1 }.merge(options).
        values_at(:hi, :low, :limit)

      total = @num_docs.to_f
      scored = df.select { |term, value|
        value >= low && value <= hi
      }.collect { |term, df_value|
        [term, @terms[term].to_f / total * Math::log(1.0 / df_value)]
      }

      if limit
        Hash[*scored.sort { |a, b| b[1] <=> a[1] }.slice(0, limit).flatten]
      else
        Hash[*scored.flatten]
      end
    end

    # idf weights for the terms selected by #best.
    def weights(options = {})
      total = @num_docs.to_f
      best(options).keys.each_with_object({}) do |term, acc|
        acc[term] = Math::log(total / @docs[term])
      end
    end
  end
end
|
112
|
+
|
113
|
+
class Dictionary
  # Kullback-Leibler dictionary: keeps two TF_IDF dictionaries, one for
  # positive documents and one for negative ones, and scores each term by
  # the symmetric KL divergence of its document frequencies in both classes.
  class KL
    attr_reader :pos_dict, :neg_dict

    def initialize(options = {})
      @pos_dict = Dictionary::TF_IDF.new(options)
      @neg_dict = Dictionary::TF_IDF.new(options)
    end

    # Union of the vocabularies of both classes.
    def terms
      (pos_dict.terms.keys + neg_dict.terms.keys).uniq
    end

    # Add a document's term counts under class +c+ (:+ or '+' means positive;
    # anything else is treated as negative).
    def add(terms, c)
      target = (c == :+ || c == '+') ? @pos_dict : @neg_dict
      target.add(terms)
    end

    # Symmetric KL divergence of the positive/negative document frequencies
    # for every known term.
    def kl
      pos_df = @pos_dict.df
      neg_df = @neg_dict.df

      terms.each_with_object({}) do |term, acc|
        acc[term] = divergence(pos_df[term], neg_df[term])
      end
    end

    # KL scores restricted to terms whose document frequency (in either
    # class) lies in [low, hi]; the top :limit are returned when given.
    def best(options = {})
      hi, low, limit = { :low => 0, :hi => 1 }.merge(options).
        values_at(:hi, :low, :limit)

      pos_df = @pos_dict.df
      neg_df = @neg_dict.df

      best = {}
      terms.select { |term|
        pos_df[term] >= low && pos_df[term] <= hi ||
          neg_df[term] >= low && neg_df[term] <= hi
      }.each { |term|
        best[term] = divergence(pos_df[term], neg_df[term])
      }

      if limit
        Hash[*best.sort { |a, b| b[1] <=> a[1] }.slice(0, limit).flatten]
      else
        Hash[*best.flatten]
      end
    end

    # For the KL dictionary the term weights are the #best scores themselves.
    def weights(options = {})
      best(options)
    end

    private

    # Clamp both frequencies away from 0 and 1 (to keep the logarithms
    # finite) and compute pos*log(pos/neg) + neg*log(neg/pos).
    def divergence(pos, neg)
      pos = 0.000001 if pos == 0
      pos = 0.999999 if pos == 1
      neg = 0.000001 if neg == 0
      neg = 0.999999 if neg == 1

      pos * Math::log(pos / neg) + neg * Math::log(neg / pos)
    end
  end
end
|
@@ -0,0 +1,61 @@
|
|
1
|
+
require 'rbbt-util'
|
2
|
+
require 'rbbt/bow/misc'
|
3
|
+
|
4
|
+
class RegExpNER
|
5
|
+
|
6
|
+
def self.build_re(names, ignorecase=true)
|
7
|
+
res = names.compact.reject{|n| n.empty?}.
|
8
|
+
sort_by{|a| a.length}.reverse.collect{|n| Regexp.quote(n) }
|
9
|
+
|
10
|
+
/\b(#{ res.join("|").gsub(/\\?\s/,'\s+') })\b/
|
11
|
+
end
|
12
|
+
|
13
|
+
def initialize(lexicon, options = {})
|
14
|
+
options = Misc.add_defaults options, :flatten => true, :case_insensitive => true, :stopwords => nil
|
15
|
+
|
16
|
+
if $stopwords and (options[:stopwords].nil? || options[:stopwords] == true)
|
17
|
+
options[:stopwords] = $stopwords
|
18
|
+
else
|
19
|
+
options[:stopwords] = []
|
20
|
+
end
|
21
|
+
|
22
|
+
data = TSV.new(lexicon, options)
|
23
|
+
|
24
|
+
@index = {}
|
25
|
+
data.collect{|code, names|
|
26
|
+
next if code.nil? || code == ""
|
27
|
+
if options[:stopwords].any?
|
28
|
+
names = names.select{|n|
|
29
|
+
! options[:stopwords].include?(options[:case_insensitive] ? n.downcase : n)
|
30
|
+
}
|
31
|
+
end
|
32
|
+
@index[code] = RegExpNER.build_re(names, options[:case_insensitive])
|
33
|
+
}
|
34
|
+
end
|
35
|
+
|
36
|
+
def self.match_re(text, res)
|
37
|
+
res = [res] unless Array === res
|
38
|
+
|
39
|
+
res.collect{|re|
|
40
|
+
text.scan(re)
|
41
|
+
}.flatten
|
42
|
+
end
|
43
|
+
|
44
|
+
def match_hash(text)
|
45
|
+
return {} if text.nil? or text.empty?
|
46
|
+
matches = {}
|
47
|
+
@index.each{|code, re|
|
48
|
+
RegExpNER.match_re(text, re).each{|match|
|
49
|
+
matches[code] ||= []
|
50
|
+
matches[code] << match
|
51
|
+
}
|
52
|
+
}
|
53
|
+
matches
|
54
|
+
end
|
55
|
+
|
56
|
+
def match(text)
|
57
|
+
match_hash(text)
|
58
|
+
end
|
59
|
+
|
60
|
+
end
|
61
|
+
|
@@ -0,0 +1,30 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/../../test_helper')
|
2
|
+
require 'rbbt/bow/bow'
|
3
|
+
require 'test/unit'
|
4
|
+
|
5
|
+
class TestBow < Test::Unit::TestCase

  # Words are downcased, stemmed and filtered.
  def test_words
    assert_equal(["hello", "world"], "Hello World".words)
  end

  # With bigrams enabled the word pairs are added to the term list.
  def test_terms
    sentence = "Hello World"

    assert_equal(["hello", "world"], BagOfWords.terms(sentence, false).keys.sort)
    assert_equal(["hello", "hello world", "world"], BagOfWords.terms(sentence, true).keys.sort)
  end

  # Feature vectors hold one count per requested term.
  def test_features
    sentence = "Hello world!" + "Hello World Again!"

    assert_equal([2, 2], BagOfWords.features(sentence, "Hello World".words.uniq.sort))
  end

  # Stemming collapses plural forms.
  def test_stem
    assert_equal(["protein"], "Proteins".words)
  end
end
|
29
|
+
|
30
|
+
|
@@ -0,0 +1,91 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/../../test_helper'
|
2
|
+
require 'rbbt/bow/dictionary'
|
3
|
+
require 'rbbt/bow/bow'
|
4
|
+
require 'test/unit'
|
5
|
+
|
6
|
+
class TestDictionary < Test::Unit::TestCase

  # Build one term-count hash (no bigrams) per input text.
  def docs_for(*texts)
    texts.collect{|text| BagOfWords.terms(text, false) }
  end

  def test_standard
    dict = Dictionary.new
    docs_for("Hello World", "Hello Yin Yin").each{|doc| dict.add doc}

    assert_equal(2, dict.terms["hello"])
    assert_equal(2, dict.terms["yin"])
    assert_equal(0, dict.terms["bye"])
    assert_equal(1, dict.terms["world"])
  end

  def test_tf_idf
    dict = Dictionary::TF_IDF.new
    docs_for("Hello World", "Hello Yin Yin").each{|doc| dict.add doc}

    assert_equal(2, dict.terms["hello"])
    assert_equal(2, dict.terms["yin"])
    assert_equal(0, dict.terms["bye"])
    assert_equal(1, dict.terms["world"])

    assert_equal(1, dict.df["hello"])
    assert_equal(0.5, dict.df["yin"])
    assert_equal(0, dict.df["bye"])
    assert_equal(0.5, dict.df["world"])

    assert_equal(2.0 / 5, dict.tf["hello"])
    assert_equal(2.0 / 5, dict.tf["yin"])
    assert_equal(0, dict.tf["bye"])
    assert_equal(1.0 / 5, dict.tf["world"])

    assert_equal(Math::log(1), dict.idf["hello"])
    assert_equal(Math::log(2), dict.idf["yin"])
    assert_equal(0, dict.idf["bye"])
    assert_equal(Math::log(2), dict.idf["world"])

    assert_equal(2.0 / 5 * Math::log(1), dict.tf_idf["hello"])
    assert_equal(2.0 / 5 * Math::log(2), dict.tf_idf["yin"])
    assert_equal(0, dict.tf_idf["bye"])
    assert_equal(1.0 / 5 * Math::log(2), dict.tf_idf["world"])
  end

  def test_best
    dict = Dictionary::TF_IDF.new
    docs_for("Hello World", "Hello Yin Yin").each{|doc| dict.add doc}

    assert_equal(1, dict.best(:limit => 1).length)
    assert(dict.best(:limit => 1).include? "yin")
  end

  def test_kl
    dict = Dictionary::KL.new
    [["Hello World", :+],
     ["Hello Cruel World", :+],
     ["Hello Yan Yan", :-],
     ["Hello Yin Yin", :-]].each{|text, label|
      dict.add BagOfWords.terms(text, false), label
    }

    # "hello" appears in every document of both classes: zero divergence
    assert_equal(0, dict.kl["hello"])
    assert_equal(dict.kl['yan'], dict.kl['yin'])
    assert_in_delta(1 * Math::log(1 / 0.000001), dict.kl["world"], 0.01)
    assert_in_delta(0.5 * Math::log(0.5 / 0.000001), dict.kl["cruel"], 0.01)
  end
end
|
90
|
+
|
91
|
+
|
@@ -0,0 +1,32 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/../../test_helper'
|
2
|
+
require 'rbbt-util'
|
3
|
+
require 'rbbt/ner/regexpNER'
|
4
|
+
require 'test/unit'
|
5
|
+
|
6
|
+
class TestRegExpNER < Test::Unit::TestCase

  def test_class
    text = "a bc d e f g h i j k l m n o p q one two"

    lexicon =<<-EOF
C1,a,x,xx,xxx
C2,bc,y,yy,yyy
C3,i,z,zz,zzz,m,one two
EOF

    file = TmpFile.tmp_file
    File.open(file, 'w'){|f| f.write lexicon}

    # Without stopword filtering every lexicon name present in the text is found.
    matcher = RegExpNER.new(file, :sep => ',', :stopwords => false)
    assert_equal(['a', 'bc', 'i', 'm', 'one two'].sort,
                 matcher.match_hash(text).values.flatten.sort)

    # With stopwords enabled common words such as 'a' and 'i' are dropped.
    matcher = RegExpNER.new(file, :sep => ',', :stopwords => true)
    assert_equal(['bc', 'm', 'one two'].sort,
                 matcher.match_hash(text).values.flatten.sort)

    FileUtils.rm file
  end

end
|
31
|
+
|
32
|
+
|
data/test/test_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,92 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: rbbt-text
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 23
|
5
|
+
prerelease: false
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 0
|
9
|
+
- 4
|
10
|
+
version: 0.0.4
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Miguel Vazquez
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2010-12-01 00:00:00 +01:00
|
19
|
+
default_executable:
|
20
|
+
dependencies:
|
21
|
+
- !ruby/object:Gem::Dependency
|
22
|
+
name: rbbt-util
|
23
|
+
prerelease: false
|
24
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ">="
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
hash: 3
|
30
|
+
segments:
|
31
|
+
- 0
|
32
|
+
version: "0"
|
33
|
+
type: :runtime
|
34
|
+
version_requirements: *id001
|
35
|
+
description: "Text mining tools: named entity recognition and normalization, document classification, bag-of-words, dictionaries, etc"
|
36
|
+
email: miguel.vazquez@fdi.ucm.es
|
37
|
+
executables: []
|
38
|
+
|
39
|
+
extensions: []
|
40
|
+
|
41
|
+
extra_rdoc_files: []
|
42
|
+
|
43
|
+
files:
|
44
|
+
- lib/rbbt/bow/bow.rb
|
45
|
+
- lib/rbbt/bow/dictionary.rb
|
46
|
+
- lib/rbbt/bow/misc.rb
|
47
|
+
- lib/rbbt/ner/regexpNER.rb
|
48
|
+
- test/rbbt/bow/test_bow.rb
|
49
|
+
- test/rbbt/bow/test_dictionary.rb
|
50
|
+
- test/rbbt/bow/test_misc.rb
|
51
|
+
- test/rbbt/ner/test_regexpNER.rb
|
52
|
+
- test/test_helper.rb
|
53
|
+
has_rdoc: true
|
54
|
+
homepage: http://github.com/mikisvaz/rbbt-util
|
55
|
+
licenses: []
|
56
|
+
|
57
|
+
post_install_message:
|
58
|
+
rdoc_options: []
|
59
|
+
|
60
|
+
require_paths:
|
61
|
+
- lib
|
62
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
63
|
+
none: false
|
64
|
+
requirements:
|
65
|
+
- - ">="
|
66
|
+
- !ruby/object:Gem::Version
|
67
|
+
hash: 3
|
68
|
+
segments:
|
69
|
+
- 0
|
70
|
+
version: "0"
|
71
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
72
|
+
none: false
|
73
|
+
requirements:
|
74
|
+
- - ">="
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
hash: 3
|
77
|
+
segments:
|
78
|
+
- 0
|
79
|
+
version: "0"
|
80
|
+
requirements: []
|
81
|
+
|
82
|
+
rubyforge_project:
|
83
|
+
rubygems_version: 1.3.7
|
84
|
+
signing_key:
|
85
|
+
specification_version: 3
|
86
|
+
summary: Text mining tools for the Ruby Bioinformatics Toolkit (rbbt)
|
87
|
+
test_files:
|
88
|
+
- test/rbbt/bow/test_bow.rb
|
89
|
+
- test/rbbt/bow/test_dictionary.rb
|
90
|
+
- test/rbbt/bow/test_misc.rb
|
91
|
+
- test/rbbt/ner/test_regexpNER.rb
|
92
|
+
- test/test_helper.rb
|