reclassifier 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +17 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +429 -0
- data/README.md +87 -0
- data/Rakefile +7 -0
- data/lib/gsl/vector.rb +12 -0
- data/lib/reclassifier.rb +19 -0
- data/lib/reclassifier/bayes.rb +129 -0
- data/lib/reclassifier/content_node.rb +66 -0
- data/lib/reclassifier/core_ext/array.rb +11 -0
- data/lib/reclassifier/core_ext/matrix.rb +72 -0
- data/lib/reclassifier/core_ext/object.rb +3 -0
- data/lib/reclassifier/core_ext/string.rb +143 -0
- data/lib/reclassifier/core_ext/vector.rb +20 -0
- data/lib/reclassifier/lsi.rb +300 -0
- data/lib/reclassifier/version.rb +3 -0
- data/lib/reclassifier/word_list.rb +32 -0
- data/reclassifier.gemspec +27 -0
- data/test/bayes_test.rb +34 -0
- data/test/core_ext/array_test.rb +15 -0
- data/test/core_ext/string_test.rb +13 -0
- data/test/lsi_test.rb +123 -0
- data/test/test_helper.rb +4 -0
- metadata +154 -0
data/README.md
ADDED
@@ -0,0 +1,87 @@
# Reclassifier

Reclassifier is a gem that provides [classification](http://en.wikipedia.org/wiki/Statistical_classification) of strings.

Classification can be done via [Naïve Bayes](https://en.wikipedia.org/wiki/Naive_Bayes_classifier) or [Latent Semantic Indexing](http://en.wikipedia.org/wiki/Latent_semantic_indexing).

It is a fork of the original [Classifier](https://github.com/cardmagic/classifier) gem, which appears to be unmaintained as of a couple of years ago.

## Installation

Add this line to your application's Gemfile:

    gem 'reclassifier'

And then execute:

    $ bundle

Or install it yourself as:

    $ gem install reclassifier

## Dependencies

Currently you need to install the GNU GSL library in order to use Reclassifier: http://www.gnu.org/software/gsl

## Usage

### Bayes
Bayesian classifiers are accurate, fast, and have modest memory requirements.

#### Usage
    require 'reclassifier'
    b = Reclassifier::Bayes.new 'Interesting', 'Uninteresting'
    b.train_interesting "here are some good words. I hope you love them"
    b.train_uninteresting "here are some bad words, I hate you"
    b.classify "I hate bad words and you" # returns 'Uninteresting'

    require 'madeleine'
    m = SnapshotMadeleine.new("bayes_data") {
      Reclassifier::Bayes.new 'Interesting', 'Uninteresting'
    }
    m.system.train_interesting "here are some good words. I hope you love them"
    m.system.train_uninteresting "here are some bad words, I hate you"
    m.take_snapshot
    m.system.classify "I love you" # returns 'Interesting'

Using Madeleine, your application can persist the learned data over time.

### LSI
Latent Semantic Indexing engines are not as fast or as small as Bayesian classifiers, but are more flexible, providing
fast search and clustering detection as well as semantic analysis of the text that theoretically simulates human learning.

#### Usage
    require 'reclassifier'
    lsi = Reclassifier::LSI.new
    strings = [ ["This text deals with dogs. Dogs.", :dog],
                ["This text involves dogs too. Dogs! ", :dog],
                ["This text revolves around cats. Cats.", :cat],
                ["This text also involves cats. Cats!", :cat],
                ["This text involves birds. Birds.", :bird] ]
    strings.each { |x| lsi.add_item x.first, x.last }

    lsi.search("dog", 3)
    # returns => ["This text deals with dogs. Dogs.", "This text involves dogs too. Dogs! ",
    #             "This text also involves cats. Cats!"]

    lsi.find_related(strings[2], 2)
    # returns => ["This text revolves around cats. Cats.", "This text also involves cats. Cats!"]

    lsi.classify "This text is also about dogs!"
    # returns => :dog

Please see the Reclassifier::LSI documentation for more information. It is possible to index, search, and classify
with more than just simple strings.

## Contributing

1. Fork it
2. Create your feature branch (`git checkout -b my-new-feature`)
3. Commit your changes (`git commit -am 'Add some feature'`)
4. Push to the branch (`git push origin my-new-feature`)
5. Create new Pull Request

## License

This library is released under the terms of the GNU LGPL. See LICENSE for more details.
|
data/Rakefile
ADDED
data/lib/gsl/vector.rb
ADDED
data/lib/reclassifier.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
# gems
|
2
|
+
require 'matrix'
|
3
|
+
require 'fast-stemmer'
|
4
|
+
require 'gsl'
|
5
|
+
|
6
|
+
# files
|
7
|
+
require 'reclassifier/version'
|
8
|
+
require 'reclassifier/core_ext/array'
|
9
|
+
require 'reclassifier/core_ext/matrix'
|
10
|
+
require 'reclassifier/core_ext/object'
|
11
|
+
require 'reclassifier/core_ext/string'
|
12
|
+
require 'gsl/vector'
|
13
|
+
|
14
|
+
module Reclassifier
|
15
|
+
autoload :Bayes, 'reclassifier/bayes'
|
16
|
+
autoload :LSI, 'reclassifier/lsi'
|
17
|
+
autoload :ContentNode, 'reclassifier/content_node'
|
18
|
+
autoload :WordList, 'reclassifier/word_list'
|
19
|
+
end
|
@@ -0,0 +1,129 @@
|
|
1
|
+
module Reclassifier
|
2
|
+
class Bayes
|
3
|
+
# The class can be created with one or more categories, each of which will be
|
4
|
+
# initialized and given a training method. E.g.,
|
5
|
+
# b = Classifier::Bayes.new 'Interesting', 'Uninteresting', 'Spam'
|
6
|
+
def initialize(*categories)
|
7
|
+
@categories = Hash.new
|
8
|
+
categories.each { |category| @categories[category.prepare_category_name] = Hash.new }
|
9
|
+
@total_words = 0
|
10
|
+
@category_counts = Hash.new(0)
|
11
|
+
end
|
12
|
+
|
13
|
+
#
|
14
|
+
# Provides a general training method for all categories specified in Bayes#new
|
15
|
+
# For example:
|
16
|
+
# b = Classifier::Bayes.new 'This', 'That', 'the_other'
|
17
|
+
# b.train :this, "This text"
|
18
|
+
# b.train "that", "That text"
|
19
|
+
# b.train "The other", "The other text"
|
20
|
+
def train(category, text)
|
21
|
+
category = category.prepare_category_name
|
22
|
+
@category_counts[category] += 1
|
23
|
+
text.word_hash.each do |word, count|
|
24
|
+
@categories[category][word] ||= 0
|
25
|
+
@categories[category][word] += count
|
26
|
+
@total_words += count
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
#
|
31
|
+
# Provides a untraining method for all categories specified in Bayes#new
|
32
|
+
# Be very careful with this method.
|
33
|
+
#
|
34
|
+
# For example:
|
35
|
+
# b = Classifier::Bayes.new 'This', 'That', 'the_other'
|
36
|
+
# b.train :this, "This text"
|
37
|
+
# b.untrain :this, "This text"
|
38
|
+
def untrain(category, text)
|
39
|
+
category = category.prepare_category_name
|
40
|
+
@category_counts[category] -= 1
|
41
|
+
text.word_hash.each do |word, count|
|
42
|
+
if @total_words >= 0
|
43
|
+
orig = @categories[category][word]
|
44
|
+
@categories[category][word] ||= 0
|
45
|
+
@categories[category][word] -= count
|
46
|
+
if @categories[category][word] <= 0
|
47
|
+
@categories[category].delete(word)
|
48
|
+
count = orig
|
49
|
+
end
|
50
|
+
@total_words -= count
|
51
|
+
end
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
#
|
56
|
+
# Returns the scores in each category the provided +text+. E.g.,
|
57
|
+
# b.classifications "I hate bad words and you"
|
58
|
+
# => {"Uninteresting"=>-12.6997928013932, "Interesting"=>-18.4206807439524}
|
59
|
+
# The largest of these scores (the one closest to 0) is the one picked out by #classify
|
60
|
+
def classifications(text)
|
61
|
+
score = Hash.new
|
62
|
+
training_count = @category_counts.values.inject { |x,y| x+y }.to_f
|
63
|
+
@categories.each do |category, category_words|
|
64
|
+
score[category.to_s] = 0
|
65
|
+
total = category_words.values.inject(0) {|sum, element| sum+element}
|
66
|
+
text.word_hash.each do |word, count|
|
67
|
+
s = category_words.has_key?(word) ? category_words[word] : 0.1
|
68
|
+
score[category.to_s] += Math.log(s/total.to_f)
|
69
|
+
end
|
70
|
+
# now add prior probability for the category
|
71
|
+
s = @category_counts.has_key?(category) ? @category_counts[category] : 0.1
|
72
|
+
score[category.to_s] += Math.log(s / training_count)
|
73
|
+
end
|
74
|
+
return score
|
75
|
+
end
|
76
|
+
|
77
|
+
#
|
78
|
+
# Returns the classification of the provided +text+, which is one of the
|
79
|
+
# categories given in the initializer. E.g.,
|
80
|
+
# b.classify "I hate bad words and you"
|
81
|
+
# => 'Uninteresting'
|
82
|
+
def classify(text)
|
83
|
+
(classifications(text).sort_by { |a| -a[1] })[0][0]
|
84
|
+
end
|
85
|
+
|
86
|
+
#
|
87
|
+
# Provides training and untraining methods for the categories specified in Bayes#new
|
88
|
+
# For example:
|
89
|
+
# b = Classifier::Bayes.new 'This', 'That', 'the_other'
|
90
|
+
# b.train_this "This text"
|
91
|
+
# b.train_that "That text"
|
92
|
+
# b.untrain_that "That text"
|
93
|
+
# b.train_the_other "The other text"
|
94
|
+
def method_missing(name, *args)
|
95
|
+
category = name.to_s.gsub(/(un)?train_([\w]+)/, '\2').prepare_category_name
|
96
|
+
if @categories.has_key? category
|
97
|
+
args.each { |text| eval("#{$1}train(category, text)") }
|
98
|
+
elsif name.to_s =~ /(un)?train_([\w]+)/
|
99
|
+
raise StandardError, "No such category: #{category}"
|
100
|
+
else
|
101
|
+
super #raise StandardError, "No such method: #{name}"
|
102
|
+
end
|
103
|
+
end
|
104
|
+
|
105
|
+
#
|
106
|
+
# Provides a list of category names
|
107
|
+
# For example:
|
108
|
+
# b.categories
|
109
|
+
# => ['This', 'That', 'the_other']
|
110
|
+
def categories # :nodoc:
|
111
|
+
@categories.keys.collect {|c| c.to_s}
|
112
|
+
end
|
113
|
+
|
114
|
+
#
|
115
|
+
# Allows you to add categories to the classifier.
|
116
|
+
# For example:
|
117
|
+
# b.add_category "Not spam"
|
118
|
+
#
|
119
|
+
# WARNING: Adding categories to a trained classifier will
|
120
|
+
# result in an undertrained category that will tend to match
|
121
|
+
# more criteria than the trained selective categories. In short,
|
122
|
+
# try to initialize your categories at initialization.
|
123
|
+
def add_category(category)
|
124
|
+
@categories[category.prepare_category_name] = Hash.new
|
125
|
+
end
|
126
|
+
|
127
|
+
alias append_category add_category
|
128
|
+
end
|
129
|
+
end
|
@@ -0,0 +1,66 @@
|
|
1
|
+
module Reclassifier
|
2
|
+
|
3
|
+
# This is an internal data structure class for the LSI node. Save for
|
4
|
+
# raw_vector_with, it should be fairly straightforward to understand.
|
5
|
+
# You should never have to use it directly.
|
6
|
+
class ContentNode
|
7
|
+
attr_accessor :raw_vector, :raw_norm,
|
8
|
+
:lsi_vector, :lsi_norm,
|
9
|
+
:categories
|
10
|
+
|
11
|
+
attr_reader :word_hash
|
12
|
+
# If text_proc is not specified, the source will be duck-typed
|
13
|
+
# via source.to_s
|
14
|
+
def initialize( word_hash, *categories )
|
15
|
+
@categories = categories || []
|
16
|
+
@word_hash = word_hash
|
17
|
+
end
|
18
|
+
|
19
|
+
# Use this to fetch the appropriate search vector.
|
20
|
+
def search_vector
|
21
|
+
@lsi_vector || @raw_vector
|
22
|
+
end
|
23
|
+
|
24
|
+
# Use this to fetch the appropriate search vector in normalized form.
|
25
|
+
def search_norm
|
26
|
+
@lsi_norm || @raw_norm
|
27
|
+
end
|
28
|
+
|
29
|
+
# Creates the raw vector out of word_hash using word_list as the
|
30
|
+
# key for mapping the vector space.
|
31
|
+
def raw_vector_with( word_list )
|
32
|
+
if $GSL
|
33
|
+
vec = GSL::Vector.alloc(word_list.size)
|
34
|
+
else
|
35
|
+
vec = Array.new(word_list.size, 0)
|
36
|
+
end
|
37
|
+
|
38
|
+
@word_hash.each_key do |word|
|
39
|
+
vec[word_list[word]] = @word_hash[word] if word_list[word]
|
40
|
+
end
|
41
|
+
|
42
|
+
# Perform the scaling transform
|
43
|
+
total_words = $GSL ? vec.sum : vec.sum_with_identity
|
44
|
+
|
45
|
+
# Perform first-order association transform if this vector has more
|
46
|
+
# than one word in it.
|
47
|
+
if total_words > 1.0
|
48
|
+
weighted_total = 0.0
|
49
|
+
vec.each do |term|
|
50
|
+
if ( term > 0 )
|
51
|
+
weighted_total += (( term / total_words ) * Math.log( term / total_words ))
|
52
|
+
end
|
53
|
+
end
|
54
|
+
vec = vec.collect { |val| Math.log( val + 1 ) / -weighted_total }
|
55
|
+
end
|
56
|
+
|
57
|
+
if $GSL
|
58
|
+
@raw_norm = vec.normalize
|
59
|
+
@raw_vector = vec
|
60
|
+
else
|
61
|
+
@raw_norm = Vector[*vec].normalize
|
62
|
+
@raw_vector = Vector[*vec]
|
63
|
+
end
|
64
|
+
end
|
65
|
+
end
|
66
|
+
end
|
@@ -0,0 +1,72 @@
|
|
1
|
+
class Matrix
|
2
|
+
def Matrix.diag(s)
|
3
|
+
Matrix.diagonal(*s)
|
4
|
+
end
|
5
|
+
|
6
|
+
alias :trans :transpose
|
7
|
+
|
8
|
+
def SV_decomp(maxSweeps = 20)
|
9
|
+
if self.row_size >= self.column_size
|
10
|
+
q = self.trans * self
|
11
|
+
else
|
12
|
+
q = self * self.trans
|
13
|
+
end
|
14
|
+
|
15
|
+
qrot = q.dup
|
16
|
+
v = Matrix.identity(q.row_size)
|
17
|
+
azrot = nil
|
18
|
+
mzrot = nil
|
19
|
+
cnt = 0
|
20
|
+
s_old = nil
|
21
|
+
mu = nil
|
22
|
+
|
23
|
+
while true do
|
24
|
+
cnt += 1
|
25
|
+
for row in (0...qrot.row_size-1) do
|
26
|
+
for col in (1..qrot.row_size-1) do
|
27
|
+
next if row == col
|
28
|
+
h = Math.atan((2 * qrot[row,col])/(qrot[row,row]-qrot[col,col]))/2.0
|
29
|
+
hcos = Math.cos(h)
|
30
|
+
hsin = Math.sin(h)
|
31
|
+
mzrot = Matrix.identity(qrot.row_size)
|
32
|
+
mzrot[row,row] = hcos
|
33
|
+
mzrot[row,col] = -hsin
|
34
|
+
mzrot[col,row] = hsin
|
35
|
+
mzrot[col,col] = hcos
|
36
|
+
qrot = mzrot.trans * qrot * mzrot
|
37
|
+
v = v * mzrot
|
38
|
+
end
|
39
|
+
end
|
40
|
+
s_old = qrot.dup if cnt == 1
|
41
|
+
sum_qrot = 0.0
|
42
|
+
if cnt > 1
|
43
|
+
qrot.row_size.times do |r|
|
44
|
+
sum_qrot += (qrot[r,r]-s_old[r,r]).abs if (qrot[r,r]-s_old[r,r]).abs > 0.001
|
45
|
+
end
|
46
|
+
s_old = qrot.dup
|
47
|
+
end
|
48
|
+
break if (sum_qrot <= 0.001 and cnt > 1) or cnt >= maxSweeps
|
49
|
+
end # of do while true
|
50
|
+
s = []
|
51
|
+
qrot.row_size.times do |r|
|
52
|
+
s << Math.sqrt(qrot[r,r])
|
53
|
+
end
|
54
|
+
#puts "cnt = #{cnt}"
|
55
|
+
if self.row_size >= self.column_size
|
56
|
+
mu = self * v * Matrix.diagonal(*s).inverse
|
57
|
+
return [mu, v, s]
|
58
|
+
else
|
59
|
+
puts v.row_size
|
60
|
+
puts v.column_size
|
61
|
+
puts self.row_size
|
62
|
+
puts self.column_size
|
63
|
+
puts s.size
|
64
|
+
|
65
|
+
mu = (self.trans * v * Matrix.diagonal(*s).inverse)
|
66
|
+
return [mu, v, s]
|
67
|
+
end
|
68
|
+
end
|
69
|
+
def []=(i,j,val)
|
70
|
+
@rows[i][j] = val
|
71
|
+
end
|
72
|
+
end
|
@@ -0,0 +1,143 @@
|
|
1
|
+
class String
|
2
|
+
|
3
|
+
# Removes common punctuation symbols, returning a new string.
|
4
|
+
# E.g.,
|
5
|
+
# "Hello (greeting's), with {braces} < >...?".without_punctuation
|
6
|
+
# => "Hello greetings with braces "
|
7
|
+
def without_punctuation
|
8
|
+
tr( ',?.!;:"@#$%^&*()_=+[]{}\|<>/`~', " " ) .tr( "'\-", "")
|
9
|
+
end
|
10
|
+
|
11
|
+
# Return a Hash of strings => ints. Each word in the string is stemmed,
|
12
|
+
# symbolized, and indexed to its frequency in the document.
|
13
|
+
def word_hash
|
14
|
+
word_hash_for_words(gsub(/[^\w\s]/,"").split + gsub(/[\w]/," ").split)
|
15
|
+
end
|
16
|
+
|
17
|
+
# Return a word hash without extra punctuation or short symbols, just stemmed words
|
18
|
+
def clean_word_hash
|
19
|
+
word_hash_for_words gsub(/[^\w\s]/,"").split
|
20
|
+
end
|
21
|
+
|
22
|
+
def word_hash_for_words(words)
|
23
|
+
d = Hash.new
|
24
|
+
words.each do |word|
|
25
|
+
word.downcase! if word =~ /[\w]+/
|
26
|
+
key = word.stem.to_sym
|
27
|
+
if word =~ /[^\w]/ || ! CORPUS_SKIP_WORDS.include?(word) && word.length > 2
|
28
|
+
d[key] ||= 0
|
29
|
+
d[key] += 1
|
30
|
+
end
|
31
|
+
end
|
32
|
+
return d
|
33
|
+
end
|
34
|
+
|
35
|
+
CORPUS_SKIP_WORDS = [
|
36
|
+
"a",
|
37
|
+
"again",
|
38
|
+
"all",
|
39
|
+
"along",
|
40
|
+
"are",
|
41
|
+
"also",
|
42
|
+
"an",
|
43
|
+
"and",
|
44
|
+
"as",
|
45
|
+
"at",
|
46
|
+
"but",
|
47
|
+
"by",
|
48
|
+
"came",
|
49
|
+
"can",
|
50
|
+
"cant",
|
51
|
+
"couldnt",
|
52
|
+
"did",
|
53
|
+
"didn",
|
54
|
+
"didnt",
|
55
|
+
"do",
|
56
|
+
"doesnt",
|
57
|
+
"dont",
|
58
|
+
"ever",
|
59
|
+
"first",
|
60
|
+
"from",
|
61
|
+
"have",
|
62
|
+
"her",
|
63
|
+
"here",
|
64
|
+
"him",
|
65
|
+
"how",
|
66
|
+
"i",
|
67
|
+
"if",
|
68
|
+
"in",
|
69
|
+
"into",
|
70
|
+
"is",
|
71
|
+
"isnt",
|
72
|
+
"it",
|
73
|
+
"itll",
|
74
|
+
"just",
|
75
|
+
"last",
|
76
|
+
"least",
|
77
|
+
"like",
|
78
|
+
"most",
|
79
|
+
"my",
|
80
|
+
"new",
|
81
|
+
"no",
|
82
|
+
"not",
|
83
|
+
"now",
|
84
|
+
"of",
|
85
|
+
"on",
|
86
|
+
"or",
|
87
|
+
"should",
|
88
|
+
"sinc",
|
89
|
+
"so",
|
90
|
+
"some",
|
91
|
+
"th",
|
92
|
+
"than",
|
93
|
+
"this",
|
94
|
+
"that",
|
95
|
+
"the",
|
96
|
+
"their",
|
97
|
+
"then",
|
98
|
+
"those",
|
99
|
+
"to",
|
100
|
+
"told",
|
101
|
+
"too",
|
102
|
+
"true",
|
103
|
+
"try",
|
104
|
+
"until",
|
105
|
+
"url",
|
106
|
+
"us",
|
107
|
+
"were",
|
108
|
+
"when",
|
109
|
+
"whether",
|
110
|
+
"while",
|
111
|
+
"with",
|
112
|
+
"within",
|
113
|
+
"yes",
|
114
|
+
"you",
|
115
|
+
"youll",
|
116
|
+
]
|
117
|
+
|
118
|
+
def summary( count=10, separator=" [...] " )
|
119
|
+
perform_lsi split_sentences, count, separator
|
120
|
+
end
|
121
|
+
|
122
|
+
def paragraph_summary( count=1, separator=" [...] " )
|
123
|
+
perform_lsi split_paragraphs, count, separator
|
124
|
+
end
|
125
|
+
|
126
|
+
def split_sentences
|
127
|
+
split /(\.|\!|\?)/ # TODO: make this less primitive
|
128
|
+
end
|
129
|
+
|
130
|
+
def split_paragraphs
|
131
|
+
split /(\n\n|\r\r|\r\n\r\n)/ # TODO: make this less primitive
|
132
|
+
end
|
133
|
+
|
134
|
+
private
|
135
|
+
|
136
|
+
def perform_lsi(chunks, count, separator)
|
137
|
+
lsi = Reclassifier::LSI.new :auto_rebuild => false
|
138
|
+
chunks.each { |chunk| lsi << chunk unless chunk.strip.empty? || chunk.strip.split.size == 1 }
|
139
|
+
lsi.build_index
|
140
|
+
summaries = lsi.highest_relative_content count
|
141
|
+
return summaries.reject { |chunk| !summaries.include? chunk }.map { |x| x.strip }.join(separator)
|
142
|
+
end
|
143
|
+
end
|