classifier 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96)
  1. data/LICENSE +361 -273
  2. data/README +6 -5
  3. data/Rakefile +12 -2
  4. data/bin/summarize.rb +11 -0
  5. data/doc/classes/Array.html +139 -0
  6. data/doc/classes/Array.src/M000003.html +18 -0
  7. data/doc/classes/Classifier.html +5 -5
  8. data/doc/classes/Classifier/Bayes.html +43 -43
  9. data/doc/classes/Classifier/Bayes.src/{M000023.html → M000038.html} +0 -0
  10. data/doc/classes/Classifier/Bayes.src/{M000024.html → M000039.html} +0 -0
  11. data/doc/classes/Classifier/Bayes.src/{M000025.html → M000040.html} +0 -0
  12. data/doc/classes/Classifier/Bayes.src/{M000026.html → M000041.html} +0 -0
  13. data/doc/classes/Classifier/Bayes.src/{M000027.html → M000042.html} +0 -0
  14. data/doc/classes/Classifier/Bayes.src/{M000028.html → M000043.html} +0 -0
  15. data/doc/classes/Classifier/Bayes.src/{M000029.html → M000044.html} +0 -0
  16. data/doc/classes/Classifier/ContentNode.html +23 -28
  17. data/doc/classes/Classifier/ContentNode.src/M000046.html +19 -0
  18. data/doc/classes/Classifier/ContentNode.src/{M000032.html → M000047.html} +1 -1
  19. data/doc/classes/Classifier/ContentNode.src/{M000033.html → M000048.html} +1 -1
  20. data/doc/classes/Classifier/ContentNode.src/M000049.html +49 -0
  21. data/doc/classes/Classifier/LSI.html +158 -68
  22. data/doc/classes/Classifier/LSI.src/M000022.html +6 -17
  23. data/doc/classes/Classifier/LSI.src/{M000012.html → M000023.html} +2 -2
  24. data/doc/classes/Classifier/LSI.src/{M000013.html → M000024.html} +3 -2
  25. data/doc/classes/Classifier/LSI.src/{M000014.html → M000025.html} +1 -1
  26. data/doc/classes/Classifier/LSI.src/M000026.html +19 -0
  27. data/doc/classes/Classifier/LSI.src/{M000015.html → M000027.html} +1 -1
  28. data/doc/classes/Classifier/LSI.src/{M000016.html → M000028.html} +1 -1
  29. data/doc/classes/Classifier/LSI.src/M000029.html +19 -0
  30. data/doc/classes/Classifier/LSI.src/M000030.html +43 -0
  31. data/doc/classes/Classifier/LSI.src/M000031.html +23 -0
  32. data/doc/classes/Classifier/LSI.src/{M000018.html → M000032.html} +7 -3
  33. data/doc/classes/Classifier/LSI.src/{M000019.html → M000033.html} +6 -2
  34. data/doc/classes/Classifier/LSI.src/{M000020.html → M000034.html} +2 -4
  35. data/doc/classes/Classifier/LSI.src/{M000021.html → M000035.html} +1 -1
  36. data/doc/classes/Classifier/LSI.src/M000036.html +31 -0
  37. data/doc/classes/Classifier/LSI.src/M000037.html +21 -0
  38. data/doc/classes/Classifier/WordList.html +37 -22
  39. data/doc/classes/Classifier/WordList.src/{M000007.html → M000017.html} +2 -2
  40. data/doc/classes/Classifier/WordList.src/{M000008.html → M000018.html} +1 -1
  41. data/doc/classes/Classifier/WordList.src/{M000009.html → M000019.html} +1 -1
  42. data/doc/classes/Classifier/WordList.src/M000020.html +18 -0
  43. data/doc/classes/Classifier/WordList.src/{M000010.html → M000021.html} +1 -1
  44. data/doc/classes/GSL.html +2 -1
  45. data/doc/classes/GSL/Matrix.html +126 -0
  46. data/doc/classes/GSL/Vector.html +10 -10
  47. data/doc/classes/GSL/Vector.src/{M000005.html → M000015.html} +0 -0
  48. data/doc/classes/GSL/Vector.src/{M000006.html → M000016.html} +0 -0
  49. data/doc/classes/Matrix.html +184 -0
  50. data/doc/classes/Matrix.src/M000004.html +18 -0
  51. data/doc/classes/Matrix.src/M000005.html +76 -0
  52. data/doc/classes/Matrix.src/M000006.html +18 -0
  53. data/doc/classes/Object.html +7 -7
  54. data/doc/classes/Object.src/{M000001.html → M000007.html} +1 -1
  55. data/doc/classes/String.html +90 -20
  56. data/doc/classes/String.src/{M000002.html → M000008.html} +0 -0
  57. data/doc/classes/String.src/{M000003.html → M000009.html} +0 -0
  58. data/doc/classes/String.src/{M000004.html → M000010.html} +0 -0
  59. data/doc/classes/String.src/M000011.html +18 -0
  60. data/doc/classes/String.src/M000012.html +18 -0
  61. data/doc/classes/String.src/M000013.html +18 -0
  62. data/doc/classes/String.src/M000014.html +18 -0
  63. data/doc/classes/Vector.html +154 -0
  64. data/doc/classes/Vector.src/M000001.html +22 -0
  65. data/doc/classes/Vector.src/M000002.html +25 -0
  66. data/doc/created.rid +1 -1
  67. data/doc/files/README.html +14 -8
  68. data/doc/files/lib/classifier/bayes_rb.html +1 -1
  69. data/doc/files/lib/classifier/{string_extensions_rb.html → extensions/string_rb.html} +5 -5
  70. data/doc/files/lib/classifier/extensions/vector_rb.html +120 -0
  71. data/doc/files/lib/classifier/extensions/vector_serialize_rb.html +1 -1
  72. data/doc/files/lib/classifier/extensions/word_hash_rb.html +1 -1
  73. data/doc/files/lib/classifier/lsi/content_node_rb.html +2 -2
  74. data/doc/files/lib/classifier/lsi/summary_rb.html +115 -0
  75. data/doc/files/lib/classifier/{extensions → lsi}/word_list_rb.html +3 -3
  76. data/doc/files/lib/classifier/lsi_rb.html +5 -3
  77. data/doc/files/lib/classifier_rb.html +2 -2
  78. data/doc/fr_class_index.html +4 -0
  79. data/doc/fr_file_index.html +4 -2
  80. data/doc/fr_method_index.html +49 -34
  81. data/doc/index.html +2 -2
  82. data/lib/classifier.rb +1 -1
  83. data/lib/classifier/{string_extensions.rb → extensions/string.rb} +0 -0
  84. data/lib/classifier/extensions/vector.rb +106 -0
  85. data/lib/classifier/extensions/vector_serialize.rb +6 -0
  86. data/lib/classifier/lsi.rb +101 -31
  87. data/lib/classifier/lsi/content_node.rb +28 -23
  88. data/lib/classifier/lsi/summary.rb +31 -0
  89. data/lib/classifier/{extensions → lsi}/word_list.rb +7 -2
  90. data/test/{string_extensions → extensions}/word_hash_test.rb +0 -0
  91. data/test/lsi/lsi_test.rb +36 -1
  92. metadata +68 -41
  93. data/doc/classes/Classifier/ContentNode.src/M000031.html +0 -21
  94. data/doc/classes/Classifier/ContentNode.src/M000034.html +0 -41
  95. data/doc/classes/Classifier/LSI.src/M000011.html +0 -20
  96. data/doc/classes/Classifier/LSI.src/M000017.html +0 -32
@@ -1,26 +1,23 @@
1
1
  # Author:: David Fayram (mailto:dfayram@lensmen.net)
2
2
  # Copyright:: Copyright (c) 2005 David Fayram II
3
- # License:: GPL
3
+ # License:: LGPL
4
4
 
5
5
  module Classifier
6
6
 
7
-
8
7
  # This is an internal data structure class for the LSI node. Save for
9
8
  # raw_vector_with, it should be fairly straightforward to understand.
10
9
  # You should never have to use it directly.
11
10
  class ContentNode
12
- attr_accessor :word_hash, :raw_vector, :raw_norm,
13
- :lsi_vector, :lsi_norm,
14
- :categories
15
- attr_reader :source
16
-
11
+ attr_accessor :raw_vector, :raw_norm,
12
+ :lsi_vector, :lsi_norm,
13
+ :categories
14
+
15
+ attr_reader :word_hash
17
16
  # If text_proc is not specified, the source will be duck-typed
18
17
  # via source.to_s
19
- def initialize( source, categories=nil, text_proc=nil )
20
- text_proc = text_proc || (proc {|x| x.to_s})
18
+ def initialize( word_hash, *categories )
21
19
  @categories = categories || []
22
- @source = source
23
- @word_hash = text_proc.call( @source ).clean_word_hash
20
+ @word_hash = word_hash
24
21
  end
25
22
 
26
23
  # Use this to fetch the appropriate search vector.
@@ -36,32 +33,40 @@ module Classifier
36
33
  # Creates the raw vector out of word_hash using word_list as the
37
34
  # key for mapping the vector space.
38
35
  def raw_vector_with( word_list )
39
- vec = Array.new(word_list.size, 0)
36
+ if $GSL
37
+ vec = Vector.new(word_list.size)
38
+ else
39
+ vec = Array.new(word_list.size, 0)
40
+ end
40
41
 
41
42
  @word_hash.each_key do |word|
42
43
  vec[word_list[word]] = @word_hash[word] if word_list[word]
43
44
  end
44
45
 
45
46
  # Perform the scaling transform
46
- total_words = vec.inject(0) { |sum,term| sum += term }.to_f
47
+ total_words = vec.sum
47
48
 
48
49
  # Perform first-order association transform if this vector has more
49
50
  # than one word in it.
50
51
  if total_words > 1.0
51
- weighted_total = vec.inject(0.0) do |sum,term|
52
- if( term > 0 )
53
- sum += (( term / total_words ) * Math.log( term / total_words ))
54
- else
55
- sum
52
+ weighted_total = 0.0
53
+ vec.each do |term|
54
+ if ( term > 0 )
55
+ weighted_total += (( term / total_words ) * Math.log( term / total_words ))
56
56
  end
57
57
  end
58
- vec.map! { |val| Math.log( val + 1 ) / -weighted_total }
58
+ vec = vec.collect { |val| Math.log( val + 1 ) / -weighted_total }
59
+ end
60
+
61
+ if $GSL
62
+ @raw_norm = vec.normalize
63
+ @raw_vector = vec
64
+ else
65
+ @raw_norm = Vector[*vec].normalize
66
+ @raw_vector = Vector[*vec]
59
67
  end
60
-
61
- @raw_norm = GSL::Vector.new( vec ).normalize
62
- @raw_vector = GSL::Vector.new( vec )
63
68
  end
64
69
 
65
70
  end
66
71
 
67
- end
72
+ end
@@ -0,0 +1,31 @@
1
+ # Author:: Lucas Carlson (mailto:lucas@rufy.com)
2
+ # Copyright:: Copyright (c) 2005 Lucas Carlson
3
+ # License:: LGPL
4
+
5
+ class String
6
+ def summary( count=10, separator=" [...] " )
7
+ perform_lsi split_sentences, count, separator
8
+ end
9
+
10
+ def paragraph_summary( count=1, separator=" [...] " )
11
+ perform_lsi split_paragraphs, count, separator
12
+ end
13
+
14
+ def split_sentences
15
+ split /(\.|\!|\?)/ # TODO: make this less primitive
16
+ end
17
+
18
+ def split_paragraphs
19
+ split /(\n\n|\r\r|\r\n\r\n)/ # TODO: make this less primitive
20
+ end
21
+
22
+ private
23
+
24
+ def perform_lsi(chunks, count, separator)
25
+ lsi = Classifier::LSI.new :auto_rebuild => false
26
+ chunks.each { |chunk| lsi << chunk unless chunk.strip.empty? || chunk.strip.split.size == 1 }
27
+ lsi.build_index
28
+ summaries = lsi.highest_relative_content count
29
+ return summaries.reject { |chunk| !summaries.include? chunk }.map { |x| x.strip }.join(separator)
30
+ end
31
+ end
@@ -1,13 +1,14 @@
1
1
  # Author:: David Fayram (mailto:dfayram@lensmen.net)
2
2
  # Copyright:: Copyright (c) 2005 David Fayram II
3
- # License:: GPL
3
+ # License:: LGPL
4
4
 
5
5
  module Classifier
6
6
  # This class keeps a word => index mapping. It is used to map stemmed words
7
7
  # to dimensions of a vector.
8
+
8
9
  class WordList
9
10
  def initialize
10
- @location_table = {}
11
+ @location_table = Hash.new
11
12
  end
12
13
 
13
14
  # Adds a word (if it is new) and assigns it a unique dimension.
@@ -22,6 +23,10 @@ module Classifier
22
23
  @location_table[term]
23
24
  end
24
25
 
26
+ def word_for_index(ind)
27
+ @location_table.invert[ind]
28
+ end
29
+
25
30
  # Returns the number of words mapped.
26
31
  def size
27
32
  @location_table.size
data/test/lsi/lsi_test.rb CHANGED
@@ -3,7 +3,7 @@ class LSITest < Test::Unit::TestCase
3
3
  def setup
4
4
  # we repeat principle words to help weight them.
5
5
  # This test is rather delicate, since this system is mostly noise.
6
- @str1 = "This text deals with dogs. Dogs."
6
+ @str1 = "This text deals with dogs. Dogs."
7
7
  @str2 = "This text involves dogs too. Dogs! "
8
8
  @str3 = "This text revolves around cats. Cats."
9
9
  @str4 = "This text also involves cats. Cats!"
@@ -23,6 +23,7 @@ class LSITest < Test::Unit::TestCase
23
23
  def test_not_auto_rebuild
24
24
  lsi = Classifier::LSI.new :auto_rebuild => false
25
25
  lsi.add_item @str1, "Dog"
26
+ lsi.add_item @str2, "Dog"
26
27
  assert lsi.needs_rebuild?
27
28
  lsi.build_index
28
29
  assert ! lsi.needs_rebuild?
@@ -57,6 +58,25 @@ class LSITest < Test::Unit::TestCase
57
58
  assert_not_equal "Dog", bayes.classify( tricky_case )
58
59
  end
59
60
 
61
+ def test_recategorize_interface
62
+ lsi = Classifier::LSI.new
63
+ lsi.add_item @str1, "Dog"
64
+ lsi.add_item @str2, "Dog"
65
+ lsi.add_item @str3, "Cat"
66
+ lsi.add_item @str4, "Cat"
67
+ lsi.add_item @str5, "Bird"
68
+
69
+ tricky_case = "This text revolves around dogs."
70
+ assert_equal "Dog", lsi.classify( tricky_case )
71
+
72
+ # Recategorize as needed.
73
+ lsi.categories_for(@str1).clear.push "Cow"
74
+ lsi.categories_for(@str2).clear.push "Cow"
75
+
76
+ assert !lsi.needs_rebuild?
77
+ assert_equal "Cow", lsi.classify( tricky_case )
78
+ end
79
+
60
80
  def test_search
61
81
  lsi = Classifier::LSI.new
62
82
  [@str1, @str2, @str3, @str4, @str5].each { |x| lsi << x }
@@ -85,4 +105,19 @@ class LSITest < Test::Unit::TestCase
85
105
  assert_equal lsi_m.find_related(@str1, 3), lsi.find_related(@str1, 3)
86
106
  end
87
107
 
108
+ def test_keyword_search
109
+ lsi = Classifier::LSI.new
110
+ lsi.add_item @str1, "Dog"
111
+ lsi.add_item @str2, "Dog"
112
+ lsi.add_item @str3, "Cat"
113
+ lsi.add_item @str4, "Cat"
114
+ lsi.add_item @str5, "Bird"
115
+
116
+ assert_equal [:dog, :text, :deal], lsi.highest_ranked_stems(@str1)
117
+ end
118
+
119
+ def test_summary
120
+ assert_equal "This text involves dogs too [...] This text also involves cats", [@str1, @str2, @str3, @str4, @str5].join.summary(2)
121
+ end
122
+
88
123
  end
metadata CHANGED
@@ -1,10 +1,10 @@
1
1
  --- !ruby/object:Gem::Specification
2
- rubygems_version: 0.8.6
2
+ rubygems_version: 0.8.10
3
3
  specification_version: 1
4
4
  name: classifier
5
5
  version: !ruby/object:Gem::Version
6
- version: 1.2.0
7
- date: 2005-04-24
6
+ version: 1.3.0
7
+ date: 2005-05-05
8
8
  summary: A general classifier module to allow Bayesian and other types of classifications.
9
9
  require_paths:
10
10
  - lib
@@ -33,19 +33,22 @@ files:
33
33
  - lib/classifier/extensions
34
34
  - lib/classifier/lsi
35
35
  - lib/classifier/lsi.rb
36
- - lib/classifier/string_extensions.rb
36
+ - lib/classifier/extensions/string.rb
37
+ - lib/classifier/extensions/vector.rb
37
38
  - lib/classifier/extensions/vector_serialize.rb
38
39
  - lib/classifier/extensions/word_hash.rb
39
- - lib/classifier/extensions/word_list.rb
40
40
  - lib/classifier/lsi/content_node.rb
41
+ - lib/classifier/lsi/summary.rb
42
+ - lib/classifier/lsi/word_list.rb
41
43
  - bin/bayes.rb
44
+ - bin/summarize.rb
42
45
  - test/bayes
46
+ - test/extensions
43
47
  - test/lsi
44
- - test/string_extensions
45
48
  - test/test_helper.rb
46
49
  - test/bayes/bayesian_test.rb
50
+ - test/extensions/word_hash_test.rb
47
51
  - test/lsi/lsi_test.rb
48
- - test/string_extensions/word_hash_test.rb
49
52
  - LICENSE
50
53
  - Rakefile
51
54
  - README
@@ -57,14 +60,21 @@ files:
57
60
  - doc/fr_method_index.html
58
61
  - doc/index.html
59
62
  - doc/rdoc-style.css
63
+ - doc/classes/Array.html
64
+ - doc/classes/Array.src
60
65
  - doc/classes/Classifier
61
66
  - doc/classes/Classifier.html
62
67
  - doc/classes/GSL
63
68
  - doc/classes/GSL.html
69
+ - doc/classes/Matrix.html
70
+ - doc/classes/Matrix.src
64
71
  - doc/classes/Object.html
65
72
  - doc/classes/Object.src
66
73
  - doc/classes/String.html
67
74
  - doc/classes/String.src
75
+ - doc/classes/Vector.html
76
+ - doc/classes/Vector.src
77
+ - doc/classes/Array.src/M000003.html
68
78
  - doc/classes/Classifier/Bayes.html
69
79
  - doc/classes/Classifier/Bayes.src
70
80
  - doc/classes/Classifier/ContentNode.html
@@ -73,41 +83,56 @@ files:
73
83
  - doc/classes/Classifier/LSI.src
74
84
  - doc/classes/Classifier/WordList.html
75
85
  - doc/classes/Classifier/WordList.src
76
- - doc/classes/Classifier/Bayes.src/M000023.html
77
- - doc/classes/Classifier/Bayes.src/M000024.html
78
- - doc/classes/Classifier/Bayes.src/M000025.html
79
- - doc/classes/Classifier/Bayes.src/M000026.html
80
- - doc/classes/Classifier/Bayes.src/M000027.html
81
- - doc/classes/Classifier/Bayes.src/M000028.html
82
- - doc/classes/Classifier/Bayes.src/M000029.html
83
- - doc/classes/Classifier/ContentNode.src/M000031.html
84
- - doc/classes/Classifier/ContentNode.src/M000032.html
85
- - doc/classes/Classifier/ContentNode.src/M000033.html
86
- - doc/classes/Classifier/ContentNode.src/M000034.html
87
- - doc/classes/Classifier/LSI.src/M000011.html
88
- - doc/classes/Classifier/LSI.src/M000012.html
89
- - doc/classes/Classifier/LSI.src/M000013.html
90
- - doc/classes/Classifier/LSI.src/M000014.html
91
- - doc/classes/Classifier/LSI.src/M000015.html
92
- - doc/classes/Classifier/LSI.src/M000016.html
93
- - doc/classes/Classifier/LSI.src/M000017.html
94
- - doc/classes/Classifier/LSI.src/M000018.html
95
- - doc/classes/Classifier/LSI.src/M000019.html
96
- - doc/classes/Classifier/LSI.src/M000020.html
97
- - doc/classes/Classifier/LSI.src/M000021.html
86
+ - doc/classes/Classifier/Bayes.src/M000038.html
87
+ - doc/classes/Classifier/Bayes.src/M000039.html
88
+ - doc/classes/Classifier/Bayes.src/M000040.html
89
+ - doc/classes/Classifier/Bayes.src/M000041.html
90
+ - doc/classes/Classifier/Bayes.src/M000042.html
91
+ - doc/classes/Classifier/Bayes.src/M000043.html
92
+ - doc/classes/Classifier/Bayes.src/M000044.html
93
+ - doc/classes/Classifier/ContentNode.src/M000046.html
94
+ - doc/classes/Classifier/ContentNode.src/M000047.html
95
+ - doc/classes/Classifier/ContentNode.src/M000048.html
96
+ - doc/classes/Classifier/ContentNode.src/M000049.html
98
97
  - doc/classes/Classifier/LSI.src/M000022.html
99
- - doc/classes/Classifier/WordList.src/M000007.html
100
- - doc/classes/Classifier/WordList.src/M000008.html
101
- - doc/classes/Classifier/WordList.src/M000009.html
102
- - doc/classes/Classifier/WordList.src/M000010.html
98
+ - doc/classes/Classifier/LSI.src/M000023.html
99
+ - doc/classes/Classifier/LSI.src/M000024.html
100
+ - doc/classes/Classifier/LSI.src/M000025.html
101
+ - doc/classes/Classifier/LSI.src/M000026.html
102
+ - doc/classes/Classifier/LSI.src/M000027.html
103
+ - doc/classes/Classifier/LSI.src/M000028.html
104
+ - doc/classes/Classifier/LSI.src/M000029.html
105
+ - doc/classes/Classifier/LSI.src/M000030.html
106
+ - doc/classes/Classifier/LSI.src/M000031.html
107
+ - doc/classes/Classifier/LSI.src/M000032.html
108
+ - doc/classes/Classifier/LSI.src/M000033.html
109
+ - doc/classes/Classifier/LSI.src/M000034.html
110
+ - doc/classes/Classifier/LSI.src/M000035.html
111
+ - doc/classes/Classifier/LSI.src/M000036.html
112
+ - doc/classes/Classifier/LSI.src/M000037.html
113
+ - doc/classes/Classifier/WordList.src/M000017.html
114
+ - doc/classes/Classifier/WordList.src/M000018.html
115
+ - doc/classes/Classifier/WordList.src/M000019.html
116
+ - doc/classes/Classifier/WordList.src/M000020.html
117
+ - doc/classes/Classifier/WordList.src/M000021.html
118
+ - doc/classes/GSL/Matrix.html
103
119
  - doc/classes/GSL/Vector.html
104
120
  - doc/classes/GSL/Vector.src
105
- - doc/classes/GSL/Vector.src/M000005.html
106
- - doc/classes/GSL/Vector.src/M000006.html
107
- - doc/classes/Object.src/M000001.html
108
- - doc/classes/String.src/M000002.html
109
- - doc/classes/String.src/M000003.html
110
- - doc/classes/String.src/M000004.html
121
+ - doc/classes/GSL/Vector.src/M000015.html
122
+ - doc/classes/GSL/Vector.src/M000016.html
123
+ - doc/classes/Matrix.src/M000004.html
124
+ - doc/classes/Matrix.src/M000005.html
125
+ - doc/classes/Matrix.src/M000006.html
126
+ - doc/classes/Object.src/M000007.html
127
+ - doc/classes/String.src/M000008.html
128
+ - doc/classes/String.src/M000009.html
129
+ - doc/classes/String.src/M000010.html
130
+ - doc/classes/String.src/M000011.html
131
+ - doc/classes/String.src/M000012.html
132
+ - doc/classes/String.src/M000013.html
133
+ - doc/classes/String.src/M000014.html
134
+ - doc/classes/Vector.src/M000001.html
135
+ - doc/classes/Vector.src/M000002.html
111
136
  - doc/files/lib
112
137
  - doc/files/README.html
113
138
  - doc/files/lib/classifier
@@ -116,11 +141,13 @@ files:
116
141
  - doc/files/lib/classifier/extensions
117
142
  - doc/files/lib/classifier/lsi
118
143
  - doc/files/lib/classifier/lsi_rb.html
119
- - doc/files/lib/classifier/string_extensions_rb.html
144
+ - doc/files/lib/classifier/extensions/string_rb.html
145
+ - doc/files/lib/classifier/extensions/vector_rb.html
120
146
  - doc/files/lib/classifier/extensions/vector_serialize_rb.html
121
147
  - doc/files/lib/classifier/extensions/word_hash_rb.html
122
- - doc/files/lib/classifier/extensions/word_list_rb.html
123
148
  - doc/files/lib/classifier/lsi/content_node_rb.html
149
+ - doc/files/lib/classifier/lsi/summary_rb.html
150
+ - doc/files/lib/classifier/lsi/word_list_rb.html
124
151
  test_files: []
125
152
  rdoc_options: []
126
153
  extra_rdoc_files: []
@@ -1,21 +0,0 @@
1
- <?xml version="1.0" encoding="iso-8859-1"?>
2
- <!DOCTYPE html
3
- PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
- "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
-
6
- <html>
7
- <head>
8
- <title>new (Classifier::ContentNode)</title>
9
- <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
- <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
11
- </head>
12
- <body class="standalone-code">
13
- <pre><span class="ruby-comment cmt"># File lib/classifier/lsi/content_node.rb, line 19</span>
14
- <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">initialize</span>( <span class="ruby-identifier">source</span>, <span class="ruby-identifier">categories</span>=<span class="ruby-keyword kw">nil</span>, <span class="ruby-identifier">text_proc</span>=<span class="ruby-keyword kw">nil</span> )
15
- <span class="ruby-identifier">text_proc</span> = <span class="ruby-identifier">text_proc</span> <span class="ruby-operator">||</span> (<span class="ruby-identifier">proc</span> {<span class="ruby-operator">|</span><span class="ruby-identifier">x</span><span class="ruby-operator">|</span> <span class="ruby-identifier">x</span>.<span class="ruby-identifier">to_s</span>})
16
- <span class="ruby-ivar">@categories</span> = <span class="ruby-identifier">categories</span> <span class="ruby-operator">||</span> []
17
- <span class="ruby-ivar">@source</span> = <span class="ruby-identifier">source</span>
18
- <span class="ruby-ivar">@word_hash</span> = <span class="ruby-identifier">text_proc</span>.<span class="ruby-identifier">call</span>( <span class="ruby-ivar">@source</span> ).<span class="ruby-identifier">clean_word_hash</span>
19
- <span class="ruby-keyword kw">end</span></pre>
20
- </body>
21
- </html>
@@ -1,41 +0,0 @@
1
- <?xml version="1.0" encoding="iso-8859-1"?>
2
- <!DOCTYPE html
3
- PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
- "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
-
6
- <html>
7
- <head>
8
- <title>raw_vector_with (Classifier::ContentNode)</title>
9
- <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
- <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
11
- </head>
12
- <body class="standalone-code">
13
- <pre><span class="ruby-comment cmt"># File lib/classifier/lsi/content_node.rb, line 38</span>
14
- <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">raw_vector_with</span>( <span class="ruby-identifier">word_list</span> )
15
- <span class="ruby-identifier">vec</span> = <span class="ruby-constant">Array</span>.<span class="ruby-identifier">new</span>(<span class="ruby-identifier">word_list</span>.<span class="ruby-identifier">size</span>, <span class="ruby-value">0</span>)
16
-
17
- <span class="ruby-ivar">@word_hash</span>.<span class="ruby-identifier">each_key</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">word</span><span class="ruby-operator">|</span>
18
- <span class="ruby-identifier">vec</span>[<span class="ruby-identifier">word_list</span>[<span class="ruby-identifier">word</span>]] = <span class="ruby-ivar">@word_hash</span>[<span class="ruby-identifier">word</span>] <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">word_list</span>[<span class="ruby-identifier">word</span>]
19
- <span class="ruby-keyword kw">end</span>
20
-
21
- <span class="ruby-comment cmt"># Perform the scaling transform</span>
22
- <span class="ruby-identifier">total_words</span> = <span class="ruby-identifier">vec</span>.<span class="ruby-identifier">inject</span>(<span class="ruby-value">0</span>) { <span class="ruby-operator">|</span><span class="ruby-identifier">sum</span>,<span class="ruby-identifier">term</span><span class="ruby-operator">|</span> <span class="ruby-identifier">sum</span> <span class="ruby-operator">+=</span> <span class="ruby-identifier">term</span> }.<span class="ruby-identifier">to_f</span>
23
-
24
- <span class="ruby-comment cmt"># Perform first-order association transform if this vector has more</span>
25
- <span class="ruby-comment cmt"># than one word in it. </span>
26
- <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">total_words</span> <span class="ruby-operator">&gt;</span> <span class="ruby-value">1.0</span>
27
- <span class="ruby-identifier">weighted_total</span> = <span class="ruby-identifier">vec</span>.<span class="ruby-identifier">inject</span>(<span class="ruby-value">0</span><span class="ruby-value">.0</span>) <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">sum</span>,<span class="ruby-identifier">term</span><span class="ruby-operator">|</span>
28
- <span class="ruby-keyword kw">if</span>( <span class="ruby-identifier">term</span> <span class="ruby-operator">&gt;</span> <span class="ruby-value">0</span> )
29
- <span class="ruby-identifier">sum</span> <span class="ruby-operator">+=</span> (( <span class="ruby-identifier">term</span> <span class="ruby-operator">/</span> <span class="ruby-identifier">total_words</span> ) <span class="ruby-operator">*</span> <span class="ruby-constant">Math</span>.<span class="ruby-identifier">log</span>( <span class="ruby-identifier">term</span> <span class="ruby-operator">/</span> <span class="ruby-identifier">total_words</span> ))
30
- <span class="ruby-keyword kw">else</span>
31
- <span class="ruby-identifier">sum</span>
32
- <span class="ruby-keyword kw">end</span>
33
- <span class="ruby-keyword kw">end</span>
34
- <span class="ruby-identifier">vec</span>.<span class="ruby-identifier">map!</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">val</span><span class="ruby-operator">|</span> <span class="ruby-constant">Math</span>.<span class="ruby-identifier">log</span>( <span class="ruby-identifier">val</span> <span class="ruby-operator">+</span> <span class="ruby-value">1</span> ) <span class="ruby-operator">/</span> <span class="ruby-operator">-</span><span class="ruby-identifier">weighted_total</span> }
35
- <span class="ruby-keyword kw">end</span>
36
-
37
- <span class="ruby-ivar">@raw_norm</span> = <span class="ruby-constant">GSL</span><span class="ruby-operator">::</span><span class="ruby-constant">Vector</span>.<span class="ruby-identifier">new</span>( <span class="ruby-identifier">vec</span> ).<span class="ruby-identifier">normalize</span>
38
- <span class="ruby-ivar">@raw_vector</span> = <span class="ruby-constant">GSL</span><span class="ruby-operator">::</span><span class="ruby-constant">Vector</span>.<span class="ruby-identifier">new</span>( <span class="ruby-identifier">vec</span> )
39
- <span class="ruby-keyword kw">end</span></pre>
40
- </body>
41
- </html>
@@ -1,20 +0,0 @@
1
- <?xml version="1.0" encoding="iso-8859-1"?>
2
- <!DOCTYPE html
3
- PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
- "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
-
6
- <html>
7
- <head>
8
- <title>new (Classifier::LSI)</title>
9
- <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
- <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
11
- </head>
12
- <body class="standalone-code">
13
- <pre><span class="ruby-comment cmt"># File lib/classifier/lsi.rb, line 26</span>
14
- <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">initialize</span>(<span class="ruby-identifier">options</span> = {})
15
- <span class="ruby-ivar">@auto_rebuild</span> = <span class="ruby-keyword kw">true</span> <span class="ruby-keyword kw">unless</span> <span class="ruby-identifier">options</span>[<span class="ruby-identifier">:auto_rebuild</span>] <span class="ruby-operator">==</span> <span class="ruby-keyword kw">false</span>
16
- <span class="ruby-ivar">@word_list</span>, <span class="ruby-ivar">@items</span> = <span class="ruby-constant">WordList</span>.<span class="ruby-identifier">new</span>, {}
17
- <span class="ruby-ivar">@version</span>, <span class="ruby-ivar">@built_at_version</span> = <span class="ruby-value">0</span>, <span class="ruby-value">-1</span>
18
- <span class="ruby-keyword kw">end</span></pre>
19
- </body>
20
- </html>