tf-idf-similarity 0.0.6 → 0.0.7

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those versions as they appear in their respective public registries.
@@ -86,8 +86,6 @@ class TfIdfSimilarity::Collection
86
86
  # @param [Hash] opts optional arguments
87
87
  # @option opts [Symbol] :function one of :tfidf (default) or :bm25
88
88
  # @return [Float] the term's frequency in the document
89
- #
90
- # @note Like Lucene, we use a b value of 0.75 and a k1 value of 1.2.
91
89
  def term_frequency_inverse_document_frequency(document, term, opts = {})
92
90
  inverse_document_frequency(term, opts) * term_frequency(document, term, opts)
93
91
  end
@@ -115,7 +113,7 @@ class TfIdfSimilarity::Collection
115
113
  # @note Like Lucene, we use a b value of 0.75 and a k1 value of 1.2.
116
114
  def term_frequency(document, term, opts = {})
117
115
  if opts[:function] == :bm25
118
- (document.term_counts[term] * 2.2) / (document.term_counts[term] + 0.3 + 0.9 * document.size / average_document_size)
116
+ (document.term_counts[term].to_i * 2.2) / (document.term_counts[term].to_i + 0.3 + 0.9 * document.size / average_document_size)
119
117
  else
120
118
  document.term_frequency term
121
119
  end
@@ -35,7 +35,7 @@ class TfIdfSimilarity::Document
35
35
  #
36
36
  # @see http://lucene.apache.org/core/4_0_0-BETA/core/org/apache/lucene/search/similarities/TFIDFSimilarity.html
37
37
  def term_frequency(term)
38
- Math.sqrt term_counts[term]
38
+ Math.sqrt term_counts[term].to_i
39
39
  end
40
40
  alias_method :tf, :term_frequency
41
41
 
@@ -1,3 +1,3 @@
1
1
  module TfIdfSimilarity
2
- VERSION = "0.0.6"
2
+ VERSION = "0.0.7"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tf-idf-similarity
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.6
4
+ version: 0.0.7
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-11-05 00:00:00.000000000 Z
12
+ date: 2012-11-20 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: unicode_utils