tf-idf-similarity 0.0.6 → 0.0.7
Sign up to get free protection for your applications and to get access to all the features.
@@ -86,8 +86,6 @@ class TfIdfSimilarity::Collection
|
|
86
86
|
# @param [Hash] opts optional arguments
|
87
87
|
# @option opts [Symbol] :function one of :tfidf (default) or :bm25
|
88
88
|
# @return [Float] the term's frequency in the document
|
89
|
-
#
|
90
|
-
# @note Like Lucene, we use a b value of 0.75 and a k1 value of 1.2.
|
91
89
|
def term_frequency_inverse_document_frequency(document, term, opts = {})
|
92
90
|
inverse_document_frequency(term, opts) * term_frequency(document, term, opts)
|
93
91
|
end
|
@@ -115,7 +113,7 @@ class TfIdfSimilarity::Collection
|
|
115
113
|
# @note Like Lucene, we use a b value of 0.75 and a k1 value of 1.2.
|
116
114
|
def term_frequency(document, term, opts = {})
|
117
115
|
if opts[:function] == :bm25
|
118
|
-
(document.term_counts[term] * 2.2) / (document.term_counts[term] + 0.3 + 0.9 * document.size / average_document_size)
|
116
|
+
(document.term_counts[term].to_i * 2.2) / (document.term_counts[term].to_i + 0.3 + 0.9 * document.size / average_document_size)
|
119
117
|
else
|
120
118
|
document.term_frequency term
|
121
119
|
end
|
@@ -35,7 +35,7 @@ class TfIdfSimilarity::Document
|
|
35
35
|
#
|
36
36
|
# @see http://lucene.apache.org/core/4_0_0-BETA/core/org/apache/lucene/search/similarities/TFIDFSimilarity.html
|
37
37
|
def term_frequency(term)
|
38
|
-
Math.sqrt term_counts[term]
|
38
|
+
Math.sqrt term_counts[term].to_i
|
39
39
|
end
|
40
40
|
alias_method :tf, :term_frequency
|
41
41
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tf-idf-similarity
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.6
|
4
|
+
version: 0.0.7
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-11-
|
12
|
+
date: 2012-11-20 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: unicode_utils
|