tf-idf-similarity 0.0.6 → 0.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -86,8 +86,6 @@ class TfIdfSimilarity::Collection
|
|
86
86
|
# @param [Hash] opts optional arguments
|
87
87
|
# @option opts [Symbol] :function one of :tfidf (default) or :bm25
|
88
88
|
# @return [Float] the term's frequency in the document
|
89
|
-
#
|
90
|
-
# @note Like Lucene, we use a b value of 0.75 and a k1 value of 1.2.
|
91
89
|
def term_frequency_inverse_document_frequency(document, term, opts = {})
|
92
90
|
inverse_document_frequency(term, opts) * term_frequency(document, term, opts)
|
93
91
|
end
|
@@ -115,7 +113,7 @@ class TfIdfSimilarity::Collection
|
|
115
113
|
# @note Like Lucene, we use a b value of 0.75 and a k1 value of 1.2.
|
116
114
|
def term_frequency(document, term, opts = {})
|
117
115
|
if opts[:function] == :bm25
|
118
|
-
(document.term_counts[term] * 2.2) / (document.term_counts[term] + 0.3 + 0.9 * document.size / average_document_size)
|
116
|
+
(document.term_counts[term].to_i * 2.2) / (document.term_counts[term].to_i + 0.3 + 0.9 * document.size / average_document_size)
|
119
117
|
else
|
120
118
|
document.term_frequency term
|
121
119
|
end
|
@@ -35,7 +35,7 @@ class TfIdfSimilarity::Document
|
|
35
35
|
#
|
36
36
|
# @see http://lucene.apache.org/core/4_0_0-BETA/core/org/apache/lucene/search/similarities/TFIDFSimilarity.html
|
37
37
|
def term_frequency(term)
|
38
|
-
Math.sqrt term_counts[term]
|
38
|
+
Math.sqrt term_counts[term].to_i
|
39
39
|
end
|
40
40
|
alias_method :tf, :term_frequency
|
41
41
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tf-idf-similarity
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.6
|
4
|
+
version: 0.0.7
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-11-
|
12
|
+
date: 2012-11-20 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: unicode_utils
|