tf-idf-similarity 0.0.6 → 0.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
| @@ -86,8 +86,6 @@ class TfIdfSimilarity::Collection | |
| 86 86 | 
             
              # @param [Hash] opts optional arguments
         | 
| 87 87 | 
             
              # @option opts [Symbol] :function one of :tfidf (default) or :bm25
         | 
| 88 88 | 
             
              # @return [Float] the term's frequency in the document
         | 
| 89 | 
            -
              #
         | 
| 90 | 
            -
              # @note Like Lucene, we use a b value of 0.75 and a k1 value of 1.2.
         | 
| 91 89 | 
             
              def term_frequency_inverse_document_frequency(document, term, opts = {})
         | 
| 92 90 | 
             
                inverse_document_frequency(term, opts) * term_frequency(document, term, opts)
         | 
| 93 91 | 
             
              end
         | 
| @@ -115,7 +113,7 @@ class TfIdfSimilarity::Collection | |
| 115 113 | 
             
              # @note Like Lucene, we use a b value of 0.75 and a k1 value of 1.2.
         | 
| 116 114 | 
             
              def term_frequency(document, term, opts = {})
         | 
| 117 115 | 
             
                if opts[:function] == :bm25
         | 
| 118 | 
            -
                  (document.term_counts[term] * 2.2) / (document.term_counts[term] + 0.3 + 0.9 * document.size / average_document_size)
         | 
| 116 | 
            +
                  (document.term_counts[term].to_i * 2.2) / (document.term_counts[term].to_i + 0.3 + 0.9 * document.size / average_document_size)
         | 
| 119 117 | 
             
                else
         | 
| 120 118 | 
             
                  document.term_frequency term
         | 
| 121 119 | 
             
                end
         | 
| @@ -35,7 +35,7 @@ class TfIdfSimilarity::Document | |
| 35 35 | 
             
              #
         | 
| 36 36 | 
             
              # @see http://lucene.apache.org/core/4_0_0-BETA/core/org/apache/lucene/search/similarities/TFIDFSimilarity.html
         | 
| 37 37 | 
             
              def term_frequency(term)
         | 
| 38 | 
            -
                Math.sqrt term_counts[term]
         | 
| 38 | 
            +
                Math.sqrt term_counts[term].to_i
         | 
| 39 39 | 
             
              end
         | 
| 40 40 | 
             
              alias_method :tf, :term_frequency
         | 
| 41 41 |  | 
    
        metadata
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: tf-idf-similarity
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 0.0.6
         | 
| 4 | 
            +
              version: 0.0.7
         | 
| 5 5 | 
             
              prerelease: 
         | 
| 6 6 | 
             
            platform: ruby
         | 
| 7 7 | 
             
            authors:
         | 
| @@ -9,7 +9,7 @@ authors: | |
| 9 9 | 
             
            autorequire: 
         | 
| 10 10 | 
             
            bindir: bin
         | 
| 11 11 | 
             
            cert_chain: []
         | 
| 12 | 
            -
            date: 2012-11- | 
| 12 | 
            +
            date: 2012-11-20 00:00:00.000000000 Z
         | 
| 13 13 | 
             
            dependencies:
         | 
| 14 14 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 15 15 | 
             
              name: unicode_utils
         |