string-similarity 1.1.1 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
 - data/.travis.yml +7 -3
 - data/CHANGELOG.md +7 -1
 - data/README.md +18 -5
 - data/lib/string/similarity.rb +107 -127
 - data/lib/string/similarity/version.rb +1 -1
 - data/lib/string/similarity_refinements.rb +22 -0
 - metadata +2 -1
 
    
        checksums.yaml
    CHANGED
    
    | 
         @@ -1,7 +1,7 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            ---
         
     | 
| 
       2 
2 
     | 
    
         
             
            SHA1:
         
     | 
| 
       3 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       4 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 3 
     | 
    
         
            +
              metadata.gz: 308c3664b419f777c0492b103cb9901e108455c0
         
     | 
| 
      
 4 
     | 
    
         
            +
              data.tar.gz: 5e2af01712dc0a08c37b8dd4cbc1f60a5883cb38
         
     | 
| 
       5 
5 
     | 
    
         
             
            SHA512:
         
     | 
| 
       6 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       7 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 6 
     | 
    
         
            +
              metadata.gz: a739214fa67e112e179e9b744e9e8afa4c728d963a0d9ef70bbe3cbbe8abdc8485eef391670963ed050ed723e849ce59bddd672c543c8b12a8c330544800f09e
         
     | 
| 
      
 7 
     | 
    
         
            +
              data.tar.gz: f6c7b317034c2b9c324cdbda88e33fac62e592663676735168017ef3c8ab23f247fe21a4e3289f202271bcbcb639f50c917cddcd47a26cee7e4ec68177235596
         
     | 
    
        data/.travis.yml
    CHANGED
    
    
    
        data/CHANGELOG.md
    CHANGED
    
    | 
         @@ -1,3 +1,9 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            **2.0.0** (2016-02-19)
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            * removed: core extensions on `String`
         
     | 
| 
      
 4 
     | 
    
         
            +
            * added: refinements for `String` (see README!)
         
     | 
| 
      
 5 
     | 
    
         
            +
             
     | 
| 
      
 6 
     | 
    
         
            +
             
     | 
| 
       1 
7 
     | 
    
         
             
            **1.1.1** (2016-02-19)
         
     | 
| 
       2 
8 
     | 
    
         | 
| 
       3 
     | 
    
         
            -
            * added: `require 'string-similarity'` now works  
     | 
| 
      
 9 
     | 
    
         
            +
            * added: `require 'string-similarity'` now works as well.
         
     | 
    
        data/README.md
    CHANGED
    
    | 
         @@ -43,26 +43,39 @@ String::Similarity.cosine 'mine', 'thyne' 
     | 
|
| 
       43 
43 
     | 
    
         
             
            String::Similarity.cosine 'foo', 'foo'
         
     | 
| 
       44 
44 
     | 
    
         
             
            # => 1.0
         
     | 
| 
       45 
45 
     | 
    
         | 
| 
       46 
     | 
    
         
            -
            # or call on a string directly
         
     | 
| 
       47 
     | 
    
         
            -
            'string'.cosine_similarity_to 'strong'
         
     | 
| 
       48 
     | 
    
         
            -
            # => 0.8333333333333335
         
     | 
| 
       49 
     | 
    
         
            -
             
     | 
| 
       50 
46 
     | 
    
         | 
| 
       51 
47 
     | 
    
         
             
            # Same for Levenshtein:
         
     | 
| 
       52 
48 
     | 
    
         
             
            String::Similarity.levenshtein_distance('kitten', 'sitting') # or ...
         
     | 
| 
       53 
     | 
    
         
            -
            'kitten'.levenshtein_distance_to('sitting')
         
     | 
| 
       54 
49 
     | 
    
         
             
            # => 3
         
     | 
| 
       55 
50 
     | 
    
         
             
            String::Similarity.levenshtein('foo', 'far') # or ...
         
     | 
| 
      
 51 
     | 
    
         
            +
            # => 0.5
         
     | 
| 
      
 52 
     | 
    
         
            +
            ```
         
     | 
| 
      
 53 
     | 
    
         
            +
             
     | 
| 
      
 54 
     | 
    
         
            +
            If you want, you can use [Refinements](http://ruby-doc.org/core-2.3.0/doc/syntax/refinements_rdoc.html) to add the functionality to the `String` class:
         
     | 
| 
      
 55 
     | 
    
         
            +
             
     | 
| 
      
 56 
     | 
    
         
            +
            ```ruby
         
     | 
| 
      
 57 
     | 
    
         
            +
            using String::SimilarityRefinements
         
     | 
| 
      
 58 
     | 
    
         
            +
             
     | 
| 
      
 59 
     | 
    
         
            +
            'string'.cosine_similarity_to 'strong'
         
     | 
| 
      
 60 
     | 
    
         
            +
            # => 0.8333333333333335
         
     | 
| 
      
 61 
     | 
    
         
            +
             
     | 
| 
      
 62 
     | 
    
         
            +
            'kitten'.levenshtein_distance_to('sitting')
         
     | 
| 
      
 63 
     | 
    
         
            +
            # => 3
         
     | 
| 
      
 64 
     | 
    
         
            +
             
     | 
| 
       56 
65 
     | 
    
         
             
            'far'.levenshtein_similarity_to('foo')
         
     | 
| 
       57 
66 
     | 
    
         
             
            # => 0.5
         
     | 
| 
       58 
67 
     | 
    
         
             
            ```
         
     | 
| 
       59 
68 
     | 
    
         | 
| 
      
 69 
     | 
    
         
            +
            (See this free [Ruby Tapas Episode](http://www.rubytapas.com/episodes/250-Refinements) if you don't know Refinements)
         
     | 
| 
      
 70 
     | 
    
         
            +
             
     | 
| 
       60 
71 
     | 
    
         
             
            ## Development
         
     | 
| 
       61 
72 
     | 
    
         | 
| 
       62 
73 
     | 
    
         
             
            After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
         
     | 
| 
       63 
74 
     | 
    
         | 
| 
       64 
75 
     | 
    
         
             
            To install this gem onto your local machine, run `bundle exec rake install`.
         
     | 
| 
       65 
76 
     | 
    
         | 
| 
      
 77 
     | 
    
         
            +
            This project uses [Semantic Versioning](http://semver.org/).
         
     | 
| 
      
 78 
     | 
    
         
            +
             
     | 
| 
       66 
79 
     | 
    
         
             
            ## Contributing
         
     | 
| 
       67 
80 
     | 
    
         | 
| 
       68 
81 
     | 
    
         
             
            1. Fork it ( https://github.com/mhutter/string-similarity/fork )
         
     | 
    
        data/lib/string/similarity.rb
    CHANGED
    
    | 
         @@ -1,142 +1,122 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            require 'string/similarity/version'
         
     | 
| 
       2 
     | 
    
         
            -
             
     | 
| 
       3 
     | 
    
         
            -
             
     | 
| 
       4 
     | 
    
         
            -
             
     | 
| 
       5 
     | 
    
         
            -
             
     | 
| 
       6 
     | 
    
         
            -
             
     | 
| 
       7 
     | 
    
         
            -
               
     | 
| 
       8 
     | 
    
         
            -
             
     | 
| 
       9 
     | 
    
         
            -
               
     | 
| 
       10 
     | 
    
         
            -
             
     | 
| 
       11 
     | 
    
         
            -
              #  
     | 
| 
       12 
     | 
    
         
            -
              # 
     | 
| 
       13 
     | 
    
         
            -
               
     | 
| 
       14 
     | 
    
         
            -
             
     | 
| 
      
 2 
     | 
    
         
            +
            require 'string/similarity_refinements'
         
     | 
| 
      
 3 
     | 
    
         
            +
             
     | 
| 
      
 4 
     | 
    
         
            +
            # +String::Similarity+ provides various methods for
         
     | 
| 
      
 5 
     | 
    
         
            +
            # calculating string distances.
         
     | 
| 
      
 6 
     | 
    
         
            +
            module String::Similarity
         
     | 
| 
      
 7 
     | 
    
         
            +
              # Calculate the {https://en.wikipedia.org/wiki/Cosine_similarity
         
     | 
| 
      
 8 
     | 
    
         
            +
              # Cosine similarity} of two strings.
         
     | 
| 
      
 9 
     | 
    
         
            +
              #
         
     | 
| 
      
 10 
     | 
    
         
            +
              # For an explanation of the Cosine similarity of two strings read
         
     | 
| 
      
 11 
     | 
    
         
            +
              # {http://stackoverflow.com/a/1750187/405454 this excellent SO answer}.
         
     | 
| 
      
 12 
     | 
    
         
            +
              #
         
     | 
| 
      
 13 
     | 
    
         
            +
              # @param str1 [String] first string
         
     | 
| 
      
 14 
     | 
    
         
            +
              # @param str2 [String] second string
         
     | 
| 
      
 15 
     | 
    
         
            +
              # @return [Float] cosine similarity of the two arguments.
         
     | 
| 
      
 16 
     | 
    
         
            +
              #   - +1.0+ if the strings are identical
         
     | 
| 
      
 17 
     | 
    
         
            +
              #   - +0.0+ if the strings are completely different
         
     | 
| 
      
 18 
     | 
    
         
            +
              #   - +0.0+ if one of the strings is empty
         
     | 
| 
      
 19 
     | 
    
         
            +
              def self.cosine(str1, str2)
         
     | 
| 
      
 20 
     | 
    
         
            +
                return 1.0 if str1 == str2
         
     | 
| 
      
 21 
     | 
    
         
            +
                return 0.0 if str1.empty? || str2.empty?
         
     | 
| 
      
 22 
     | 
    
         
            +
             
     | 
| 
      
 23 
     | 
    
         
            +
                # convert both texts to vectors
         
     | 
| 
      
 24 
     | 
    
         
            +
                v1 = vector(str1)
         
     | 
| 
      
 25 
     | 
    
         
            +
                v2 = vector(str2)
         
     | 
| 
      
 26 
     | 
    
         
            +
             
     | 
| 
      
 27 
     | 
    
         
            +
                # calculate the dot product
         
     | 
| 
      
 28 
     | 
    
         
            +
                dot_product = dot(v1, v2)
         
     | 
| 
      
 29 
     | 
    
         
            +
             
     | 
| 
      
 30 
     | 
    
         
            +
                # calculate the magnitude
         
     | 
| 
      
 31 
     | 
    
         
            +
                magnitude = mag(v1.values) * mag(v2.values)
         
     | 
| 
      
 32 
     | 
    
         
            +
                dot_product / magnitude
         
     | 
| 
       15 
33 
     | 
    
         
             
              end
         
     | 
| 
       16 
34 
     | 
    
         | 
| 
       17 
     | 
    
         
            -
              #  
     | 
| 
       18 
     | 
    
         
            -
              # 
     | 
| 
       19 
     | 
    
         
            -
               
     | 
| 
       20 
     | 
    
         
            -
             
     | 
| 
      
 35 
     | 
    
         
            +
              # Calculate the Levenshtein similarity for two strings.
         
     | 
| 
      
 36 
     | 
    
         
            +
              #
         
     | 
| 
      
 37 
     | 
    
         
            +
              # This is basically the inversion of the levenshtein_distance, i.e.
         
     | 
| 
      
 38 
     | 
    
         
            +
              #     1 / levenshtein_distance(str1, str2)
         
     | 
| 
      
 39 
     | 
    
         
            +
              #
         
     | 
| 
      
 40 
     | 
    
         
            +
              # @param str1 [String] first string
         
     | 
| 
      
 41 
     | 
    
         
            +
              # @param str2 [String] second string
         
     | 
| 
      
 42 
     | 
    
         
            +
              # @return [Float] levenshtein similarity of the two arguments.
         
     | 
| 
      
 43 
     | 
    
         
            +
              #   - +1.0+ if the strings are identical
         
     | 
| 
      
 44 
     | 
    
         
            +
              #   - +0.0+ if one of the strings is empty
         
     | 
| 
      
 45 
     | 
    
         
            +
              # @see #levenshtein_distance
         
     | 
| 
      
 46 
     | 
    
         
            +
              def self.levenshtein(str1, str2)
         
     | 
| 
      
 47 
     | 
    
         
            +
                return 1.0 if str1.eql?(str2)
         
     | 
| 
      
 48 
     | 
    
         
            +
                return 0.0 if str1.empty? || str2.empty?
         
     | 
| 
      
 49 
     | 
    
         
            +
                1.0 / levenshtein_distance(str1, str2)
         
     | 
| 
       21 
50 
     | 
    
         
             
              end
         
     | 
| 
       22 
51 
     | 
    
         | 
| 
       23 
     | 
    
         
            -
              #  
     | 
| 
       24 
     | 
    
         
            -
              #  
     | 
| 
       25 
     | 
    
         
            -
               
     | 
| 
       26 
     | 
    
         
            -
             
     | 
| 
       27 
     | 
    
         
            -
             
     | 
| 
       28 
     | 
    
         
            -
             
     | 
| 
       29 
     | 
    
         
            -
             
     | 
| 
       30 
     | 
    
         
            -
             
     | 
| 
       31 
     | 
    
         
            -
                #
         
     | 
| 
       32 
     | 
    
         
            -
                 
     | 
| 
       33 
     | 
    
         
            -
                 
     | 
| 
       34 
     | 
    
         
            -
             
     | 
| 
       35 
     | 
    
         
            -
                # 
     | 
| 
       36 
     | 
    
         
            -
                 
     | 
| 
       37 
     | 
    
         
            -
                 
     | 
| 
       38 
     | 
    
         
            -
             
     | 
| 
       39 
     | 
    
         
            -
             
     | 
| 
       40 
     | 
    
         
            -
                   
     | 
| 
       41 
     | 
    
         
            -
             
     | 
| 
       42 
     | 
    
         
            -
                   
     | 
| 
       43 
     | 
    
         
            -
             
     | 
| 
       44 
     | 
    
         
            -
             
     | 
| 
       45 
     | 
    
         
            -
             
     | 
| 
       46 
     | 
    
         
            -
             
     | 
| 
       47 
     | 
    
         
            -
             
     | 
| 
       48 
     | 
    
         
            -
             
     | 
| 
       49 
     | 
    
         
            -
             
     | 
| 
       50 
     | 
    
         
            -
             
     | 
| 
       51 
     | 
    
         
            -
                  dot_product / magnitude
         
     | 
| 
       52 
     | 
    
         
            -
                end
         
     | 
| 
       53 
     | 
    
         
            -
             
     | 
| 
       54 
     | 
    
         
            -
                # Calculate the Levenshtein similarity for two strings.
         
     | 
| 
       55 
     | 
    
         
            -
                #
         
     | 
| 
       56 
     | 
    
         
            -
                # This is basically the inversion of the levenshtein_distance, i.e.
         
     | 
| 
       57 
     | 
    
         
            -
                #     1 / levenshtein_distance(str1, str2)
         
     | 
| 
       58 
     | 
    
         
            -
                #
         
     | 
| 
       59 
     | 
    
         
            -
                # @param str1 [String] first string
         
     | 
| 
       60 
     | 
    
         
            -
                # @param str2 [String] second string
         
     | 
| 
       61 
     | 
    
         
            -
                # @return [Float] levenshtein similarity of the two arguments.
         
     | 
| 
       62 
     | 
    
         
            -
                #   - +1.0+ if the strings are identical
         
     | 
| 
       63 
     | 
    
         
            -
                #   - +0.0+ if one of the strings is empty
         
     | 
| 
       64 
     | 
    
         
            -
                # @see #levenshtein_distance
         
     | 
| 
       65 
     | 
    
         
            -
                def self.levenshtein(str1, str2)
         
     | 
| 
       66 
     | 
    
         
            -
                  return 1.0 if str1.eql?(str2)
         
     | 
| 
       67 
     | 
    
         
            -
                  return 0.0 if str1.empty? || str2.empty?
         
     | 
| 
       68 
     | 
    
         
            -
                  1.0 / levenshtein_distance(str1, str2)
         
     | 
| 
       69 
     | 
    
         
            -
                end
         
     | 
| 
       70 
     | 
    
         
            -
             
     | 
| 
       71 
     | 
    
         
            -
                # Calculate the {https://en.wikipedia.org/wiki/Levenshtein_distance
         
     | 
| 
       72 
     | 
    
         
            -
                # Levenshtein distance} of two strings.
         
     | 
| 
       73 
     | 
    
         
            -
                #
         
     | 
| 
       74 
     | 
    
         
            -
                # @param str1 [String] first string
         
     | 
| 
       75 
     | 
    
         
            -
                # @param str2 [String] second string
         
     | 
| 
       76 
     | 
    
         
            -
                # @return [Fixnum] edit distance between the two strings
         
     | 
| 
       77 
     | 
    
         
            -
                #   - +0+ if the strings are identical
         
     | 
| 
       78 
     | 
    
         
            -
                def self.levenshtein_distance(str1, str2)
         
     | 
| 
       79 
     | 
    
         
            -
                  # base cases
         
     | 
| 
       80 
     | 
    
         
            -
                  result = base_case?(str1, str2)
         
     | 
| 
       81 
     | 
    
         
            -
                  return result if result
         
     | 
| 
       82 
     | 
    
         
            -
             
     | 
| 
       83 
     | 
    
         
            -
                  # Initialize cost-matrix rows
         
     | 
| 
       84 
     | 
    
         
            -
                  previous = (0..str2.length).to_a
         
     | 
| 
       85 
     | 
    
         
            -
                  current = []
         
     | 
| 
       86 
     | 
    
         
            -
             
     | 
| 
       87 
     | 
    
         
            -
                  (0...str1.length).each do |i|
         
     | 
| 
       88 
     | 
    
         
            -
                    # first element is always the edit distance from an empty string.
         
     | 
| 
       89 
     | 
    
         
            -
                    current[0] = i + 1
         
     | 
| 
       90 
     | 
    
         
            -
                    (0...str2.length).each do |j|
         
     | 
| 
       91 
     | 
    
         
            -
                      current[j + 1] = [
         
     | 
| 
       92 
     | 
    
         
            -
                        # insertion
         
     | 
| 
       93 
     | 
    
         
            -
                        current[j] + 1,
         
     | 
| 
       94 
     | 
    
         
            -
                        # deletion
         
     | 
| 
       95 
     | 
    
         
            -
                        previous[j + 1] + 1,
         
     | 
| 
       96 
     | 
    
         
            -
                        # substitution or no operation
         
     | 
| 
       97 
     | 
    
         
            -
                        previous[j] + (str1[i].eql?(str2[j]) ? 0 : 1)
         
     | 
| 
       98 
     | 
    
         
            -
                      ].min
         
     | 
| 
       99 
     | 
    
         
            -
                    end
         
     | 
| 
       100 
     | 
    
         
            -
                    previous = current.dup
         
     | 
| 
      
 52 
     | 
    
         
            +
              # Calculate the {https://en.wikipedia.org/wiki/Levenshtein_distance
         
     | 
| 
      
 53 
     | 
    
         
            +
              # Levenshtein distance} of two strings.
         
     | 
| 
      
 54 
     | 
    
         
            +
              #
         
     | 
| 
      
 55 
     | 
    
         
            +
              # @param str1 [String] first string
         
     | 
| 
      
 56 
     | 
    
         
            +
              # @param str2 [String] second string
         
     | 
| 
      
 57 
     | 
    
         
            +
              # @return [Fixnum] edit distance between the two strings
         
     | 
| 
      
 58 
     | 
    
         
            +
              #   - +0+ if the strings are identical
         
     | 
| 
      
 59 
     | 
    
         
            +
              def self.levenshtein_distance(str1, str2)
         
     | 
| 
      
 60 
     | 
    
         
            +
                # base cases
         
     | 
| 
      
 61 
     | 
    
         
            +
                result = base_case?(str1, str2)
         
     | 
| 
      
 62 
     | 
    
         
            +
                return result if result
         
     | 
| 
      
 63 
     | 
    
         
            +
             
     | 
| 
      
 64 
     | 
    
         
            +
                # Initialize cost-matrix rows
         
     | 
| 
      
 65 
     | 
    
         
            +
                previous = (0..str2.length).to_a
         
     | 
| 
      
 66 
     | 
    
         
            +
                current = []
         
     | 
| 
      
 67 
     | 
    
         
            +
             
     | 
| 
      
 68 
     | 
    
         
            +
                (0...str1.length).each do |i|
         
     | 
| 
      
 69 
     | 
    
         
            +
                  # first element is always the edit distance from an empty string.
         
     | 
| 
      
 70 
     | 
    
         
            +
                  current[0] = i + 1
         
     | 
| 
      
 71 
     | 
    
         
            +
                  (0...str2.length).each do |j|
         
     | 
| 
      
 72 
     | 
    
         
            +
                    current[j + 1] = [
         
     | 
| 
      
 73 
     | 
    
         
            +
                      # insertion
         
     | 
| 
      
 74 
     | 
    
         
            +
                      current[j] + 1,
         
     | 
| 
      
 75 
     | 
    
         
            +
                      # deletion
         
     | 
| 
      
 76 
     | 
    
         
            +
                      previous[j + 1] + 1,
         
     | 
| 
      
 77 
     | 
    
         
            +
                      # substitution or no operation
         
     | 
| 
      
 78 
     | 
    
         
            +
                      previous[j] + (str1[i].eql?(str2[j]) ? 0 : 1)
         
     | 
| 
      
 79 
     | 
    
         
            +
                    ].min
         
     | 
| 
       101 
80 
     | 
    
         
             
                  end
         
     | 
| 
       102 
     | 
    
         
            -
             
     | 
| 
       103 
     | 
    
         
            -
                  current[str2.length]
         
     | 
| 
      
 81 
     | 
    
         
            +
                  previous = current.dup
         
     | 
| 
       104 
82 
     | 
    
         
             
                end
         
     | 
| 
       105 
83 
     | 
    
         | 
| 
       106 
     | 
    
         
            -
                 
     | 
| 
      
 84 
     | 
    
         
            +
                current[str2.length]
         
     | 
| 
      
 85 
     | 
    
         
            +
              end
         
     | 
| 
       107 
86 
     | 
    
         | 
| 
       108 
     | 
    
         
            -
             
     | 
| 
       109 
     | 
    
         
            -
                  return 0 if str1.eql?(str2)
         
     | 
| 
       110 
     | 
    
         
            -
                  return str2.length if str1.empty?
         
     | 
| 
       111 
     | 
    
         
            -
                  return str1.length if str2.empty?
         
     | 
| 
       112 
     | 
    
         
            -
                  false
         
     | 
| 
       113 
     | 
    
         
            -
                end
         
     | 
| 
      
 87 
     | 
    
         
            +
              private
         
     | 
| 
       114 
88 
     | 
    
         | 
| 
       115 
     | 
    
         
            -
             
     | 
| 
       116 
     | 
    
         
            -
                 
     | 
| 
       117 
     | 
    
         
            -
                 
     | 
| 
       118 
     | 
    
         
            -
                 
     | 
| 
       119 
     | 
    
         
            -
                 
     | 
| 
       120 
     | 
    
         
            -
             
     | 
| 
       121 
     | 
    
         
            -
                  v = Hash.new(0)
         
     | 
| 
       122 
     | 
    
         
            -
                  str.each_char { |c| v[c] += 1 }
         
     | 
| 
       123 
     | 
    
         
            -
                  v
         
     | 
| 
       124 
     | 
    
         
            -
                end
         
     | 
| 
      
 89 
     | 
    
         
            +
              def self.base_case?(str1, str2)
         
     | 
| 
      
 90 
     | 
    
         
            +
                return 0 if str1.eql?(str2)
         
     | 
| 
      
 91 
     | 
    
         
            +
                return str2.length if str1.empty?
         
     | 
| 
      
 92 
     | 
    
         
            +
                return str1.length if str2.empty?
         
     | 
| 
      
 93 
     | 
    
         
            +
                false
         
     | 
| 
      
 94 
     | 
    
         
            +
              end
         
     | 
| 
       125 
95 
     | 
    
         | 
| 
       126 
     | 
    
         
            -
             
     | 
| 
       127 
     | 
    
         
            -
             
     | 
| 
       128 
     | 
    
         
            -
             
     | 
| 
       129 
     | 
    
         
            -
             
     | 
| 
       130 
     | 
    
         
            -
             
     | 
| 
       131 
     | 
    
         
            -
             
     | 
| 
       132 
     | 
    
         
            -
             
     | 
| 
       133 
     | 
    
         
            -
                 
     | 
| 
      
 96 
     | 
    
         
            +
              # create a vector from +str+
         
     | 
| 
      
 97 
     | 
    
         
            +
              #
         
     | 
| 
      
 98 
     | 
    
         
            +
              # @example
         
     | 
| 
      
 99 
     | 
    
         
            +
              #     v1 = vector('hello') # => {"h"=>1, "e"=>1, "l"=>2, "o"=>1}
         
     | 
| 
      
 100 
     | 
    
         
            +
              #     v1["x"] # => 0
         
     | 
| 
      
 101 
     | 
    
         
            +
              def self.vector(str)
         
     | 
| 
      
 102 
     | 
    
         
            +
                v = Hash.new(0)
         
     | 
| 
      
 103 
     | 
    
         
            +
                str.each_char { |c| v[c] += 1 }
         
     | 
| 
      
 104 
     | 
    
         
            +
                v
         
     | 
| 
      
 105 
     | 
    
         
            +
              end
         
     | 
| 
       134 
106 
     | 
    
         | 
| 
       135 
     | 
    
         
            -
             
     | 
| 
       136 
     | 
    
         
            -
             
     | 
| 
       137 
     | 
    
         
            -
             
     | 
| 
       138 
     | 
    
         
            -
             
     | 
| 
       139 
     | 
    
         
            -
                   
     | 
| 
      
 107 
     | 
    
         
            +
              # calculate the dot product of +vector1+ and +vector2+
         
     | 
| 
      
 108 
     | 
    
         
            +
              def self.dot(vector1, vector2)
         
     | 
| 
      
 109 
     | 
    
         
            +
                product = 0
         
     | 
| 
      
 110 
     | 
    
         
            +
                vector1.each do |k, v|
         
     | 
| 
      
 111 
     | 
    
         
            +
                  product += v * vector2[k]
         
     | 
| 
       140 
112 
     | 
    
         
             
                end
         
     | 
| 
      
 113 
     | 
    
         
            +
                product
         
     | 
| 
      
 114 
     | 
    
         
            +
              end
         
     | 
| 
      
 115 
     | 
    
         
            +
             
     | 
| 
      
 116 
     | 
    
         
            +
              # calculate the magnitude for +vector+
         
     | 
| 
      
 117 
     | 
    
         
            +
              def self.mag(vector)
         
     | 
| 
      
 118 
     | 
    
         
            +
                # calculate the sum of squares
         
     | 
| 
      
 119 
     | 
    
         
            +
                sq = vector.inject(0) { |a, e| a + e**2 }
         
     | 
| 
      
 120 
     | 
    
         
            +
                Math.sqrt(sq)
         
     | 
| 
       141 
121 
     | 
    
         
             
              end
         
     | 
| 
       142 
122 
     | 
    
         
             
            end
         
     | 
| 
         @@ -0,0 +1,22 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # provide refinements for the String class
         
     | 
| 
      
 2 
     | 
    
         
            +
            module String::SimilarityRefinements
         
     | 
| 
      
 3 
     | 
    
         
            +
              refine String do
         
     | 
| 
      
 4 
     | 
    
         
            +
                # Returns the cosine similarity to +other+
         
     | 
| 
      
 5 
     | 
    
         
            +
                # @see String::Similarity.cosine
         
     | 
| 
      
 6 
     | 
    
         
            +
                def cosine_similarity_to(other)
         
     | 
| 
      
 7 
     | 
    
         
            +
                  String::Similarity.cosine(self, other)
         
     | 
| 
      
 8 
     | 
    
         
            +
                end
         
     | 
| 
      
 9 
     | 
    
         
            +
             
     | 
| 
      
 10 
     | 
    
         
            +
                # Returns the Levenshtein distance to +other+
         
     | 
| 
      
 11 
     | 
    
         
            +
                # @see String::Similarity.levenshtein_distance
         
     | 
| 
      
 12 
     | 
    
         
            +
                def levenshtein_distance_to(other)
         
     | 
| 
      
 13 
     | 
    
         
            +
                  String::Similarity.levenshtein_distance(self, other)
         
     | 
| 
      
 14 
     | 
    
         
            +
                end
         
     | 
| 
      
 15 
     | 
    
         
            +
             
     | 
| 
      
 16 
     | 
    
         
            +
                # Returns the Levenshtein similarity to +other+
         
     | 
| 
      
 17 
     | 
    
         
            +
                # @see String::Similarity.levenshtein
         
     | 
| 
      
 18 
     | 
    
         
            +
                def levenshtein_similarity_to(other)
         
     | 
| 
      
 19 
     | 
    
         
            +
                  String::Similarity.levenshtein(self, other)
         
     | 
| 
      
 20 
     | 
    
         
            +
                end
         
     | 
| 
      
 21 
     | 
    
         
            +
              end
         
     | 
| 
      
 22 
     | 
    
         
            +
            end
         
     | 
    
        metadata
    CHANGED
    
    | 
         @@ -1,7 +1,7 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            --- !ruby/object:Gem::Specification
         
     | 
| 
       2 
2 
     | 
    
         
             
            name: string-similarity
         
     | 
| 
       3 
3 
     | 
    
         
             
            version: !ruby/object:Gem::Version
         
     | 
| 
       4 
     | 
    
         
            -
              version: 1.1.1
     | 
| 
      
 4 
     | 
    
         
            +
              version: 2.0.0
         
     | 
| 
       5 
5 
     | 
    
         
             
            platform: ruby
         
     | 
| 
       6 
6 
     | 
    
         
             
            authors:
         
     | 
| 
       7 
7 
     | 
    
         
             
            - Manuel Hutter
         
     | 
| 
         @@ -100,6 +100,7 @@ files: 
     | 
|
| 
       100 
100 
     | 
    
         
             
            - lib/string-similarity.rb
         
     | 
| 
       101 
101 
     | 
    
         
             
            - lib/string/similarity.rb
         
     | 
| 
       102 
102 
     | 
    
         
             
            - lib/string/similarity/version.rb
         
     | 
| 
      
 103 
     | 
    
         
            +
            - lib/string/similarity_refinements.rb
         
     | 
| 
       103 
104 
     | 
    
         
             
            - string-similarity.gemspec
         
     | 
| 
       104 
105 
     | 
    
         
             
            homepage: https://github.com/mhutter/string-similarity
         
     | 
| 
       105 
106 
     | 
    
         
             
            licenses:
         
     |