similarityTextCoefficients 0.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
 - data/lib/similarityTextCoefficients.rb +98 -0
 - metadata +47 -0
 
    
        checksums.yaml
    ADDED
    
    | 
         @@ -0,0 +1,7 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            ---
         
     | 
| 
      
 2 
     | 
    
         
            +
            SHA1:
         
     | 
| 
      
 3 
     | 
    
         
            +
              metadata.gz: 4deed5d254b448b0f72730d1d7259e3f6fa44b41
         
     | 
| 
      
 4 
     | 
    
         
            +
              data.tar.gz: 526d8ee307d479d0a93121d76430b53b16be8cfc
         
     | 
| 
      
 5 
     | 
    
         
            +
            SHA512:
         
     | 
| 
      
 6 
     | 
    
         
            +
              metadata.gz: eb7649ab9ba6cd423555dc12f108d7c29d08b66fd60b0df83f1dda4448cd5a762b319fc895cce9cd791e943e1364e829bef5e0e668095057e734bf08afaf0e2b
         
     | 
| 
      
 7 
     | 
    
         
            +
              data.tar.gz: 1daafe71aaa86253a61fdd09a3bd428fec4c8f9cdd510b25db6e88bc5dfd06cfcbdf5d4c8bcef326a3114823ba70136bf64a6e9267406b988e2f8bf125b8380a
         
     | 
| 
         @@ -0,0 +1,98 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            require "set"
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            # Word-level text similarity scores: Dice's coefficient over character
            # bigrams and the Jaccard (Tanimoto) index over whitespace-separated tokens.
            class SimilarityTextCoefficients

              # Returns the best Dice coefficient found between any word of +a+ and
              # any word of +b+.
              #
              # Both strings are split on whitespace. Each word is reduced to the set
              # of its character bigrams, every word of +a+ is compared with every
              # word of +b+ using 2 * |A & B| / (|A| + |B|), and the largest score
              # is returned.
              #
              # a - String whose words are compared.
              # b - String whose words are compared.
              #
              # Returns a Float between 0.0 and 1.0. The result is 0.0 when either
              # string contains no words.
              def self.dice_coefficient(a, b)
                words_a = a.split(" ")
                words_b = b.split(" ")

                # Build each word's bigram set once instead of once per word pair.
                bigrams_a = words_a.map { |word| word.each_char.each_cons(2).to_set }
                bigrams_b = words_b.map { |word| word.each_char.each_cons(2).to_set }

                best = 0.0
                words_a.each_with_index do |word_a, i|
                  words_b.each_with_index do |word_b, j|
                    total = bigrams_a[i].size + bigrams_b[j].size
                    score =
                      if total.zero?
                        # Both words are a single character, so neither has bigrams.
                        # Dividing by zero would give NaN, and NaN would then block
                        # every later comparison; compare the words directly instead.
                        word_a == word_b ? 1.0 : 0.0
                      else
                        (bigrams_a[i] & bigrams_b[j]).size * 2.0 / total
                      end
                    best = score if score > best
                  end
                end

                best
              end

              # Jaccard index = intersection / ((numberA + numberB) - intersection)
              # where
              #   numberA      - number of distinct tokens in a
              #   numberB      - number of distinct tokens in b
              #   intersection - number of distinct tokens present in both
              #
              # a - String split on whitespace into tokens.
              # b - String split on whitespace into tokens.
              #
              # Returns a Float between 0.0 and 1.0. The result is 0.0 when neither
              # string contains any token.
              def self.createJaccardIndex(a, b)
                # Work on distinct tokens: the index is defined over sets, and
                # counting repeated words could push the result above 1.
                tokens_a = a.split(" ").uniq
                tokens_b = b.split(" ").uniq

                intersection = (tokens_a & tokens_b).size
                union = tokens_a.size + tokens_b.size - intersection

                # Two empty inputs would otherwise evaluate 0.0 / 0, which is NaN.
                return 0.0 if union.zero?

                intersection.to_f / union
              end

            end
         
     | 
    
        metadata
    ADDED
    
    | 
         @@ -0,0 +1,47 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            --- !ruby/object:Gem::Specification
         
     | 
| 
      
 2 
     | 
    
         
            +
            name: similarityTextCoefficients
         
     | 
| 
      
 3 
     | 
    
         
            +
            version: !ruby/object:Gem::Version
         
     | 
| 
      
 4 
     | 
    
         
            +
              version: 0.0.0
         
     | 
| 
      
 5 
     | 
    
         
            +
            platform: ruby
         
     | 
| 
      
 6 
     | 
    
         
            +
            authors:
         
     | 
| 
      
 7 
     | 
    
         
            +
            - Gilberto Flores
         
     | 
| 
      
 8 
     | 
    
         
            +
            autorequire: 
         
     | 
| 
      
 9 
     | 
    
         
            +
            bindir: bin
         
     | 
| 
      
 10 
     | 
    
         
            +
            cert_chain: []
         
     | 
| 
      
 11 
     | 
    
         
            +
            date: 2015-04-27 00:00:00.000000000 Z
         
     | 
| 
      
 12 
     | 
    
         
            +
            dependencies: []
         
     | 
| 
      
 13 
     | 
    
         
            +
            description: Enter two strings and it compares their similarity and gives a score
         
     | 
| 
      
 14 
     | 
    
         
            +
              between 0 and 1, when 1 is the similarity
         
     | 
| 
      
 15 
     | 
    
         
            +
            email: gilbertofp16@gmail.com
         
     | 
| 
      
 16 
     | 
    
         
            +
            executables: []
         
     | 
| 
      
 17 
     | 
    
         
            +
            extensions: []
         
     | 
| 
      
 18 
     | 
    
         
            +
            extra_rdoc_files: []
         
     | 
| 
      
 19 
     | 
    
         
            +
            files:
         
     | 
| 
      
 20 
     | 
    
         
            +
            - lib/similarityTextCoefficients.rb
         
     | 
| 
      
 21 
     | 
    
         
            +
            homepage: http://rubygems.org/gems/SimilarityTextCoefficients
         
     | 
| 
      
 22 
     | 
    
         
            +
            licenses:
         
     | 
| 
      
 23 
     | 
    
         
            +
            - MIT
         
     | 
| 
      
 24 
     | 
    
         
            +
            metadata: {}
         
     | 
| 
      
 25 
     | 
    
         
            +
            post_install_message: 
         
     | 
| 
      
 26 
     | 
    
         
            +
            rdoc_options: []
         
     | 
| 
      
 27 
     | 
    
         
            +
            require_paths:
         
     | 
| 
      
 28 
     | 
    
         
            +
            - lib
         
     | 
| 
      
 29 
     | 
    
         
            +
            required_ruby_version: !ruby/object:Gem::Requirement
         
     | 
| 
      
 30 
     | 
    
         
            +
              requirements:
         
     | 
| 
      
 31 
     | 
    
         
            +
              - - ">="
         
     | 
| 
      
 32 
     | 
    
         
            +
                - !ruby/object:Gem::Version
         
     | 
| 
      
 33 
     | 
    
         
            +
                  version: '0'
         
     | 
| 
      
 34 
     | 
    
         
            +
            required_rubygems_version: !ruby/object:Gem::Requirement
         
     | 
| 
      
 35 
     | 
    
         
            +
              requirements:
         
     | 
| 
      
 36 
     | 
    
         
            +
              - - ">="
         
     | 
| 
      
 37 
     | 
    
         
            +
                - !ruby/object:Gem::Version
         
     | 
| 
      
 38 
     | 
    
         
            +
                  version: '0'
         
     | 
| 
      
 39 
     | 
    
         
            +
            requirements: []
         
     | 
| 
      
 40 
     | 
    
         
            +
            rubyforge_project: 
         
     | 
| 
      
 41 
     | 
    
         
            +
            rubygems_version: 2.4.6
         
     | 
| 
      
 42 
     | 
    
         
            +
            signing_key: 
         
     | 
| 
      
 43 
     | 
    
         
            +
            specification_version: 4
         
     | 
| 
      
 44 
     | 
    
         
            +
            summary: For text comparations it implements Dice's coefficient and Jaccard's or Tanimoto
         
     | 
| 
      
 45 
     | 
    
         
            +
              index
         
     | 
| 
      
 46 
     | 
    
         
            +
            test_files: []
         
     | 
| 
      
 47 
     | 
    
         
            +
            has_rdoc: 
         
     |