shingle 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +4 -0
 - data/Gemfile +4 -0
 - data/Rakefile +1 -0
 - data/lib/shingle.rb +50 -0
 - data/lib/shingle/version.rb +3 -0
 - data/shingle.gemspec +23 -0
 - data/spec/shingle_spec.rb +5 -0
 - metadata +76 -0
 
    
        data/.gitignore
    ADDED
    
    
    
        data/Gemfile
    ADDED
    
    
    
        data/Rakefile
    ADDED
    
    | 
         @@ -0,0 +1 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            require "bundler/gem_tasks"
         
     | 
    
        data/lib/shingle.rb
    ADDED
    
    | 
         @@ -0,0 +1,50 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            require "shingle/version"
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            module Shingle
              # Splits a text into overlapping runs of consecutive words ("shingles").
              #
              # A word is a maximal run of characters matching +[[:word:]]+; every other
              # character is a separator. Words listed in +:stop_words+ are dropped
              # before shingles are formed.
              class Shingle
                # A (possibly replaced) character that belongs to a word.
                WORD_CHAR = /[[:word:]]/

                # @param content [String] text to split into shingles
                # @param options [Hash]
                # @option options [Integer] :shingle_length words per shingle (default 5)
                # @option options [Array<String>] :stop_words words to skip (default [])
                # @option options [Boolean] :downcase downcase each kept word (default false)
                # @option options [Hash{String=>String}] :replace_chars per-character
                #   substitutions applied before a character is classified (default {})
                def initialize(content = '', options = {})
                  @content = content
                  @shingle_length = options[:shingle_length] || 5
                  @stop_words = options[:stop_words] || []
                  @downcase = options[:downcase] || false
                  @replace_chars = options[:replace_chars] || {}
                end

                # Yields every shingle of +@shingle_length+ consecutive kept words.
                #
                # The offsets are character indexes into the content. A shingle's range
                # runs from where its first word's region begins (0 for the first
                # shingle) up to the character just before the word that follows the
                # shingle, or the last character of the content for the final shingle;
                # separators (and skipped stop words) in between are therefore included.
                #
                # @yieldparam shingle [Array<String>] the words; a fresh array per call
                # @yieldparam position_start [Integer] first character index covered
                # @yieldparam position_end [Integer] last character index covered
                # @return [Enumerator] when called without a block
                def each_shingles
                  return enum_for(:each_shingles) unless block_given?

                  word = "".dup
                  separator_seen = false
                  shingle = []
                  # For each kept word still relevant: the index where the next word starts.
                  position_end_words = []
                  position_start = 0
                  position_end = 0

                  # Closes the word collected so far. It is always reset afterwards, so a
                  # skipped stop word can never leak into the following word.
                  flush = lambda do
                    unless word.empty? || stop_word?(word)
                      shingle << normalize(word)
                      position_end_words << position_end
                      if shingle.size == @shingle_length
                        # Hand out a copy: the window below is mutated in place.
                        yield(shingle.dup, position_start, position_end - 1)
                        position_start = position_end_words.shift
                        shingle.shift
                      end
                    end
                    word = "".dup
                  end

                  @content.each_char do |char|
                    char = @replace_chars[char] if @replace_chars.key?(char)
                    if char =~ WORD_CHAR
                      # First word character after a separator: the previous word is done.
                      if separator_seen
                        flush.call
                        separator_seen = false
                      end
                      word << char
                    else
                      separator_seen = true
                    end
                    position_end += 1
                  end

                  # The last word has no following word to trigger a flush.
                  flush.call
                end

                private

                # @return [Boolean] whether +word+ is in the configured stop-word list
                def stop_word?(word)
                  @stop_words.include?(word)
                end

                # Applies the +:downcase+ option. Uses the +unicode+ gem when the host
                # application has loaded it, otherwise Ruby's own String#downcase, so a
                # missing require no longer raises NameError.
                def normalize(word)
                  return word unless @downcase

                  defined?(::Unicode) ? ::Unicode.downcase(word) : word.downcase
                end
              end
            end
         
     | 
    
        data/shingle.gemspec
    ADDED
    
    | 
         @@ -0,0 +1,23 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # -*- encoding: utf-8 -*-
            # Gem specification for shingle.
            #
            # Put the local lib directory at the FRONT of the load path so that
            # "shingle/version" is read from this checkout rather than from any copy
            # that happens to be installed already.
            lib = File.expand_path("../lib", __FILE__)
            $:.unshift(lib) unless $:.include?(lib)
            require "shingle/version"

            Gem::Specification.new do |s|
              s.name        = "shingle"
              s.version     = Shingle::VERSION
              s.authors     = ["vad"]
              s.email       = ["vad4msiu@gmail.com"]
              s.homepage    = ""
              s.summary     = "Shingle"
              s.description = "Shingle"

              # RubyForge is defunct and newer RubyGems deprecate this attribute; set
              # it only where the setter still exists.
              s.rubyforge_project = "shingle" if s.respond_to?(:rubyforge_project=)

              # NUL-separated output keeps unusual file names intact.
              s.files         = `git ls-files -z`.split("\x0")
              # Filter in Ruby instead of relying on shell brace expansion, which
              # /bin/sh does not perform when it is dash.
              s.test_files    = s.files.grep(%r{\A(?:test|spec|features)/})
              s.executables   = s.files.grep(%r{\Abin/}).map { |f| File.basename(f) }
              s.require_paths = ["lib"]

              s.add_dependency "unicode"
              s.add_development_dependency "rspec"
            end
         
     | 
    
        metadata
    ADDED
    
    | 
         @@ -0,0 +1,76 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            --- !ruby/object:Gem::Specification
         
     | 
| 
      
 2 
     | 
    
         
            +
            name: shingle
         
     | 
| 
      
 3 
     | 
    
         
            +
            version: !ruby/object:Gem::Version
         
     | 
| 
      
 4 
     | 
    
         
            +
              version: 0.0.1
         
     | 
| 
      
 5 
     | 
    
         
            +
              prerelease: 
         
     | 
| 
      
 6 
     | 
    
         
            +
            platform: ruby
         
     | 
| 
      
 7 
     | 
    
         
            +
            authors:
         
     | 
| 
      
 8 
     | 
    
         
            +
            - vad
         
     | 
| 
      
 9 
     | 
    
         
            +
            autorequire: 
         
     | 
| 
      
 10 
     | 
    
         
            +
            bindir: bin
         
     | 
| 
      
 11 
     | 
    
         
            +
            cert_chain: []
         
     | 
| 
      
 12 
     | 
    
         
            +
            date: 2011-11-05 00:00:00.000000000Z
         
     | 
| 
      
 13 
     | 
    
         
            +
            dependencies:
         
     | 
| 
      
 14 
     | 
    
         
            +
            - !ruby/object:Gem::Dependency
         
     | 
| 
      
 15 
     | 
    
         
            +
              name: unicode
         
     | 
| 
      
 16 
     | 
    
         
            +
              requirement: &2157208220 !ruby/object:Gem::Requirement
         
     | 
| 
      
 17 
     | 
    
         
            +
                none: false
         
     | 
| 
      
 18 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 19 
     | 
    
         
            +
                - - ! '>='
         
     | 
| 
      
 20 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 21 
     | 
    
         
            +
                    version: '0'
         
     | 
| 
      
 22 
     | 
    
         
            +
              type: :runtime
         
     | 
| 
      
 23 
     | 
    
         
            +
              prerelease: false
         
     | 
| 
      
 24 
     | 
    
         
            +
              version_requirements: *2157208220
         
     | 
| 
      
 25 
     | 
    
         
            +
            - !ruby/object:Gem::Dependency
         
     | 
| 
      
 26 
     | 
    
         
            +
              name: rspec
         
     | 
| 
      
 27 
     | 
    
         
            +
              requirement: &2157207740 !ruby/object:Gem::Requirement
         
     | 
| 
      
 28 
     | 
    
         
            +
                none: false
         
     | 
| 
      
 29 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 30 
     | 
    
         
            +
                - - ! '>='
         
     | 
| 
      
 31 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 32 
     | 
    
         
            +
                    version: '0'
         
     | 
| 
      
 33 
     | 
    
         
            +
              type: :development
         
     | 
| 
      
 34 
     | 
    
         
            +
              prerelease: false
         
     | 
| 
      
 35 
     | 
    
         
            +
              version_requirements: *2157207740
         
     | 
| 
      
 36 
     | 
    
         
            +
            description: Shingle
         
     | 
| 
      
 37 
     | 
    
         
            +
            email:
         
     | 
| 
      
 38 
     | 
    
         
            +
            - vad4msiu@gmail.com
         
     | 
| 
      
 39 
     | 
    
         
            +
            executables: []
         
     | 
| 
      
 40 
     | 
    
         
            +
            extensions: []
         
     | 
| 
      
 41 
     | 
    
         
            +
            extra_rdoc_files: []
         
     | 
| 
      
 42 
     | 
    
         
            +
            files:
         
     | 
| 
      
 43 
     | 
    
         
            +
            - .gitignore
         
     | 
| 
      
 44 
     | 
    
         
            +
            - Gemfile
         
     | 
| 
      
 45 
     | 
    
         
            +
            - Rakefile
         
     | 
| 
      
 46 
     | 
    
         
            +
            - lib/shingle.rb
         
     | 
| 
      
 47 
     | 
    
         
            +
            - lib/shingle/version.rb
         
     | 
| 
      
 48 
     | 
    
         
            +
            - shingle.gemspec
         
     | 
| 
      
 49 
     | 
    
         
            +
            - spec/shingle_spec.rb
         
     | 
| 
      
 50 
     | 
    
         
            +
            homepage: ''
         
     | 
| 
      
 51 
     | 
    
         
            +
            licenses: []
         
     | 
| 
      
 52 
     | 
    
         
            +
            post_install_message: 
         
     | 
| 
      
 53 
     | 
    
         
            +
            rdoc_options: []
         
     | 
| 
      
 54 
     | 
    
         
            +
            require_paths:
         
     | 
| 
      
 55 
     | 
    
         
            +
            - lib
         
     | 
| 
      
 56 
     | 
    
         
            +
            required_ruby_version: !ruby/object:Gem::Requirement
         
     | 
| 
      
 57 
     | 
    
         
            +
              none: false
         
     | 
| 
      
 58 
     | 
    
         
            +
              requirements:
         
     | 
| 
      
 59 
     | 
    
         
            +
              - - ! '>='
         
     | 
| 
      
 60 
     | 
    
         
            +
                - !ruby/object:Gem::Version
         
     | 
| 
      
 61 
     | 
    
         
            +
                  version: '0'
         
     | 
| 
      
 62 
     | 
    
         
            +
            required_rubygems_version: !ruby/object:Gem::Requirement
         
     | 
| 
      
 63 
     | 
    
         
            +
              none: false
         
     | 
| 
      
 64 
     | 
    
         
            +
              requirements:
         
     | 
| 
      
 65 
     | 
    
         
            +
              - - ! '>='
         
     | 
| 
      
 66 
     | 
    
         
            +
                - !ruby/object:Gem::Version
         
     | 
| 
      
 67 
     | 
    
         
            +
                  version: '0'
         
     | 
| 
      
 68 
     | 
    
         
            +
            requirements: []
         
     | 
| 
      
 69 
     | 
    
         
            +
            rubyforge_project: shingle
         
     | 
| 
      
 70 
     | 
    
         
            +
            rubygems_version: 1.8.10
         
     | 
| 
      
 71 
     | 
    
         
            +
            signing_key: 
         
     | 
| 
      
 72 
     | 
    
         
            +
            specification_version: 3
         
     | 
| 
      
 73 
     | 
    
         
            +
            summary: Shingle
         
     | 
| 
      
 74 
     | 
    
         
            +
            test_files:
         
     | 
| 
      
 75 
     | 
    
         
            +
            - spec/shingle_spec.rb
         
     | 
| 
      
 76 
     | 
    
         
            +
            has_rdoc: 
         
     |