regexp_trie 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/.travis.yml +4 -0
- data/Gemfile +5 -0
- data/LICENSE.txt +21 -0
- data/README.md +40 -0
- data/Rakefile +10 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/example/benchmark.rb +90 -0
- data/example/hatena-keyword-list.csv +454722 -0
- data/example/synopsis.rb +4 -0
- data/lib/regexp_trie/version.rb +3 -0
- data/lib/regexp_trie.rb +81 -0
- data/regexp_trie.gemspec +28 -0
- metadata +115 -0
    
        data/example/synopsis.rb
    ADDED
    
    
    
        data/lib/regexp_trie.rb
    ADDED
    
    | @@ -0,0 +1,81 @@ | |
| 1 | 
            +
            require_relative "regexp_trie/version"
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            # Builds one Regexp out of a set of literal strings. The strings are stored
            # character by character in a trie of nested Hashes, and the trie is then
            # serialized so that a shared prefix is emitted only once.
            class RegexpTrie
              # Builds a Regexp that matches any of the given strings.
              #
              # @param [Array<String>] strings Set of patterns (nested arrays are flattened)
              # @param [Integer,Boolean,nil] option The second argument of Regexp.new()
              # @return [Regexp]
              def self.union(*strings, option: nil)
                rt = new
                strings.flatten.each do |arg|
                  rt.add(arg)
                end
                rt.to_regexp(option)
              end

              def initialize
                # Root trie node: maps a character to its child node (another Hash).
                @head = {}
              end

              # Adds one literal string to the trie. nil and empty strings are ignored.
              #
              # @param [String] str String to add; its characters are quoted when the
              #   Regexp is built, so it is matched literally
              # @return [RegexpTrie] self, so calls can be chained
              def add(str)
                return self unless str && !str.empty?

                entry = @head
                str.each_char do |c|
                  entry[c] ||= {}
                  entry = entry[c]
                end
                entry[''] = true # terminator
                self
              end

              # Serializes the trie into a Regexp.
              #
              # @param [Integer,Boolean,nil] option The second argument of Regexp.new();
              #   not used when no string has been added
              # @return [Regexp] the result of Regexp.union with no arguments when the
              #   trie is empty
              def to_regexp(option = nil)
                if @head.empty?
                  Regexp.union
                else
                  Regexp.new(build(@head), option)
                end
              end

              private

              # Recursively turns one trie node into regexp source text.
              #
              # @param [Hash] entry Trie node; the '' key (value true) marks the end of an
              #   added string, every other key is a single character mapping to a child node
              # @return [String,nil] regexp source for everything below this node, or nil
              #   when the node holds nothing but the terminator
              def build(entry)
                return nil if entry[''] && entry.size == 1

                alt = [] # branches that continue for more than one character
                cc = []  # branches that are a single final character
                q = false # true when an added string ends at this node

                entry.each do |c, child|
                  if child.kind_of?(Hash)
                    recurse = build(child)
                    qc = Regexp.quote(c)
                    if recurse
                      alt.push(qc + recurse)
                    else
                      cc.push(qc)
                    end
                  else
                    # The terminator entry: the rest of the pattern becomes optional.
                    q = true
                  end
                end

                # Remember whether the only content is the character class, because
                # a lone character or [...] can take "?" without an extra group.
                cconly = alt.empty?
                unless cc.empty?
                  alt.push(cc.size == 1 ? cc.first : "[#{cc.join('')}]")
                end

                result = alt.size == 1 ? alt.first : "(?:#{alt.join('|')})"
                if q
                  if cconly
                    "#{result}?"
                  else
                    # Must interpolate the local result; the previous $result was an
                    # unset global and produced an empty "(?:)?" group.
                    "(?:#{result})?"
                  end
                else
                  result
                end
              end
            end
         | 
    
        data/regexp_trie.gemspec
    ADDED
    
    | @@ -0,0 +1,28 @@ | |
| 1 | 
            +
            # coding: utf-8
            # Gem specification for regexp_trie.

            # Make this checkout's lib/ directory requirable so the version constant
            # can be loaded below.
            lib_dir = File.expand_path('../lib', __FILE__)
            $LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
            require 'regexp_trie/version'

            Gem::Specification.new do |gem|
              gem.name          = "regexp_trie"
              gem.version       = RegexpTrie::VERSION
              gem.authors       = ["FUJI Goro (gfx)"]
              gem.email         = ["gfuji@cpan.org"]

              gem.summary       = %q{Optimized Regexp builder with Trie}
              gem.description   = %q{Optimized Regexp builder with Trie as a port of Perl's Regexp::Trie}
              gem.homepage      = "https://github.com/gfx/ruby-regexp_trie"
              gem.license       = "MIT"

              gem.metadata['allowed_push_host'] = 'https://rubygems.org/'

              # Package every git-tracked file except those under test/, spec/ or features/.
              tracked_paths = `git ls-files -z`.split("\x0")
              gem.files         = tracked_paths.reject { |path| path.match(%r{^(test|spec|features)/}) }
              gem.bindir        = "exe"
              # Executables are the base names of the packaged files under exe/.
              gem.executables   = gem.files.grep(%r{^exe/}) { |path| File.basename(path) }
              gem.require_paths = ["lib"]

              # Development-only dependencies with a version constraint.
              [
                ["bundler", "~> 1.11"],
                ["rake", "~> 10.0"],
                ["minitest", "~> 5.0"]
              ].each do |dep_name, requirement|
                gem.add_development_dependency dep_name, requirement
              end
              gem.add_development_dependency "minitest-power_assert"
            end
         | 
    
        metadata
    ADDED
    
    | @@ -0,0 +1,115 @@ | |
| 1 | 
            +
            --- !ruby/object:Gem::Specification
         | 
| 2 | 
            +
            name: regexp_trie
         | 
| 3 | 
            +
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            +
              version: 0.1.0
         | 
| 5 | 
            +
            platform: ruby
         | 
| 6 | 
            +
            authors:
         | 
| 7 | 
            +
            - FUJI Goro (gfx)
         | 
| 8 | 
            +
            autorequire: 
         | 
| 9 | 
            +
            bindir: exe
         | 
| 10 | 
            +
            cert_chain: []
         | 
| 11 | 
            +
            date: 2016-01-22 00:00:00.000000000 Z
         | 
| 12 | 
            +
            dependencies:
         | 
| 13 | 
            +
            - !ruby/object:Gem::Dependency
         | 
| 14 | 
            +
              name: bundler
         | 
| 15 | 
            +
              requirement: !ruby/object:Gem::Requirement
         | 
| 16 | 
            +
                requirements:
         | 
| 17 | 
            +
                - - "~>"
         | 
| 18 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 19 | 
            +
                    version: '1.11'
         | 
| 20 | 
            +
              type: :development
         | 
| 21 | 
            +
              prerelease: false
         | 
| 22 | 
            +
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 23 | 
            +
                requirements:
         | 
| 24 | 
            +
                - - "~>"
         | 
| 25 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 26 | 
            +
                    version: '1.11'
         | 
| 27 | 
            +
            - !ruby/object:Gem::Dependency
         | 
| 28 | 
            +
              name: rake
         | 
| 29 | 
            +
              requirement: !ruby/object:Gem::Requirement
         | 
| 30 | 
            +
                requirements:
         | 
| 31 | 
            +
                - - "~>"
         | 
| 32 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 33 | 
            +
                    version: '10.0'
         | 
| 34 | 
            +
              type: :development
         | 
| 35 | 
            +
              prerelease: false
         | 
| 36 | 
            +
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 37 | 
            +
                requirements:
         | 
| 38 | 
            +
                - - "~>"
         | 
| 39 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 40 | 
            +
                    version: '10.0'
         | 
| 41 | 
            +
            - !ruby/object:Gem::Dependency
         | 
| 42 | 
            +
              name: minitest
         | 
| 43 | 
            +
              requirement: !ruby/object:Gem::Requirement
         | 
| 44 | 
            +
                requirements:
         | 
| 45 | 
            +
                - - "~>"
         | 
| 46 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 47 | 
            +
                    version: '5.0'
         | 
| 48 | 
            +
              type: :development
         | 
| 49 | 
            +
              prerelease: false
         | 
| 50 | 
            +
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 51 | 
            +
                requirements:
         | 
| 52 | 
            +
                - - "~>"
         | 
| 53 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 54 | 
            +
                    version: '5.0'
         | 
| 55 | 
            +
            - !ruby/object:Gem::Dependency
         | 
| 56 | 
            +
              name: minitest-power_assert
         | 
| 57 | 
            +
              requirement: !ruby/object:Gem::Requirement
         | 
| 58 | 
            +
                requirements:
         | 
| 59 | 
            +
                - - ">="
         | 
| 60 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 61 | 
            +
                    version: '0'
         | 
| 62 | 
            +
              type: :development
         | 
| 63 | 
            +
              prerelease: false
         | 
| 64 | 
            +
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 65 | 
            +
                requirements:
         | 
| 66 | 
            +
                - - ">="
         | 
| 67 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 68 | 
            +
                    version: '0'
         | 
| 69 | 
            +
            description: Optimized Regexp builder with Trie as a port of Perl's Regexp::Trie
         | 
| 70 | 
            +
            email:
         | 
| 71 | 
            +
            - gfuji@cpan.org
         | 
| 72 | 
            +
            executables: []
         | 
| 73 | 
            +
            extensions: []
         | 
| 74 | 
            +
            extra_rdoc_files: []
         | 
| 75 | 
            +
            files:
         | 
| 76 | 
            +
            - ".gitignore"
         | 
| 77 | 
            +
            - ".travis.yml"
         | 
| 78 | 
            +
            - Gemfile
         | 
| 79 | 
            +
            - LICENSE.txt
         | 
| 80 | 
            +
            - README.md
         | 
| 81 | 
            +
            - Rakefile
         | 
| 82 | 
            +
            - bin/console
         | 
| 83 | 
            +
            - bin/setup
         | 
| 84 | 
            +
            - example/benchmark.rb
         | 
| 85 | 
            +
            - example/hatena-keyword-list.csv
         | 
| 86 | 
            +
            - example/synopsis.rb
         | 
| 87 | 
            +
            - lib/regexp_trie.rb
         | 
| 88 | 
            +
            - lib/regexp_trie/version.rb
         | 
| 89 | 
            +
            - regexp_trie.gemspec
         | 
| 90 | 
            +
            homepage: https://github.com/gfx/ruby-regexp_trie
         | 
| 91 | 
            +
            licenses:
         | 
| 92 | 
            +
            - MIT
         | 
| 93 | 
            +
            metadata:
         | 
| 94 | 
            +
              allowed_push_host: https://rubygems.org/
         | 
| 95 | 
            +
            post_install_message: 
         | 
| 96 | 
            +
            rdoc_options: []
         | 
| 97 | 
            +
            require_paths:
         | 
| 98 | 
            +
            - lib
         | 
| 99 | 
            +
            required_ruby_version: !ruby/object:Gem::Requirement
         | 
| 100 | 
            +
              requirements:
         | 
| 101 | 
            +
              - - ">="
         | 
| 102 | 
            +
                - !ruby/object:Gem::Version
         | 
| 103 | 
            +
                  version: '0'
         | 
| 104 | 
            +
            required_rubygems_version: !ruby/object:Gem::Requirement
         | 
| 105 | 
            +
              requirements:
         | 
| 106 | 
            +
              - - ">="
         | 
| 107 | 
            +
                - !ruby/object:Gem::Version
         | 
| 108 | 
            +
                  version: '0'
         | 
| 109 | 
            +
            requirements: []
         | 
| 110 | 
            +
            rubyforge_project: 
         | 
| 111 | 
            +
            rubygems_version: 2.5.1
         | 
| 112 | 
            +
            signing_key: 
         | 
| 113 | 
            +
            specification_version: 4
         | 
| 114 | 
            +
            summary: Optimized Regexp builder with Trie
         | 
| 115 | 
            +
            test_files: []
         |