regexp_trie 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,4 @@
1
+ require 'regexp_trie'
2
# Build a trie from a few sample words, then print the resulting Regexp.
# RegexpTrie#add takes one string per call (and returns the trie), so the
# words are fed in one at a time.
rt = RegexpTrie.new
%w(foobar fooxar foozap fooza).each { |word| rt.add(word) }
puts rt.to_regexp # (?-mix:foo(?:bar|xar|zap?))
@@ -0,0 +1,3 @@
1
class RegexpTrie
  # Gem version string. Frozen so the shared constant cannot be mutated in place.
  VERSION = "0.1.0".freeze
end
@@ -0,0 +1,81 @@
1
+ require_relative "regexp_trie/version"
2
+
3
# Builds a single Regexp out of a set of literal strings by storing them in a
# trie (nested Hashes keyed by character) so that shared prefixes are emitted
# only once.
class RegexpTrie

  # Builds a Regexp that matches any of the given strings.
  #
  # @param [Array<String>] strings Set of patterns (nested arrays are flattened)
  # @param [Fixnum,Boolean] option The second argument of Regexp.new()
  # @return [Regexp]
  def self.union(*strings, option: nil)
    rt = new
    strings.flatten.each do |arg|
      rt.add(arg)
    end
    rt.to_regexp(option)
  end

  def initialize
    # Root trie node: maps a character to its child node (another Hash).
    @head = {}
  end

  # Adds one literal string to the trie. nil/false and empty strings are ignored.
  #
  # @param [String] str
  # @return [RegexpTrie] self, so calls can be chained
  def add(str)
    return self unless str && !str.empty?

    entry = @head
    str.each_char do |c|
      entry[c] ||= {}
      entry = entry[c]
    end
    entry[''] = true # terminator: a word ends at this node
    self
  end

  # Converts the trie into a Regexp. With nothing added, the result is the
  # same as Regexp.union called without arguments.
  #
  # @param [Fixnum,Boolean] option The second argument of Regexp.new()
  # @return [Regexp]
  def to_regexp(option = nil)
    if @head.empty?
      Regexp.union
    else
      Regexp.new(build(@head), option)
    end
  end

  private

  # Recursively renders a trie node as regexp source.
  #
  # @param [Hash] entry trie node
  # @return [String, nil] regexp source for everything below this node, or
  #   nil when the node holds nothing but the terminator
  def build(entry)
    return nil if entry[''] && entry.size == 1

    alt = [] # alternatives made of more than one character
    cc = []  # single characters that directly end a word
    q = false # true when a word may also end at this node

    entry.each do |c, child|
      if child.kind_of?(Hash)
        recurse = build(child)
        qc = Regexp.quote(c)
        if recurse
          alt.push(qc + recurse)
        else
          cc.push(qc)
        end
      else
        # The '' terminator key maps to true, not to a Hash.
        q = true
      end
    end

    # Captured before the character class is appended to alt below.
    cconly = alt.empty?
    unless cc.empty?
      alt.push(cc.size == 1 ? cc.first : "[#{cc.join('')}]")
    end

    result = alt.size == 1 ? alt.first : "(?:#{alt.join('|')})"
    return result unless q

    if cconly
      # A lone character or character class can take the quantifier directly.
      "#{result}?"
    else
      # Interpolate the local `result`, not a global, so the optional suffix is kept.
      "(?:#{result})?"
    end
  end
end
@@ -0,0 +1,28 @@
1
+ # coding: utf-8
2
# Put the gem's own lib/ directory on the load path so the version constant
# can be loaded straight from the working tree.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'regexp_trie/version'

Gem::Specification.new do |gem|
  gem.name    = "regexp_trie"
  gem.version = RegexpTrie::VERSION
  gem.authors = ["FUJI Goro (gfx)"]
  gem.email   = ["gfuji@cpan.org"]

  gem.summary     = %q{Optimized Regexp builder with Trie}
  gem.description = %q{Optimized Regexp builder with Trie as a port of Perl's Regexp::Trie}
  gem.homepage    = "https://github.com/gfx/ruby-regexp_trie"
  gem.license     = "MIT"

  gem.metadata['allowed_push_host'] = 'https://rubygems.org/'

  # Package every git-tracked file except those under test/, spec/ or features/.
  tracked = `git ls-files -z`.split("\x0")
  gem.files = tracked.reject { |path| path.match(%r{^(test|spec|features)/}) }

  gem.bindir        = "exe"
  gem.executables   = gem.files.grep(%r{^exe/}).map { |path| File.basename(path) }
  gem.require_paths = ["lib"]

  # Development-only dependencies and their version constraints (none = any version).
  {
    "bundler"               => ["~> 1.11"],
    "rake"                  => ["~> 10.0"],
    "minitest"              => ["~> 5.0"],
    "minitest-power_assert" => [],
  }.each do |dep_name, constraints|
    gem.add_development_dependency(dep_name, *constraints)
  end
end
metadata ADDED
@@ -0,0 +1,115 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: regexp_trie
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - FUJI Goro (gfx)
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2016-01-22 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.11'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.11'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: minitest
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '5.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '5.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: minitest-power_assert
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ description: Optimized Regexp builder with Trie as a port of Perl's Regexp::Trie
70
+ email:
71
+ - gfuji@cpan.org
72
+ executables: []
73
+ extensions: []
74
+ extra_rdoc_files: []
75
+ files:
76
+ - ".gitignore"
77
+ - ".travis.yml"
78
+ - Gemfile
79
+ - LICENSE.txt
80
+ - README.md
81
+ - Rakefile
82
+ - bin/console
83
+ - bin/setup
84
+ - example/benchmark.rb
85
+ - example/hatena-keyword-list.csv
86
+ - example/synopsis.rb
87
+ - lib/regexp_trie.rb
88
+ - lib/regexp_trie/version.rb
89
+ - regexp_trie.gemspec
90
+ homepage: https://github.com/gfx/ruby-regexp_trie
91
+ licenses:
92
+ - MIT
93
+ metadata:
94
+ allowed_push_host: https://rubygems.org/
95
+ post_install_message:
96
+ rdoc_options: []
97
+ require_paths:
98
+ - lib
99
+ required_ruby_version: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ required_rubygems_version: !ruby/object:Gem::Requirement
105
+ requirements:
106
+ - - ">="
107
+ - !ruby/object:Gem::Version
108
+ version: '0'
109
+ requirements: []
110
+ rubyforge_project:
111
+ rubygems_version: 2.5.1
112
+ signing_key:
113
+ specification_version: 4
114
+ summary: Optimized Regexp builder with Trie
115
+ test_files: []