regexp_trie 1.0.1 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/CI.yml +31 -0
- data/CHANGES.md +6 -1
- data/README.md +1 -1
- data/lib/regexp_trie/version.rb +1 -1
- data/lib/regexp_trie.rb +4 -2
- data/regexp_trie.gemspec +11 -8
- metadata +14 -17
- data/.travis.yml +0 -7
- data/example/benchmark.rb +0 -90
- data/example/hatena-keyword-list.csv +0 -454722
- data/example/synopsis.rb +0 -13
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3b7c0815c1b7b8f4bc6205ddaed11d259531f39bdcf6ac4a08a1edb965019d75
|
4
|
+
data.tar.gz: 9321a016f40fe9ffd441dbcc41068440d2fc1cea4ff49eb1c5efa9e559590113
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b9cba5c7d1a9379061c813e8829c52e2e69915824814d7b156f492a464a6d0bee21b0fbad041263779303104215388a69610cd796f0fe633f143125feb2e9b64
|
7
|
+
data.tar.gz: 7877a5f840407312b18f2d6644ebff0c71f9c0692450c365b953dcb42b4c8e4a45f2cbd5f8583dc6408a39ca521726eb577df8de7054b369c9003b0916589ba6
|
@@ -0,0 +1,31 @@
|
|
1
|
+
name: CI
|
2
|
+
|
3
|
+
on:
|
4
|
+
push:
|
5
|
+
branches: [ "master" ]
|
6
|
+
pull_request:
|
7
|
+
|
8
|
+
permissions:
|
9
|
+
contents: read
|
10
|
+
|
11
|
+
jobs:
|
12
|
+
test:
|
13
|
+
runs-on: ubuntu-latest
|
14
|
+
strategy:
|
15
|
+
matrix:
|
16
|
+
ruby-version:
|
17
|
+
- '2.5'
|
18
|
+
- '2.6'
|
19
|
+
- '2.7'
|
20
|
+
- '3.0'
|
21
|
+
- '3.1'
|
22
|
+
|
23
|
+
steps:
|
24
|
+
- uses: actions/checkout@v3
|
25
|
+
- name: Set up Ruby
|
26
|
+
uses: ruby/setup-ruby@v1
|
27
|
+
with:
|
28
|
+
ruby-version: '${{ matrix.ruby-version }}'
|
29
|
+
bundler-cache: true # runs 'bundle install' and caches installed gems automatically
|
30
|
+
- name: Run tests
|
31
|
+
run: bundle exec rake
|
data/CHANGES.md
CHANGED
@@ -1,9 +1,14 @@
|
|
1
1
|
Revision history of the RegexpTrie gem
|
2
2
|
|
3
|
-
## v1.0.
|
3
|
+
## v1.0.2 - 2018/10/09
|
4
4
|
|
5
|
+
* Tested on Ruby v2.5
|
5
6
|
* Add `# frozen_string_literal: true` to source files
|
6
7
|
|
8
|
+
## v1.0.1 - 2018/10/09
|
9
|
+
|
10
|
+
* <del>Add `# frozen_string_literal: true` to source files</del> (<ins>it was not enabled; fixed in v1.0.2</ins>)
|
11
|
+
|
7
12
|
## v1.0.0 - 2016/11/14
|
8
13
|
|
9
14
|
- Add a method to get the union pattern as `String`, not as `Regexp`
|
data/README.md
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# RegexpTrie [](https://github.com/gfx/ruby-regexp_trie/actions/workflows/CI.yml) [](https://badge.fury.io/rb/regexp_trie)
|
2
2
|
|
3
3
|
## Synopsis
|
4
4
|
|
data/lib/regexp_trie/version.rb
CHANGED
data/lib/regexp_trie.rb
CHANGED
@@ -1,11 +1,13 @@
|
|
1
|
-
# frozen_string_literal
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require_relative "regexp_trie/version"
|
4
4
|
|
5
5
|
class RegexpTrie
|
6
6
|
|
7
|
+
# Factory method to call `new(*strings).to_regexp(option)` in short.
|
8
|
+
#
|
7
9
|
# @param [Array<String>] strings Set of patterns
|
8
|
-
# @param [
|
10
|
+
# @param [Integer,Boolean] option The second argument of `Regexp.new()` passed to build a regexp instance
|
9
11
|
# @return [Regexp]
|
10
12
|
def self.union(*strings, option: nil)
|
11
13
|
new(*strings).to_regexp(option)
|
data/regexp_trie.gemspec
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
# coding: utf-8
|
2
|
-
|
3
|
-
|
4
|
-
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
require_relative './lib/regexp_trie/version'
|
5
5
|
|
6
6
|
Gem::Specification.new do |spec|
|
7
7
|
spec.name = "regexp_trie"
|
@@ -13,17 +13,20 @@ Gem::Specification.new do |spec|
|
|
13
13
|
spec.description = %q{Optimized Regexp builder with Trie as a port of Perl's Regexp::Trie}
|
14
14
|
spec.homepage = "https://github.com/gfx/ruby-regexp_trie"
|
15
15
|
spec.license = "MIT"
|
16
|
+
spec.metadata = {
|
17
|
+
"source_code_uri" => "https://github.com/gfx/ruby-regexp_trie",
|
18
|
+
"allowed_push_host" => "https://rubygems.org/"
|
19
|
+
}
|
16
20
|
|
17
|
-
spec.metadata['allowed_push_host'] = 'https://rubygems.org/'
|
18
21
|
|
19
|
-
spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
22
|
+
spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features|example)/}) }
|
20
23
|
spec.bindir = "exe"
|
21
24
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
22
25
|
spec.require_paths = ["lib"]
|
23
26
|
|
24
|
-
spec.add_development_dependency "bundler", "
|
25
|
-
spec.add_development_dependency "rake", "
|
26
|
-
spec.add_development_dependency "minitest", "
|
27
|
+
spec.add_development_dependency "bundler", ">= 1.11"
|
28
|
+
spec.add_development_dependency "rake", ">= 10.0"
|
29
|
+
spec.add_development_dependency "minitest", ">= 5.0"
|
27
30
|
spec.add_development_dependency "minitest-power_assert"
|
28
31
|
spec.add_development_dependency "simplecov"
|
29
32
|
end
|
metadata
CHANGED
@@ -1,55 +1,55 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: regexp_trie
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.
|
4
|
+
version: 1.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- FUJI Goro (gfx)
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-09-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - "
|
17
|
+
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: '1.11'
|
20
20
|
type: :development
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- - "
|
24
|
+
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '1.11'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: rake
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- - "
|
31
|
+
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
33
|
version: '10.0'
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- - "
|
38
|
+
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '10.0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: minitest
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
|
-
- - "
|
45
|
+
- - ">="
|
46
46
|
- !ruby/object:Gem::Version
|
47
47
|
version: '5.0'
|
48
48
|
type: :development
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
|
-
- - "
|
52
|
+
- - ">="
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '5.0'
|
55
55
|
- !ruby/object:Gem::Dependency
|
@@ -87,8 +87,8 @@ executables: []
|
|
87
87
|
extensions: []
|
88
88
|
extra_rdoc_files: []
|
89
89
|
files:
|
90
|
+
- ".github/workflows/CI.yml"
|
90
91
|
- ".gitignore"
|
91
|
-
- ".travis.yml"
|
92
92
|
- CHANGES.md
|
93
93
|
- Gemfile
|
94
94
|
- LICENSE.txt
|
@@ -96,9 +96,6 @@ files:
|
|
96
96
|
- Rakefile
|
97
97
|
- bin/console
|
98
98
|
- bin/setup
|
99
|
-
- example/benchmark.rb
|
100
|
-
- example/hatena-keyword-list.csv
|
101
|
-
- example/synopsis.rb
|
102
99
|
- lib/regexp_trie.rb
|
103
100
|
- lib/regexp_trie/version.rb
|
104
101
|
- regexp_trie.gemspec
|
@@ -106,8 +103,9 @@ homepage: https://github.com/gfx/ruby-regexp_trie
|
|
106
103
|
licenses:
|
107
104
|
- MIT
|
108
105
|
metadata:
|
106
|
+
source_code_uri: https://github.com/gfx/ruby-regexp_trie
|
109
107
|
allowed_push_host: https://rubygems.org/
|
110
|
-
post_install_message:
|
108
|
+
post_install_message:
|
111
109
|
rdoc_options: []
|
112
110
|
require_paths:
|
113
111
|
- lib
|
@@ -122,9 +120,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
122
120
|
- !ruby/object:Gem::Version
|
123
121
|
version: '0'
|
124
122
|
requirements: []
|
125
|
-
|
126
|
-
|
127
|
-
signing_key:
|
123
|
+
rubygems_version: 3.2.15
|
124
|
+
signing_key:
|
128
125
|
specification_version: 4
|
129
126
|
summary: Optimized Regexp builder with Trie
|
130
127
|
test_files: []
|
data/.travis.yml
DELETED
data/example/benchmark.rb
DELETED
@@ -1,90 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
require 'benchmark'
|
3
|
-
require 'diffy'
|
4
|
-
require 'regexp_trie'
|
5
|
-
|
6
|
-
keywords = []
|
7
|
-
File.open('example/hatena-keyword-list.csv') do |io|
|
8
|
-
io.each do |line|
|
9
|
-
yomi, word = line.split(/\t/)
|
10
|
-
word.strip!
|
11
|
-
unless word.empty?
|
12
|
-
keywords.push(word)
|
13
|
-
end
|
14
|
-
end
|
15
|
-
end
|
16
|
-
|
17
|
-
puts "build regexp ..."
|
18
|
-
|
19
|
-
keywords.sort_by! { |item| -item.length }
|
20
|
-
|
21
|
-
rx_raw = Regexp.union(keywords)
|
22
|
-
rx_trie = RegexpTrie.union(keywords)
|
23
|
-
|
24
|
-
puts "rx_raw: #{rx_raw.to_s.length}"
|
25
|
-
puts "rx_trie: #{rx_trie.to_s.length}"
|
26
|
-
|
27
|
-
text = <<'EOS'
|
28
|
-
http://blog.livedoor.jp/dankogai/archives/50074802.html
|
29
|
-
|
30
|
-
TRIE-Optimized Regexp [Show on Hatena Bookmark]
|
31
|
-
これをPerlで直接使えたらうれしいよね>おおる
|
32
|
-
|
33
|
-
きまぐれ日記: はてなキーワードを高速に付与
|
34
|
-
そこで、はてなキーワードを TRIE を使って付与するプログラムを作ってみました。
|
35
|
-
というわけで、やってみました。
|
36
|
-
|
37
|
-
|
38
|
-
最初はDartsのXSを作ろうとしたのだけど、どうもtemplateばりばりのC++コードとXSは相性が悪い。でもTrieを作るだけなら、Perlでもそこそこ出来るし、実際Regexp::OptimizerやRegexp::Assembleのようなモジュールもある。ただこれらはTrie以外のOptimizeもしてしまうので、ちょっと重たいというわけで、mk_trie_regexp.plというScriptをサクっと書いてみました。
|
39
|
-
|
40
|
-
使い方は簡単。/usr/share/dict/wordsのような、一行一語のファイルを引数に指定すると、それに対応した正規表現を吐いてくれます。あとはそれを
|
41
|
-
|
42
|
-
my $re = do "keyword.list.rx";
|
43
|
-
とかして読み込めばOK。
|
44
|
-
|
45
|
-
しかし、はてなのキーワードリストはすでにRegexpとして書かれちゃっているので、これを戻す為にhatena2list.plというscriptも書いときました。
|
46
|
-
|
47
|
-
そしてベンチマークを取った結果が以下です。
|
48
|
-
|
49
|
-
PowerBook G4 1.67MHz / Mac OS X v10.4
|
50
|
-
(warning: too few iterations for a reliable count)
|
51
|
-
s/iter comp_raw comp_trie
|
52
|
-
comp_raw 4.61 -- -87%
|
53
|
-
comp_trie 0.592 679% --
|
54
|
-
Rate pm_raw pm_trie
|
55
|
-
pm_raw 156/s -- -100%
|
56
|
-
pm_trie 70337/s 44874% --
|
57
|
-
(warning: too few iterations for a reliable count)
|
58
|
-
s/iter nm_raw nm_trie
|
59
|
-
nm_raw 23.6 -- -100%
|
60
|
-
nm_trie 1.57e-02 150763% --
|
61
|
-
Dual Xeon 2.66MHz / FreeBSD 5.4-Stable
|
62
|
-
(warning: too few iterations for a reliable count)
|
63
|
-
s/iter comp_raw comp_trie
|
64
|
-
comp_raw 4.45 -- -90%
|
65
|
-
comp_trie 0.465 855% --
|
66
|
-
Rate pm_raw pm_trie
|
67
|
-
pm_raw 532/s -- -99%
|
68
|
-
pm_trie 92027/s 17197% --
|
69
|
-
(warning: too few iterations for a reliable count)
|
70
|
-
s/iter nm_raw nm_trie
|
71
|
-
nm_raw 6.91 -- -100%
|
72
|
-
nm_trie 1.22e-02 56417% --
|
73
|
-
Darts版ほどとは行きませんが、なかなかPracticalなのではないでしょうか。なんといってもPerlから直接使える--正規表現そのものはRubyでも互換?--のはぐ~でしょう。
|
74
|
-
|
75
|
-
Dan the Just Another (Perl|Trie) Hacker
|
76
|
-
EOS
|
77
|
-
|
78
|
-
unless text.gsub(rx_raw, '*') == text.gsub(rx_trie, '*')
|
79
|
-
puts '!!!differences between Regexp.union() and RegexpTrie.union()!!!'
|
80
|
-
puts Diffy::Diff.new(text.gsub(rx_raw, '*'), text.gsub(rx_trie, '*'))
|
81
|
-
end
|
82
|
-
|
83
|
-
Benchmark.bm 20 do |r|
|
84
|
-
r.report "Regexp raw" do
|
85
|
-
text.gsub(rx_raw, '*')
|
86
|
-
end
|
87
|
-
r.report "RegexpTrie" do
|
88
|
-
text.gsub(rx_trie, '*')
|
89
|
-
end
|
90
|
-
end
|