regexp_trie 1.0.2 → 1.0.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 0fcc05dab8b49d9d5d0793084bdc2b2127ac087f50c8381ab86bd43bfc1eacc8
4
- data.tar.gz: 8c864a687f6046e3878f0782bc538e5040960d3f6d62f3e6c1573eee95db7678
3
+ metadata.gz: 3b7c0815c1b7b8f4bc6205ddaed11d259531f39bdcf6ac4a08a1edb965019d75
4
+ data.tar.gz: 9321a016f40fe9ffd441dbcc41068440d2fc1cea4ff49eb1c5efa9e559590113
5
5
  SHA512:
6
- metadata.gz: 602567d8cbe16033fd939ffd1991d600dbb1bcdc1f730f8160e127ad242b7e2920b398da2cab75087d09e67ff2a1d9a4cee2eb1e78868c6faffc5294aa828586
7
- data.tar.gz: a66dd1e9eaf272109d53dc04be393c8a2d873260da6cc3097b5e2af2e9292a1963c16a32ae3aa662902ba82f5987e57523731a1e916a181db31ac097ad2b128b
6
+ metadata.gz: b9cba5c7d1a9379061c813e8829c52e2e69915824814d7b156f492a464a6d0bee21b0fbad041263779303104215388a69610cd796f0fe633f143125feb2e9b64
7
+ data.tar.gz: 7877a5f840407312b18f2d6644ebff0c71f9c0692450c365b953dcb42b4c8e4a45f2cbd5f8583dc6408a39ca521726eb577df8de7054b369c9003b0916589ba6
@@ -0,0 +1,31 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [ "master" ]
6
+ pull_request:
7
+
8
+ permissions:
9
+ contents: read
10
+
11
+ jobs:
12
+ test:
13
+ runs-on: ubuntu-latest
14
+ strategy:
15
+ matrix:
16
+ ruby-version:
17
+ - '2.5'
18
+ - '2.6'
19
+ - '2.7'
20
+ - '3.0'
21
+ - '3.1'
22
+
23
+ steps:
24
+ - uses: actions/checkout@v3
25
+ - name: Set up Ruby
26
+ uses: ruby/setup-ruby@v1
27
+ with:
28
+ ruby-version: '${{ matrix.ruby-version }}'
29
+ bundler-cache: true # runs 'bundle install' and caches installed gems automatically
30
+ - name: Run tests
31
+ run: bundle exec rake
data/README.md CHANGED
@@ -1,4 +1,4 @@
1
- # RegexpTrie [![Build Status](https://travis-ci.org/gfx/ruby-regexp_trie.svg?branch=master)](https://travis-ci.org/gfx/ruby-regexp_trie) [![Gem Version](https://badge.fury.io/rb/regexp_trie.svg)](https://badge.fury.io/rb/regexp_trie)
1
+ # RegexpTrie [![CI](https://github.com/gfx/ruby-regexp_trie/actions/workflows/CI.yml/badge.svg)](https://github.com/gfx/ruby-regexp_trie/actions/workflows/CI.yml) [![Gem Version](https://badge.fury.io/rb/regexp_trie.svg)](https://badge.fury.io/rb/regexp_trie)
2
2
 
3
3
  ## Synopsis
4
4
 
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class RegexpTrie
4
- VERSION = "1.0.2"
4
+ VERSION = "1.0.3"
5
5
  end
data/lib/regexp_trie.rb CHANGED
@@ -4,8 +4,10 @@ require_relative "regexp_trie/version"
4
4
 
5
5
  class RegexpTrie
6
6
 
7
+ # Factory method: a shorthand for `new(*strings).to_regexp(option)`.
8
+ #
7
9
  # @param [Array<String>] strings Set of patterns
8
- # @param [Fixnum,Boolean] option The second argument of Regexp.new()
10
+ # @param [Integer,Boolean] option The second argument of `Regexp.new()` passed to build a regexp instance
9
11
  # @return [Regexp]
10
12
  def self.union(*strings, option: nil)
11
13
  new(*strings).to_regexp(option)
data/regexp_trie.gemspec CHANGED
@@ -1,4 +1,6 @@
1
1
  # coding: utf-8
2
+ # frozen_string_literal: true
3
+
2
4
  require_relative './lib/regexp_trie/version'
3
5
 
4
6
  Gem::Specification.new do |spec|
@@ -11,10 +13,13 @@ Gem::Specification.new do |spec|
11
13
  spec.description = %q{Optimized Regexp builder with Trie as a port of Perl's Regexp::Trie}
12
14
  spec.homepage = "https://github.com/gfx/ruby-regexp_trie"
13
15
  spec.license = "MIT"
16
+ spec.metadata = {
17
+ "source_code_uri" => "https://github.com/gfx/ruby-regexp_trie",
18
+ "allowed_push_host" => "https://rubygems.org/"
19
+ }
14
20
 
15
- spec.metadata['allowed_push_host'] = 'https://rubygems.org/'
16
21
 
17
- spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
22
+ spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features|example)/}) }
18
23
  spec.bindir = "exe"
19
24
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
20
25
  spec.require_paths = ["lib"]
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: regexp_trie
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.2
4
+ version: 1.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - FUJI Goro (gfx)
8
- autorequire:
8
+ autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2018-10-09 00:00:00.000000000 Z
11
+ date: 2022-09-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -87,8 +87,8 @@ executables: []
87
87
  extensions: []
88
88
  extra_rdoc_files: []
89
89
  files:
90
+ - ".github/workflows/CI.yml"
90
91
  - ".gitignore"
91
- - ".travis.yml"
92
92
  - CHANGES.md
93
93
  - Gemfile
94
94
  - LICENSE.txt
@@ -96,9 +96,6 @@ files:
96
96
  - Rakefile
97
97
  - bin/console
98
98
  - bin/setup
99
- - example/benchmark.rb
100
- - example/hatena-keyword-list.csv
101
- - example/synopsis.rb
102
99
  - lib/regexp_trie.rb
103
100
  - lib/regexp_trie/version.rb
104
101
  - regexp_trie.gemspec
@@ -106,8 +103,9 @@ homepage: https://github.com/gfx/ruby-regexp_trie
106
103
  licenses:
107
104
  - MIT
108
105
  metadata:
106
+ source_code_uri: https://github.com/gfx/ruby-regexp_trie
109
107
  allowed_push_host: https://rubygems.org/
110
- post_install_message:
108
+ post_install_message:
111
109
  rdoc_options: []
112
110
  require_paths:
113
111
  - lib
@@ -122,9 +120,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
122
120
  - !ruby/object:Gem::Version
123
121
  version: '0'
124
122
  requirements: []
125
- rubyforge_project:
126
- rubygems_version: 2.7.6
127
- signing_key:
123
+ rubygems_version: 3.2.15
124
+ signing_key:
128
125
  specification_version: 4
129
126
  summary: Optimized Regexp builder with Trie
130
127
  test_files: []
data/.travis.yml DELETED
@@ -1,7 +0,0 @@
1
- language: ruby
2
- sudo: false
3
- rvm:
4
- - "2.3"
5
- - "2.4"
6
- - "2.5"
7
- before_install: gem install bundler
data/example/benchmark.rb DELETED
@@ -1,90 +0,0 @@
1
- #!/usr/bin/env ruby
2
- require 'benchmark'
3
- require 'diffy'
4
- require 'regexp_trie'
5
-
6
- keywords = []
7
- File.open('example/hatena-keyword-list.csv') do |io|
8
- io.each do |line|
9
- yomi, word = line.split(/\t/)
10
- word.strip!
11
- unless word.empty?
12
- keywords.push(word)
13
- end
14
- end
15
- end
16
-
17
- puts "build regexp ..."
18
-
19
- keywords.sort_by! { |item| -item.length }
20
-
21
- rx_raw = Regexp.union(keywords)
22
- rx_trie = RegexpTrie.union(keywords)
23
-
24
- puts "rx_raw: #{rx_raw.to_s.length}"
25
- puts "rx_trie: #{rx_trie.to_s.length}"
26
-
27
- text = <<'EOS'
28
- http://blog.livedoor.jp/dankogai/archives/50074802.html
29
-
30
- TRIE-Optimized Regexp [Show on Hatena Bookmark]
31
- これをPerlで直接使えたらうれしいよね>おおる
32
-
33
- きまぐれ日記: はてなキーワードを高速に付与
34
- そこで、はてなキーワードを TRIE を使って付与するプログラムを作ってみました。
35
- というわけで、やってみました。
36
-
37
-
38
- 最初はDartsのXSを作ろうとしたのだけど、どうもtemplateばりばりのC++コードとXSは相性が悪い。でもTrieを作るだけなら、Perlでもそこそこ出来るし、実際Regexp::OptimizerやRegexp::Assembleのようなモジュールもある。ただこれらはTrie以外のOptimizeもしてしまうので、ちょっと重たいというわけで、mk_trie_regexp.plというScriptをサクっと書いてみました。
39
-
40
- 使い方は簡単。/usr/share/dict/wordsのような、一行一語のファイルを引数に指定すると、それに対応した正規表現を吐いてくれます。あとはそれを
41
-
42
- my $re = do "keyword.list.rx";
43
- とかして読み込めばOK。
44
-
45
- しかし、はてなのキーワードリストはすでにRegexpとして書かれちゃっているので、これを戻す為にhatena2list.plというscriptも書いときました。
46
-
47
- そしてベンチマークを取った結果が以下です。
48
-
49
- PowerBook G4 1.67MHz / Mac OS X v10.4
50
- (warning: too few iterations for a reliable count)
51
- s/iter comp_raw comp_trie
52
- comp_raw 4.61 -- -87%
53
- comp_trie 0.592 679% --
54
- Rate pm_raw pm_trie
55
- pm_raw 156/s -- -100%
56
- pm_trie 70337/s 44874% --
57
- (warning: too few iterations for a reliable count)
58
- s/iter nm_raw nm_trie
59
- nm_raw 23.6 -- -100%
60
- nm_trie 1.57e-02 150763% --
61
- Dual Xeon 2.66MHz / FreeBSD 5.4-Stable
62
- (warning: too few iterations for a reliable count)
63
- s/iter comp_raw comp_trie
64
- comp_raw 4.45 -- -90%
65
- comp_trie 0.465 855% --
66
- Rate pm_raw pm_trie
67
- pm_raw 532/s -- -99%
68
- pm_trie 92027/s 17197% --
69
- (warning: too few iterations for a reliable count)
70
- s/iter nm_raw nm_trie
71
- nm_raw 6.91 -- -100%
72
- nm_trie 1.22e-02 56417% --
73
- Darts版ほどとは行きませんが、なかなかPracticalなのではないでしょうか。なんといってもPerlから直接使える--正規表現そのものはRubyでも互換?--のはぐ~でしょう。
74
-
75
- Dan the Just Another (Perl|Trie) Hacker
76
- EOS
77
-
78
- unless text.gsub(rx_raw, '*') == text.gsub(rx_trie, '*')
79
- puts '!!!differences between Regexp.union() and RegexpTrie.union()!!!'
80
- puts Diffy::Diff.new(text.gsub(rx_raw, '*'), text.gsub(rx_trie, '*'))
81
- end
82
-
83
- Benchmark.bm 20 do |r|
84
- r.report "Regexp raw" do
85
- text.gsub(rx_raw, '*')
86
- end
87
- r.report "RegexpTrie" do
88
- text.gsub(rx_trie, '*')
89
- end
90
- end