rahocorasick 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 7e548a8abbd5d890d98c42df63229f116e70e6546cfbda0de2a6a48ea6a111f7
4
+ data.tar.gz: d11f7a542307464994beeee27a67f0aa9e9c5fe6fc9e51a8250c8a608b700823
5
+ SHA512:
6
+ metadata.gz: 66ea7459fd3f8181ed3f172bceb6fb85781a176d5da279fd87bfba05a78a6db16093806a4ed2600477dd115eff2885e6acd366815683bf0cfbd05262ec1d100c
7
+ data.tar.gz: 63dc2a1fbada86a23b489020875a980f1126dd6a2ef654129f1eac08b774ec63c44ff27f514dde8e13ff454e07be96b8619a08aacc0d27382ec6591a9daa1387
data/README.md ADDED
@@ -0,0 +1,73 @@
1
+ # Rahocorasick
2
+
3
+ [![Gem Version](https://badge.fury.io/rb/rahocorasick.svg)](https://badge.fury.io/rb/rahocorasick)
4
+
5
+ `Rahocorasick` is a Ruby wrapper for the Rust implementation of the [Aho-Corasick](https://github.com/BurntSushi/aho-corasick) algorithm.
6
+
7
+ It is a string searching algorithm that locates elements of a finite set of strings (the "dictionary") within an input text.
8
+ It matches all strings simultaneously.
9
+
10
+ ## Installation
11
+
12
+ Add this line to your application's Gemfile:
13
+
14
+ ```ruby
15
+ gem 'rahocorasick'
16
+ ```
17
+
18
+ And then execute:
19
+
20
+ $ bundle install
21
+
22
+ Or install it yourself as:
23
+
24
+ $ gem install rahocorasick
25
+
26
+ ## Usage
27
+
28
+ ```ruby
29
+ ahoc = RAhoC.new ['foo', 'bar']
30
+ ahoc.lookup 'Foolish men play football in bar.'
31
+ => ["foo", "bar"]
32
+ ```
33
+
34
+ ## Benchmark
35
+
36
+ ```
37
+ Words size: 500
38
+ Test cases: 2000
39
+ Test text avg length: 3154
40
+ Test text max length: 23676
41
+ Test text min length: 13
42
+ --------------------
43
+ user system total real
44
+ each&include 6.487059 0.185424 6.672483 ( 6.791808)
45
+ ruby_ahoc 4.178672 0.138610 4.317282 ( 4.547964)
46
+ rust_ahoc 0.157662 0.004847 0.162509 ( 0.166964)
47
+ ```
48
+
49
+ > 🎈 The Rust version is about 27.2x faster than the pure-Ruby version.
50
+
51
+ ```
52
+ Words size: 500
53
+ Test cases: 2000
54
+ Test text avg length: 49162
55
+ Test text max length: 10392056
56
+ Test text min length: 577
57
+ --------------------
58
+ user system total real
59
+ each&include 27.903179 0.237389 28.140568 ( 28.563194)
60
+ ruby_ahoc 45.220535 0.363107 45.583642 ( 46.477702)
61
+ rust_ahoc 0.670583 0.007192 0.677775 ( 0.686904)
62
+ ```
63
+
64
+ > 🎈 The Rust version is about 67.7x faster than the pure-Ruby version.
65
+
66
+ ## Reference
67
+
68
+ - [Aho-Corasick](https://github.com/BurntSushi/aho-corasick) - Aho-Corasick string matching algorithm in Rust.
69
+ - [ahocorasick](https://github.com/ahnick/ahocorasick) - Aho-Corasick string matching algorithm in Ruby.
70
+
71
+ ## Contributing
72
+
73
+ Bug reports and pull requests are welcome on GitHub at https://github.com/jinshuju/rahocorasick.
data/Rakefile ADDED
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rake/testtask'
4
+ require 'rake/extensiontask'
5
+
6
+ CROSS_PLATFORMS = %w[
7
+ aarch64-linux
8
+ arm64-darwin
9
+ x86_64-darwin
10
+ x86_64-linux
11
+ ].freeze
12
+
13
+ spec = Bundler.load_gemspec('rahocorasick.gemspec')
14
+
15
+ Rake::ExtensionTask.new('rahocorasick', spec) do |c|
16
+ c.lib_dir = 'lib/rahocorasick'
17
+ c.source_pattern = '*.{rs,toml}'
18
+ c.cross_compile = true
19
+ c.cross_platform = CROSS_PLATFORMS
20
+ end
21
+
22
+ Rake::TestTask.new do |t|
23
+ t.deps << :dev << :compile
24
+ t.test_files = FileList['test/**/*_test.rb']
25
+ end
26
+
27
+ task :dev do
28
+ ENV['RB_SYS_CARGO_PROFILE'] = 'dev'
29
+ end
30
+
31
+ task default: :test
data/ext/rahocorasick/Cargo.lock ADDED
@@ -0,0 +1,276 @@
1
+ # This file is automatically @generated by Cargo.
2
+ # It is not intended for manual editing.
3
+ version = 3
4
+
5
+ [[package]]
6
+ name = "aho-corasick"
7
+ version = "0.7.20"
8
+ source = "registry+https://github.com/rust-lang/crates.io-index"
9
+ checksum = "cc936419f96fa211c1b9166887b38e5e40b19958e5b895be7c1f93adec7071ac"
10
+ dependencies = [
11
+ "memchr",
12
+ ]
13
+
14
+ [[package]]
15
+ name = "bindgen"
16
+ version = "0.60.1"
17
+ source = "registry+https://github.com/rust-lang/crates.io-index"
18
+ checksum = "062dddbc1ba4aca46de6338e2bf87771414c335f7b2f2036e8f3e9befebf88e6"
19
+ dependencies = [
20
+ "bitflags",
21
+ "cexpr",
22
+ "clang-sys",
23
+ "lazy_static",
24
+ "lazycell",
25
+ "peeking_take_while",
26
+ "proc-macro2",
27
+ "quote",
28
+ "regex",
29
+ "rustc-hash",
30
+ "shlex",
31
+ ]
32
+
33
+ [[package]]
34
+ name = "bitflags"
35
+ version = "1.3.2"
36
+ source = "registry+https://github.com/rust-lang/crates.io-index"
37
+ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
38
+
39
+ [[package]]
40
+ name = "cexpr"
41
+ version = "0.6.0"
42
+ source = "registry+https://github.com/rust-lang/crates.io-index"
43
+ checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766"
44
+ dependencies = [
45
+ "nom",
46
+ ]
47
+
48
+ [[package]]
49
+ name = "cfg-if"
50
+ version = "1.0.0"
51
+ source = "registry+https://github.com/rust-lang/crates.io-index"
52
+ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
53
+
54
+ [[package]]
55
+ name = "clang-sys"
56
+ version = "1.6.0"
57
+ source = "registry+https://github.com/rust-lang/crates.io-index"
58
+ checksum = "77ed9a53e5d4d9c573ae844bfac6872b159cb1d1585a83b29e7a64b7eef7332a"
59
+ dependencies = [
60
+ "glob",
61
+ "libc",
62
+ "libloading",
63
+ ]
64
+
65
+ [[package]]
66
+ name = "glob"
67
+ version = "0.3.1"
68
+ source = "registry+https://github.com/rust-lang/crates.io-index"
69
+ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
70
+
71
+ [[package]]
72
+ name = "lazy_static"
73
+ version = "1.4.0"
74
+ source = "registry+https://github.com/rust-lang/crates.io-index"
75
+ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
76
+
77
+ [[package]]
78
+ name = "lazycell"
79
+ version = "1.3.0"
80
+ source = "registry+https://github.com/rust-lang/crates.io-index"
81
+ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
82
+
83
+ [[package]]
84
+ name = "libc"
85
+ version = "0.2.139"
86
+ source = "registry+https://github.com/rust-lang/crates.io-index"
87
+ checksum = "201de327520df007757c1f0adce6e827fe8562fbc28bfd9c15571c66ca1f5f79"
88
+
89
+ [[package]]
90
+ name = "libloading"
91
+ version = "0.7.4"
92
+ source = "registry+https://github.com/rust-lang/crates.io-index"
93
+ checksum = "b67380fd3b2fbe7527a606e18729d21c6f3951633d0500574c4dc22d2d638b9f"
94
+ dependencies = [
95
+ "cfg-if",
96
+ "winapi",
97
+ ]
98
+
99
+ [[package]]
100
+ name = "magnus"
101
+ version = "0.4.4"
102
+ source = "registry+https://github.com/rust-lang/crates.io-index"
103
+ checksum = "fc87660cd7daa49fddbfd524c836de54d5c927d520cd163f43700c5087c57d6c"
104
+ dependencies = [
105
+ "magnus-macros",
106
+ "rb-sys",
107
+ "rb-sys-env",
108
+ ]
109
+
110
+ [[package]]
111
+ name = "magnus-macros"
112
+ version = "0.3.0"
113
+ source = "registry+https://github.com/rust-lang/crates.io-index"
114
+ checksum = "206cb23bfeea05180c97522ef6a3e52a4eb17b0ed2f30ee3ca9c4f994d2378ae"
115
+ dependencies = [
116
+ "proc-macro2",
117
+ "quote",
118
+ "syn",
119
+ ]
120
+
121
+ [[package]]
122
+ name = "memchr"
123
+ version = "2.5.0"
124
+ source = "registry+https://github.com/rust-lang/crates.io-index"
125
+ checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
126
+
127
+ [[package]]
128
+ name = "minimal-lexical"
129
+ version = "0.2.1"
130
+ source = "registry+https://github.com/rust-lang/crates.io-index"
131
+ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
132
+
133
+ [[package]]
134
+ name = "nom"
135
+ version = "7.1.3"
136
+ source = "registry+https://github.com/rust-lang/crates.io-index"
137
+ checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
138
+ dependencies = [
139
+ "memchr",
140
+ "minimal-lexical",
141
+ ]
142
+
143
+ [[package]]
144
+ name = "peeking_take_while"
145
+ version = "0.1.2"
146
+ source = "registry+https://github.com/rust-lang/crates.io-index"
147
+ checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099"
148
+
149
+ [[package]]
150
+ name = "proc-macro2"
151
+ version = "1.0.51"
152
+ source = "registry+https://github.com/rust-lang/crates.io-index"
153
+ checksum = "5d727cae5b39d21da60fa540906919ad737832fe0b1c165da3a34d6548c849d6"
154
+ dependencies = [
155
+ "unicode-ident",
156
+ ]
157
+
158
+ [[package]]
159
+ name = "quote"
160
+ version = "1.0.23"
161
+ source = "registry+https://github.com/rust-lang/crates.io-index"
162
+ checksum = "8856d8364d252a14d474036ea1358d63c9e6965c8e5c1885c18f73d70bff9c7b"
163
+ dependencies = [
164
+ "proc-macro2",
165
+ ]
166
+
167
+ [[package]]
168
+ name = "rahocorasick"
169
+ version = "0.1.0"
170
+ dependencies = [
171
+ "aho-corasick",
172
+ "magnus",
173
+ ]
174
+
175
+ [[package]]
176
+ name = "rb-sys"
177
+ version = "0.9.65"
178
+ source = "registry+https://github.com/rust-lang/crates.io-index"
179
+ checksum = "e8fe617bad8e88fd7e5d6f432e35f09e5f94144dfb8e8ee4adde82fb920dc59b"
180
+ dependencies = [
181
+ "rb-sys-build",
182
+ ]
183
+
184
+ [[package]]
185
+ name = "rb-sys-build"
186
+ version = "0.9.65"
187
+ source = "registry+https://github.com/rust-lang/crates.io-index"
188
+ checksum = "007e63597f91c711cbb299e60fecbdb6f5ad4a066d6a20c81943893f1584c895"
189
+ dependencies = [
190
+ "bindgen",
191
+ "lazy_static",
192
+ "quote",
193
+ "regex",
194
+ "shell-words",
195
+ "syn",
196
+ ]
197
+
198
+ [[package]]
199
+ name = "rb-sys-env"
200
+ version = "0.1.2"
201
+ source = "registry+https://github.com/rust-lang/crates.io-index"
202
+ checksum = "a35802679f07360454b418a5d1735c89716bde01d35b1560fc953c1415a0b3bb"
203
+
204
+ [[package]]
205
+ name = "regex"
206
+ version = "1.7.1"
207
+ source = "registry+https://github.com/rust-lang/crates.io-index"
208
+ checksum = "48aaa5748ba571fb95cd2c85c09f629215d3a6ece942baa100950af03a34f733"
209
+ dependencies = [
210
+ "aho-corasick",
211
+ "memchr",
212
+ "regex-syntax",
213
+ ]
214
+
215
+ [[package]]
216
+ name = "regex-syntax"
217
+ version = "0.6.28"
218
+ source = "registry+https://github.com/rust-lang/crates.io-index"
219
+ checksum = "456c603be3e8d448b072f410900c09faf164fbce2d480456f50eea6e25f9c848"
220
+
221
+ [[package]]
222
+ name = "rustc-hash"
223
+ version = "1.1.0"
224
+ source = "registry+https://github.com/rust-lang/crates.io-index"
225
+ checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
226
+
227
+ [[package]]
228
+ name = "shell-words"
229
+ version = "1.1.0"
230
+ source = "registry+https://github.com/rust-lang/crates.io-index"
231
+ checksum = "24188a676b6ae68c3b2cb3a01be17fbf7240ce009799bb56d5b1409051e78fde"
232
+
233
+ [[package]]
234
+ name = "shlex"
235
+ version = "1.1.0"
236
+ source = "registry+https://github.com/rust-lang/crates.io-index"
237
+ checksum = "43b2853a4d09f215c24cc5489c992ce46052d359b5109343cbafbf26bc62f8a3"
238
+
239
+ [[package]]
240
+ name = "syn"
241
+ version = "1.0.107"
242
+ source = "registry+https://github.com/rust-lang/crates.io-index"
243
+ checksum = "1f4064b5b16e03ae50984a5a8ed5d4f8803e6bc1fd170a3cda91a1be4b18e3f5"
244
+ dependencies = [
245
+ "proc-macro2",
246
+ "quote",
247
+ "unicode-ident",
248
+ ]
249
+
250
+ [[package]]
251
+ name = "unicode-ident"
252
+ version = "1.0.6"
253
+ source = "registry+https://github.com/rust-lang/crates.io-index"
254
+ checksum = "84a22b9f218b40614adcb3f4ff08b703773ad44fa9423e4e0d346d5db86e4ebc"
255
+
256
+ [[package]]
257
+ name = "winapi"
258
+ version = "0.3.9"
259
+ source = "registry+https://github.com/rust-lang/crates.io-index"
260
+ checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
261
+ dependencies = [
262
+ "winapi-i686-pc-windows-gnu",
263
+ "winapi-x86_64-pc-windows-gnu",
264
+ ]
265
+
266
+ [[package]]
267
+ name = "winapi-i686-pc-windows-gnu"
268
+ version = "0.4.0"
269
+ source = "registry+https://github.com/rust-lang/crates.io-index"
270
+ checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
271
+
272
+ [[package]]
273
+ name = "winapi-x86_64-pc-windows-gnu"
274
+ version = "0.4.0"
275
+ source = "registry+https://github.com/rust-lang/crates.io-index"
276
+ checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
data/ext/rahocorasick/Cargo.toml ADDED
@@ -0,0 +1,12 @@
1
+ [package]
2
+ edition = "2021"
3
+ name = "rahocorasick"
4
+ version = "0.1.0"
5
+
6
+ # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
7
+ [lib]
8
+ crate-type = ["cdylib"]
9
+
10
+ [dependencies]
11
+ aho-corasick = "0.7"
12
+ magnus = "0.4"
data/ext/rahocorasick/extconf.rb ADDED
@@ -0,0 +1,6 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'mkmf'
4
+ require 'rb_sys/mkmf'
5
+
6
+ create_rust_makefile('rahocorasick/rahocorasick')
data/ext/rahocorasick/src/lib.rs ADDED
@@ -0,0 +1,31 @@
1
+ use aho_corasick::AhoCorasick;
2
+ use magnus::{define_class, method, function, prelude::*, Error};
3
+
4
+ #[magnus::wrap(class = "RAhoC")]
5
+ pub struct RAhoC {
6
+ words: Vec<String>,
7
+ ac: AhoCorasick,
8
+ }
9
+
10
+ impl RAhoC {
11
+ fn new(words: Vec<String>) -> Self {
12
+ let ac = AhoCorasick::new(&words);
13
+ Self { words, ac }
14
+ }
15
+
16
+ fn lookup(&self, haystack: String) -> Vec<String> {
17
+ let mut matches = vec![];
18
+ for mat in self.ac.find_iter(&haystack) {
19
+ matches.push(self.words[mat.pattern()].clone());
20
+ }
21
+ matches
22
+ }
23
+ }
24
+
25
+ #[magnus::init]
26
+ fn main() -> Result<(), Error> {
27
+ let class = define_class("RAhoC", Default::default())?;
28
+ class.define_singleton_method("new", function!(RAhoC::new, 1))?;
29
+ class.define_method("lookup", method!(RAhoC::lookup, 1))?;
30
+ Ok(())
31
+ }
data/lib/rahocorasick.rb ADDED
@@ -0,0 +1,9 @@
1
+ # frozen_string_literal: true
2
+
3
+ begin
4
+ # load the precompiled extension file
5
+ ruby_version = /(\d+\.\d+)/.match(::RUBY_VERSION)
6
+ require_relative "rahocorasick/#{ruby_version}/rahocorasick"
7
+ rescue LoadError
8
+ require 'rahocorasick/rahocorasick'
9
+ end
metadata ADDED
@@ -0,0 +1,67 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rahocorasick
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.2
5
+ platform: ruby
6
+ authors:
7
+ - IChou
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2023-02-19 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rb_sys
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: 0.9.54
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: 0.9.54
27
+ description: It's a string searching gem that locates elements of a finite set of
28
+ strings (the "dictionary") within an input text.
29
+ email:
30
+ - zhouyi@jinshuju.net
31
+ executables: []
32
+ extensions:
33
+ - ext/rahocorasick/extconf.rb
34
+ extra_rdoc_files: []
35
+ files:
36
+ - README.md
37
+ - Rakefile
38
+ - ext/rahocorasick/Cargo.lock
39
+ - ext/rahocorasick/Cargo.toml
40
+ - ext/rahocorasick/extconf.rb
41
+ - ext/rahocorasick/src/lib.rs
42
+ - lib/rahocorasick.rb
43
+ homepage: https://github.com/jinshuju/rahocorasick
44
+ licenses:
45
+ - MIT
46
+ metadata: {}
47
+ post_install_message:
48
+ rdoc_options: []
49
+ require_paths:
50
+ - lib
51
+ required_ruby_version: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - ">="
54
+ - !ruby/object:Gem::Version
55
+ version: 2.7.0
56
+ required_rubygems_version: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: '0'
61
+ requirements: []
62
+ rubygems_version: 3.4.6
63
+ signing_key:
64
+ specification_version: 4
65
+ summary: A Ruby wraper for Rust [Aho-Corasick](https://github.com/BurntSushi/aho-corasick)
66
+ algorithm
67
+ test_files: []