ahocorasick-rust 2.2.0-x86_64-linux-gnu

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,6 @@
1
# frozen_string_literal: true

# Extension build script: pulls in mkmf plus the rb_sys helpers, then emits
# the Makefile for the Rust extension.
require 'mkmf'
require 'rb_sys/mkmf'

# The argument is the extension's load path; it matches the fallback
# `require 'rahocorasick/rahocorasick'` used by the gem's loader.
create_rust_makefile('rahocorasick/rahocorasick')
@@ -0,0 +1,191 @@
1
+ use aho_corasick::{AhoCorasick, AhoCorasickBuilder, MatchKind};
2
+ use magnus::{method, function, prelude::*, Error, Ruby, RHash, RArray, Value, Symbol};
3
+
4
+ #[magnus::wrap(class = "AhoCorasickRust")]
5
+ pub struct AhoCorasickRust {
6
+ words: Vec<String>,
7
+ ac: AhoCorasick,
8
+ }
9
+
10
+ impl AhoCorasickRust {
11
+ fn new_impl(ruby: &Ruby, words: Vec<String>, kwargs: Option<RHash>) -> Result<Self, Error> {
12
+ let mut builder = AhoCorasickBuilder::new();
13
+
14
+ // Check for options if kwargs provided
15
+ if let Some(kwargs) = kwargs {
16
+ // case_insensitive option
17
+ if let Some(val) = kwargs.get(ruby.to_symbol("case_insensitive")) {
18
+ if let Ok(case_insensitive) = bool::try_convert(val) {
19
+ if case_insensitive {
20
+ builder.ascii_case_insensitive(true);
21
+ }
22
+ }
23
+ }
24
+
25
+ // match_kind option
26
+ if let Some(val) = kwargs.get(ruby.to_symbol("match_kind")) {
27
+ if let Some(sym) = Symbol::from_value(val) {
28
+ let kind_str = sym.name()?.to_string();
29
+ let match_kind = match kind_str.as_ref() {
30
+ "standard" => MatchKind::Standard,
31
+ "leftmost_first" => MatchKind::LeftmostFirst,
32
+ "leftmost_longest" => MatchKind::LeftmostLongest,
33
+ _ => {
34
+ return Err(Error::new(
35
+ ruby.exception_arg_error(),
36
+ format!("Invalid match_kind: '{}'. Valid values are :standard, :leftmost_first, :leftmost_longest", kind_str)
37
+ ));
38
+ }
39
+ };
40
+ builder.match_kind(match_kind);
41
+ }
42
+ }
43
+ }
44
+
45
+ let ac = builder.build(&words)
46
+ .map_err(|e| Error::new(ruby.exception_runtime_error(), format!("Failed to build automaton: {}", e)))?;
47
+ Ok(Self { words, ac })
48
+ }
49
+
50
+ fn new(ruby: &Ruby, args: &[Value]) -> Result<Self, Error> {
51
+ let args = magnus::scan_args::scan_args::<(Vec<String>,), (), (), (), RHash, ()>(args)?;
52
+ let (words,) = args.required;
53
+ let kwargs = args.keywords;
54
+
55
+ // Only pass kwargs if non-empty
56
+ let kwargs_opt = if kwargs.len() > 0 { Some(kwargs) } else { None };
57
+
58
+ Self::new_impl(ruby, words, kwargs_opt)
59
+ }
60
+
61
+ fn lookup(&self, haystack: String) -> Vec<String> {
62
+ let mut matches = vec![];
63
+ for mat in self.ac.find_iter(&haystack) {
64
+ matches.push(self.words[mat.pattern()].clone());
65
+ }
66
+ matches
67
+ }
68
+
69
+ fn is_match(&self, haystack: String) -> bool {
70
+ self.ac.is_match(&haystack)
71
+ }
72
+
73
+ fn lookup_overlapping(&self, haystack: String) -> Vec<String> {
74
+ let mut matches = vec![];
75
+ for mat in self.ac.find_overlapping_iter(&haystack) {
76
+ matches.push(self.words[mat.pattern()].clone());
77
+ }
78
+ matches
79
+ }
80
+
81
+ fn find_first(&self, haystack: String) -> Option<String> {
82
+ self.ac.find(&haystack).map(|mat| self.words[mat.pattern()].clone())
83
+ }
84
+
85
+ fn find_first_with_position(&self, haystack: String) -> Result<Option<RHash>, Error> {
86
+ let ruby = Ruby::get().unwrap();
87
+ if let Some(mat) = self.ac.find(&haystack) {
88
+ let hash = ruby.hash_new();
89
+ hash.aset(ruby.to_symbol("pattern"), self.words[mat.pattern()].clone())?;
90
+ hash.aset(ruby.to_symbol("start"), mat.start())?;
91
+ hash.aset(ruby.to_symbol("end"), mat.end())?;
92
+ Ok(Some(hash))
93
+ } else {
94
+ Ok(None)
95
+ }
96
+ }
97
+
98
+ fn lookup_with_positions(&self, haystack: String) -> Result<RArray, Error> {
99
+ let ruby = Ruby::get().unwrap();
100
+ let matches = ruby.ary_new();
101
+ for mat in self.ac.find_iter(&haystack) {
102
+ let hash = ruby.hash_new();
103
+ hash.aset(ruby.to_symbol("pattern"), self.words[mat.pattern()].clone())?;
104
+ hash.aset(ruby.to_symbol("start"), mat.start())?;
105
+ hash.aset(ruby.to_symbol("end"), mat.end())?;
106
+ matches.push(hash)?;
107
+ }
108
+ Ok(matches)
109
+ }
110
+
111
+ fn replace_all(&self, args: &[Value]) -> Result<String, Error> {
112
+ let ruby = Ruby::get().unwrap();
113
+
114
+ // Parse arguments: haystack (required), replacements (optional if block given)
115
+ let haystack: String = if args.is_empty() {
116
+ return Err(Error::new(
117
+ ruby.exception_arg_error(),
118
+ "wrong number of arguments (given 0, expected 1..2)"
119
+ ));
120
+ } else {
121
+ String::try_convert(args[0])?
122
+ };
123
+
124
+ // Check if a block was given
125
+ match ruby.block_proc() {
126
+ Ok(proc) => {
127
+ // Block-based replacement
128
+ let mut result = haystack.clone();
129
+ let mut offset: isize = 0;
130
+
131
+ for mat in self.ac.find_iter(&haystack) {
132
+ let pattern = &self.words[mat.pattern()];
133
+ let replacement: String = proc.call((pattern.clone(),))?;
134
+
135
+ let start = (mat.start() as isize + offset) as usize;
136
+ let end = (mat.end() as isize + offset) as usize;
137
+
138
+ result.replace_range(start..end, &replacement);
139
+ offset += replacement.len() as isize - (mat.end() - mat.start()) as isize;
140
+ }
141
+
142
+ Ok(result)
143
+ }
144
+ Err(_) if args.len() >= 2 => {
145
+ // Hash-based replacement
146
+ if let Some(hash) = RHash::from_value(args[1]) {
147
+ let mut replace_with: Vec<String> = Vec::with_capacity(self.words.len());
148
+
149
+ for word in &self.words {
150
+ if let Some(val) = hash.get(word.clone()) {
151
+ if let Ok(replacement) = String::try_convert(val) {
152
+ replace_with.push(replacement);
153
+ } else {
154
+ replace_with.push(word.clone());
155
+ }
156
+ } else {
157
+ replace_with.push(word.clone());
158
+ }
159
+ }
160
+
161
+ Ok(self.ac.replace_all(&haystack, &replace_with))
162
+ } else {
163
+ Err(Error::new(
164
+ ruby.exception_arg_error(),
165
+ "replace_all requires a Hash or block"
166
+ ))
167
+ }
168
+ }
169
+ Err(_) => {
170
+ Err(Error::new(
171
+ ruby.exception_arg_error(),
172
+ "replace_all requires a Hash or block"
173
+ ))
174
+ }
175
+ }
176
+ }
177
+ }
178
+
179
+ #[magnus::init]
180
+ fn main(ruby: &Ruby) -> Result<(), Error> {
181
+ let class = ruby.define_class("AhoCorasickRust", ruby.class_object())?;
182
+ class.define_singleton_method("new", function!(AhoCorasickRust::new, -1))?;
183
+ class.define_method("lookup", method!(AhoCorasickRust::lookup, 1))?;
184
+ class.define_method("match?", method!(AhoCorasickRust::is_match, 1))?;
185
+ class.define_method("lookup_overlapping", method!(AhoCorasickRust::lookup_overlapping, 1))?;
186
+ class.define_method("find_first", method!(AhoCorasickRust::find_first, 1))?;
187
+ class.define_method("find_first_with_position", method!(AhoCorasickRust::find_first_with_position, 1))?;
188
+ class.define_method("lookup_with_positions", method!(AhoCorasickRust::lookup_with_positions, 1))?;
189
+ class.define_method("replace_all", method!(AhoCorasickRust::replace_all, -1))?;
190
+ Ok(())
191
+ }
@@ -0,0 +1,9 @@
1
+ # frozen_string_literal: true
2
+
3
# Try the precompiled extension stored under the running Ruby's MAJOR.MINOR
# directory first; if that cannot be loaded, fall back to the extension on
# the regular load path.
begin
  ruby_series = ::RUBY_VERSION[/\d+\.\d+/]
  require_relative "rahocorasick/#{ruby_series}/rahocorasick"
rescue LoadError
  require 'rahocorasick/rahocorasick'
end
metadata ADDED
@@ -0,0 +1,79 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ahocorasick-rust
3
+ version: !ruby/object:Gem::Version
4
+ version: 2.2.0
5
+ platform: x86_64-linux-gnu
6
+ authors:
7
+ - Eric
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2026-01-13 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rb_sys
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: 0.9.117
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: 0.9.117
27
+ description: A Ruby gem wrapping the legendary Rust Aho-Corasick algorithm! Aho-Corasick
28
+ is a powerful string searching algorithm that finds multiple patterns simultaneously
29
+ in a text. Features include overlapping matches, case-insensitive search, find &
30
+ replace, match positions, and configurable match strategies. Perfect for content
31
+ filtering, tokenization, and multi-pattern search at lightning speed! (ノ◕ヮ◕)ノ*:・゚✧
32
+ email:
33
+ - eric@ebj.dev
34
+ executables: []
35
+ extensions: []
36
+ extra_rdoc_files: []
37
+ files:
38
+ - README.md
39
+ - Rakefile
40
+ - docs/match_kind.md
41
+ - docs/reference.md
42
+ - ext/rahocorasick/Cargo.lock
43
+ - ext/rahocorasick/Cargo.toml
44
+ - ext/rahocorasick/extconf.rb
45
+ - ext/rahocorasick/src/lib.rs
46
+ - lib/ahocorasick-rust.rb
47
+ - lib/rahocorasick/2.7/rahocorasick.so
48
+ - lib/rahocorasick/3.0/rahocorasick.so
49
+ - lib/rahocorasick/3.1/rahocorasick.so
50
+ - lib/rahocorasick/3.2/rahocorasick.so
51
+ - lib/rahocorasick/3.3/rahocorasick.so
52
+ - lib/rahocorasick/3.4/rahocorasick.so
53
+ homepage: https://github.com/jetpks/ahocorasick-rust-ruby
54
+ licenses:
55
+ - MIT
56
+ metadata: {}
57
+ post_install_message:
58
+ rdoc_options: []
59
+ require_paths:
60
+ - lib
61
+ required_ruby_version: !ruby/object:Gem::Requirement
62
+ requirements:
63
+ - - ">="
64
+ - !ruby/object:Gem::Version
65
+ version: '2.7'
66
+ - - "<"
67
+ - !ruby/object:Gem::Version
68
+ version: 3.5.dev
69
+ required_rubygems_version: !ruby/object:Gem::Requirement
70
+ requirements:
71
+ - - ">="
72
+ - !ruby/object:Gem::Version
73
+ version: 3.3.22
74
+ requirements: []
75
+ rubygems_version: 3.5.23
76
+ signing_key:
77
+ specification_version: 4
78
+ summary: Blazing-fast ✨ Ruby wrapper for the Rust Aho-Corasick string matching algorithm!
79
+ test_files: []