tre_regex 0.1.1-x86_64-linux-musl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/ext/tre_regex/extconf.rb +98 -0
- data/ext/tre_regex/tre_regex.c +1 -0
- data/lib/tre_regex/3.3/tre_regex.so +0 -0
- data/lib/tre_regex/3.4/tre_regex.so +0 -0
- data/lib/tre_regex/4.0/tre_regex.so +0 -0
- data/lib/tre_regex/bin/libtre.so +0 -0
- data/lib/tre_regex/version.rb +5 -0
- data/lib/tre_regex.rb +191 -0
- metadata +74 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: fa8da48b1a5e547d78838ac929a2e4c57124003d4bf1f10163b212ca207884f9
|
|
4
|
+
data.tar.gz: 1d86f4aa5eff793743c3da881e007f0ad622f59ebfc37b6e3deadf0fb2c39520
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: 8347570e140d23daddf3ad0f9c1758ce51407c87d9a7b0cf88e1b2e188ce99279906f8659f167b147fb65b3af8acfb10afb6853697c93225739344371cb826d4
|
|
7
|
+
data.tar.gz: 9c3a30a98e1eb0ea06f710bacaf147aa15b5417ca9a261cf44e1b21f282921a3f75712eb29901ebfc3675493e8cf173fb31cfd6338c9b3b56b19355b712b99aa
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'mkmf'
|
|
4
|
+
require 'rbconfig'
|
|
5
|
+
require 'open-uri'
|
|
6
|
+
require 'net/http'
|
|
7
|
+
require 'fileutils'
|
|
8
|
+
|
|
9
|
+
# Host platform flags; used further down to choose the staged library's file name.
is_windows = RbConfig::CONFIG['host_os'] =~ /mingw|mswin/
is_darwin = RbConfig::CONFIG['host_os'].include?('darwin')

# Walk upwards from this directory until a directory containing `lib/` is found.
# The walk stops at the filesystem root, detected portably: File.dirname of a
# root directory returns the root itself ("/" on Unix, "C:/" on Windows), so
# comparing against a literal '/' alone would loop forever on Windows.
root_dir = File.expand_path(__dir__)
until Dir.exist?(File.join(root_dir, 'lib'))
  parent_dir = File.dirname(root_dir)
  break if parent_dir == root_dir

  root_dir = parent_dir
end

# Download Configuration
github_repo = 'laurikari/tre'
# Revision identifier interpolated into the archive URL and the local file names.
version = '5ac28057f648debda76f9bf4d39dfdfa85b0df18'
tarball_url = "https://github.com/#{github_repo}/archive/#{version}.tar.gz"
tarball_file = File.expand_path("./tre-#{version}.tar.gz", __dir__)
tre_src_dir = File.expand_path("./tre-#{version}", __dir__)
# Directory the built shared library is copied into.
dest_lib_dir = File.join(root_dir, 'lib', 'tre_regex', 'bin')
|
|
22
|
+
|
|
23
|
+
# Downloads +url+ with Net::HTTP and returns the response body, following
# HTTP redirects.
#
# @param url [String, URI] address to fetch
# @param limit [Integer] number of requests still allowed (decremented per redirect)
# @return [String] body of the first successful response
# @raise [RuntimeError] when the redirect budget is exhausted, a redirect
#   carries no Location header, or the server answers with a non-success status
def download_file(url, limit = 10)
  raise 'Too many redirects' if limit.zero?

  uri = URI(url)
  response = Net::HTTP.get_response(uri)

  case response
  when Net::HTTPSuccess
    response.body
  when Net::HTTPRedirection
    location = response['location']
    raise "Redirect without Location header from #{uri}" if location.nil? || location.empty?

    # Location may be relative (e.g. "/path"); resolve it against the URL
    # that produced the redirect so the next request has a scheme and host.
    target = URI.join(uri.to_s, location).to_s
    puts "Following redirect to #{target}..."
    download_file(target, limit - 1)
  else
    raise "Download failed: #{response.code} #{response.message}"
  end
end
|
|
40
|
+
|
|
41
|
+
# Automatically Download and Extract
# Skipped entirely when the extracted source directory already exists.
unless Dir.exist?(tre_src_dir)
  puts '========== Downloading TRE from GitHub =========='
  begin
    File.binwrite(tarball_file, download_file(tarball_url))
  rescue StandardError => e
    # Any download or write failure terminates the build with the error message.
    abort "Error: #{e.message}"
  end

  puts '========== Extracting TRE Source =========='
  # Arguments are passed as a list so no shell parses them: install paths that
  # contain spaces or shell metacharacters are handled verbatim.
  # -z selects gzip decompression.
  system('tar', '-xzf', tarball_file, '-C', __dir__) || abort('Extraction failed')
end
|
|
55
|
+
|
|
56
|
+
# Build TRE synchronously using Ruby
# When the extension is configured with --enable-cross-build, forward Ruby's
# host triplet to TRE's configure script; otherwise pass no --host flag.
host_flag = enable_config('cross-build') ? "--host=#{RbConfig::CONFIG['host']} " : ''

puts '========== Building TRE =========='
Dir.chdir(tre_src_dir) do
  # Generate the configure script only when the source tree does not ship one.
  system('./utils/autogen.sh') || raise('autogen.sh failed') unless File.exist?('configure')

  # Shared library only; the static archive and the agrep tool are disabled.
  system("./configure #{host_flag} --enable-shared --disable-static --disable-agrep") || raise('configure failed')
  system('make') || raise('make failed')
end
|
|
67
|
+
|
|
68
|
+
puts '========== Staging Shared Library for FFI =========='
FileUtils.mkdir_p(dest_lib_dir)

# Collect shared-library candidates from libtool's output directory.
# The extension test looks at the file NAME only: testing the full path would
# make every file match whenever a parent directory happens to contain ".so"
# or ".dylib". Static (.a) archives are excluded.
shared_libs = Dir.glob("#{tre_src_dir}/lib/.libs/*").select do |path|
  name = File.basename(path)
  (name.include?('.so') || name.include?('.dylib') || name.end_with?('.dll')) && !name.end_with?('.a')
end

# Prefer the REAL physical file over symlinks; fall back to the first
# candidate just in case libtool behaved differently.
src_lib = shared_libs.find { |path| !File.symlink?(path) } || shared_libs.first

# Fail the build now rather than producing an install that can only fail later
# when the library is loaded.
abort "Error: no TRE shared library found in #{tre_src_dir}/lib/.libs" unless src_lib

# Determine the clean target filename based on the OS
dest_name = if is_windows
              'tre.dll'
            elsif is_darwin
              'libtre.dylib'
            else
              'libtre.so'
            end

# Use File.realpath so the bytes of the resolved file are copied even if the
# chosen candidate is a symlink.
FileUtils.cp(File.realpath(src_lib), File.join(dest_lib_dir, dest_name))

# Create a standard dummy ruby extension to satisfy rake-compiler completely
create_makefile('tre_regex')
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
/* Intentionally empty initializer for the `tre_regex` extension: extconf.rb
 * builds this file only as a dummy extension to satisfy rake-compiler. */
void Init_tre_regex(void) {}
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
data/lib/tre_regex.rb
ADDED
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'ffi'
|
|
4
|
+
require 'rbconfig'
|
|
5
|
+
require_relative 'tre_regex/version'
|
|
6
|
+
|
|
7
|
+
module TreRegex
|
|
8
|
+
class Error < StandardError; end
|
|
9
|
+
|
|
10
|
+
# The FFI Native Bridge
#
# Locates the TRE shared library shipped next to this file, loads it through
# FFI and declares the structs and C functions the rest of the gem calls.
module Native
  extend FFI::Library

  # Determine the expected filename for this OS. The mapping mirrors the names
  # ext/tre_regex/extconf.rb stages: 'tre.dll' on Windows, 'libtre.dylib' on
  # macOS and 'libtre.so' on every other host (not only Linux).
  host_os = RbConfig::CONFIG['host_os']
  filename = case host_os
             when /darwin/ then 'libtre.dylib'
             when /mingw|mswin/ then 'tre.dll'
             else 'libtre.so'
             end

  # Search for the compiled binary (checks both your extconf.rb path and standard path)
  search_paths = [
    File.expand_path("tre_regex/bin/#{filename}", __dir__),
    File.expand_path(filename, __dir__)
  ]

  lib_path = search_paths.find { |p| File.exist?(p) }

  unless lib_path
    raise LoadError, "Could not find #{filename} in #{search_paths.first}. Did you compile the C extension?"
  end

  ffi_lib lib_path

  # TRE Regex Configuration Flags (bit values passed to tre_regcomp)
  REG_EXTENDED = 1
  REG_ICASE = 2
  REG_NEWLINE = 4
  REG_NOSUB = 8

  # Memory layout for TRE match offsets.
  # rm_so / rm_eo are consumed as byte offsets into the searched string.
  class RegMatch < FFI::Struct
    layout :rm_so, :int,
           :rm_eo, :int
  end

  # Memory layout for TRE approximate matching parameters:
  # per-operation costs followed by the upper bounds on cost and error counts.
  class RegAParams < FFI::Struct
    layout :cost_ins, :int,
           :cost_del, :int,
           :cost_subst, :int,
           :max_cost, :int,
           :max_ins, :int,
           :max_del, :int,
           :max_subst, :int,
           :max_err, :int
  end

  # Memory layout for TRE approximate match results.
  # :pmatch points at caller-allocated RegMatch storage holding :nmatch entries.
  class RegAMatch < FFI::Struct
    layout :nmatch, :size_t,
           :pmatch, :pointer,
           :cost, :int,
           :num_ins, :int,
           :num_del, :int,
           :num_subst, :int
  end

  attach_function :tre_regcomp, %i[pointer string int], :int
  attach_function :tre_regfree, [:pointer], :void
  # The parameter struct is passed by value; the result struct by pointer.
  attach_function :tre_regaexec, [:pointer, :string, :pointer, RegAParams.by_value, :int], :int
  attach_function :tre_regaparams_default, [:pointer], :void
end
|
|
76
|
+
|
|
77
|
+
# User-Facing Ruby Class
#
# A pattern compiled with Native.tre_regcomp and executed with
# Native.tre_regaexec, which supports approximate (fuzzy) matching.
#
# Matching options (all optional, given as a Hash):
#   :max_errors, :max_cost                               overall bounds
#   :max_insertions, :max_deletions, :max_substitutions  per-kind bounds
#   :weight_insertion, :weight_deletion, :weight_substitution  per-kind costs
# With no options, only exact matches are reported (max_err is set to 0).
class Regex
  # Execution flag telling TRE that the start of the buffer is not the start
  # of the subject, so "^" must not match there. Value taken from REG_NOTBOL
  # in TRE's tre.h.
  REG_NOTBOL = 1
  private_constant :REG_NOTBOL

  attr_reader :pattern

  # @param pattern [String] regular expression source (compiled with REG_EXTENDED)
  # @param ignore_case [Boolean] adds REG_ICASE when true
  # @raise [TypeError] if +pattern+ is not a String (nil would reach C as NULL)
  # @raise [TreRegex::Error] if TRE rejects the pattern
  def initialize(pattern, ignore_case: false)
    raise TypeError, "pattern must be a String, got #{pattern.class}" unless pattern.is_a?(String)

    @pattern = pattern
    # 256 bytes of C memory reserved for the regex_t struct
    # (NOTE(review): assumed to be larger than TRE's regex_t - confirm against tre.h).
    @preg = FFI::MemoryPointer.new(:char, 256)

    flags = Native::REG_EXTENDED
    flags |= Native::REG_ICASE if ignore_case

    res = Native.tre_regcomp(@preg, pattern, flags)
    raise TreRegex::Error, "Failed to compile regex pattern: #{pattern}" if res != 0

    # Garbage Collection Hook: free the compiled regex when this object is collected.
    # Registered only after a successful compile, so tre_regfree never sees
    # an uninitialised buffer.
    ObjectSpace.define_finalizer(self, self.class.finalize(@preg))
  end

  # Builds the GC finalizer proc. It closes over the pointer only (never over
  # the Regex instance), releases TRE's internal state, then the buffer itself.
  def self.finalize(preg_ptr)
    proc do
      Native.tre_regfree(preg_ptr)
      preg_ptr.free
    end
  end

  # Finds the first match of the pattern in +text+.
  #
  # @param text [String] subject string
  # @param options [Hash] matching options (see class comment)
  # @return [Hash, nil] { match:, index:, end_index:, cost:, errors: } with
  #   character-based indices, or nil when nothing matches
  # @raise [TypeError] if +text+ is not a String
  def exec(text, options = {})
    run_match(text, options, 0)
  end

  # @return [Boolean] whether #exec finds a match
  def test?(text, options = {})
    !exec(text, options).nil?
  end

  # Yields every successive match in +text+ (indices relative to the whole
  # string). Returns an Enumerator when called without a block.
  def match_all(text, options = {})
    return enum_for(:match_all, text, options) unless block_given?

    offset = 0
    while offset <= text.length
      # Past the first position the searched substring no longer starts at the
      # beginning of +text+; REG_NOTBOL stops "^" from matching at every
      # resume point.
      eflags = offset.zero? ? 0 : REG_NOTBOL
      result = run_match(text[offset..], options, eflags)
      break unless result

      result[:index] += offset
      result[:end_index] += offset
      yield result

      # Always move forward by at least one character so an empty match
      # cannot stall the loop.
      offset = result[:index] + [result[:end_index] - result[:index], 1].max
    end
  end

  private

  # Runs tre_regaexec on +text+ with the given execution flags and converts
  # the outcome into a result Hash (nil when the call reports no match).
  def run_match(text, options, eflags)
    # A nil subject would be handed to C as a NULL pointer; reject it up front.
    raise TypeError, "text must be a String, got #{text.class}" unless text.is_a?(String)

    params = build_params(options)
    pmatch = FFI::MemoryPointer.new(Native::RegMatch)
    match_data = prepare_match_data(pmatch)

    res = Native.tre_regaexec(@preg, text, match_data, params, eflags)
    res.zero? ? parse_result(text, match_data, pmatch) : nil
  end

  # Starts from TRE's default parameters; with no options the match is made
  # exact by forcing max_err to 0.
  def build_params(opts)
    params = Native::RegAParams.new
    Native.tre_regaparams_default(params.to_ptr)
    return params.tap { |p| p[:max_err] = 0 } if opts.empty?

    apply_limits(params, opts)
    apply_costs(params, opts)
    params
  end

  # Applies the error-count and cost bounds.
  #
  # When an overall bound (:max_errors or :max_cost) is supplied, per-kind
  # limits that were not given keep the values already in +params+, so the
  # overall bound is what constrains the match. Without an overall bound,
  # unspecified kinds are disallowed (0) and max_err becomes the sum of the
  # per-kind limits.
  def apply_limits(params, opts)
    overall_bound = opts.key?(:max_errors) || opts.key?(:max_cost)

    params[:max_err] = opts[:max_errors] if opts.key?(:max_errors)
    params[:max_cost] = opts[:max_cost] if opts.key?(:max_cost)

    params[:max_ins] = opts.fetch(:max_insertions) { overall_bound ? params[:max_ins] : 0 }
    params[:max_del] = opts.fetch(:max_deletions) { overall_bound ? params[:max_del] : 0 }
    params[:max_subst] = opts.fetch(:max_substitutions) { overall_bound ? params[:max_subst] : 0 }

    # Bound max_err if not explicitly set
    return if overall_bound

    params[:max_err] = params[:max_ins] + params[:max_del] + params[:max_subst]
  end

  # Overrides the per-operation costs for which a weight option is present.
  def apply_costs(params, opts)
    params[:cost_ins] = opts[:weight_insertion] if opts.key?(:weight_insertion)
    params[:cost_del] = opts[:weight_deletion] if opts.key?(:weight_deletion)
    params[:cost_subst] = opts[:weight_substitution] if opts.key?(:weight_substitution)
  end

  # Result struct asking TRE for a single match slot stored in +pmatch+.
  def prepare_match_data(pmatch)
    Native::RegAMatch.new.tap do |m|
      m[:nmatch] = 1
      m[:pmatch] = pmatch
    end
  end

  # Converts the byte offsets in +pmatch+ into a character-indexed result Hash.
  def parse_result(text, match_data, pmatch)
    rm = Native::RegMatch.new(pmatch)
    byte_match = text.byteslice(rm[:rm_so]...rm[:rm_eo])
    # Character index = number of characters in the bytes preceding the match.
    char_start = text.byteslice(0...rm[:rm_so]).length

    {
      match: byte_match,
      index: char_start,
      end_index: char_start + byte_match.length,
      cost: match_data[:cost],
      errors: {
        insertions: match_data[:num_ins],
        deletions: match_data[:num_del],
        substitutions: match_data[:num_subst]
      }
    }
  end
end
|
|
191
|
+
end
|
metadata
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: tre_regex
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.1.1
|
|
5
|
+
platform: x86_64-linux-musl
|
|
6
|
+
authors:
|
|
7
|
+
- Oleksii Vasyliev
|
|
8
|
+
bindir: exe
|
|
9
|
+
cert_chain: []
|
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
11
|
+
dependencies:
|
|
12
|
+
- !ruby/object:Gem::Dependency
|
|
13
|
+
name: ffi
|
|
14
|
+
requirement: !ruby/object:Gem::Requirement
|
|
15
|
+
requirements:
|
|
16
|
+
- - ">="
|
|
17
|
+
- !ruby/object:Gem::Version
|
|
18
|
+
version: '1.0'
|
|
19
|
+
type: :runtime
|
|
20
|
+
prerelease: false
|
|
21
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
22
|
+
requirements:
|
|
23
|
+
- - ">="
|
|
24
|
+
- !ruby/object:Gem::Version
|
|
25
|
+
version: '1.0'
|
|
26
|
+
description: TreRegex provides a high-performance Ruby interface to the TRE C library
|
|
27
|
+
using FFI. It brings robust approximate (fuzzy) regular expression matching to Ruby,
|
|
28
|
+
featuring multi-byte Unicode string safety, granular error limits, and precompiled
|
|
29
|
+
cross-platform native binaries
|
|
30
|
+
email:
|
|
31
|
+
- leopard.not.a@gmail.com
|
|
32
|
+
executables: []
|
|
33
|
+
extensions: []
|
|
34
|
+
extra_rdoc_files: []
|
|
35
|
+
files:
|
|
36
|
+
- ext/tre_regex/extconf.rb
|
|
37
|
+
- ext/tre_regex/tre_regex.c
|
|
38
|
+
- lib/tre_regex.rb
|
|
39
|
+
- lib/tre_regex/3.3/tre_regex.so
|
|
40
|
+
- lib/tre_regex/3.4/tre_regex.so
|
|
41
|
+
- lib/tre_regex/4.0/tre_regex.so
|
|
42
|
+
- lib/tre_regex/bin/libtre.so
|
|
43
|
+
- lib/tre_regex/version.rb
|
|
44
|
+
homepage: https://github.com/le0pard/tre_regex
|
|
45
|
+
licenses:
|
|
46
|
+
- MIT
|
|
47
|
+
metadata:
|
|
48
|
+
homepage_uri: https://github.com/le0pard/tre_regex
|
|
49
|
+
source_code_uri: https://github.com/le0pard/tre_regex
|
|
50
|
+
changelog_uri: https://github.com/le0pard/tre_regex/releases
|
|
51
|
+
bug_tracker_uri: https://github.com/le0pard/tre_regex/issues
|
|
52
|
+
documentation_uri: https://github.com/le0pard/tre_regex/blob/main/README.md
|
|
53
|
+
rubygems_mfa_required: 'true'
|
|
54
|
+
rdoc_options: []
|
|
55
|
+
require_paths:
|
|
56
|
+
- lib
|
|
57
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
58
|
+
requirements:
|
|
59
|
+
- - ">="
|
|
60
|
+
- !ruby/object:Gem::Version
|
|
61
|
+
version: '3.3'
|
|
62
|
+
- - "<"
|
|
63
|
+
- !ruby/object:Gem::Version
|
|
64
|
+
version: 4.1.dev
|
|
65
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
66
|
+
requirements:
|
|
67
|
+
- - ">="
|
|
68
|
+
- !ruby/object:Gem::Version
|
|
69
|
+
version: 3.3.22
|
|
70
|
+
requirements: []
|
|
71
|
+
rubygems_version: 4.0.6
|
|
72
|
+
specification_version: 4
|
|
73
|
+
summary: A fast Ruby FFI wrapper for the TRE approximate regex matching library.
|
|
74
|
+
test_files: []
|