pcre2 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.github/workflows/tests.yml +21 -0
- data/.gitignore +12 -0
- data/.rspec +3 -0
- data/Gemfile +7 -0
- data/LICENSE.txt +21 -0
- data/README.md +67 -0
- data/Rakefile +8 -0
- data/benchmark.rake +90 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/lib/pcre2.rb +11 -0
- data/lib/pcre2/error.rb +9 -0
- data/lib/pcre2/lib.rb +168 -0
- data/lib/pcre2/lib/constants.rb +349 -0
- data/lib/pcre2/matchdata.rb +41 -0
- data/lib/pcre2/regexp.rb +35 -0
- data/lib/pcre2/version.rb +3 -0
- data/pcre2.gemspec +29 -0
- metadata +79 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 3af91ee4c80035897edf206316fbc0d3db890a04af6e8443ef6c2449f4d2c4ab
|
4
|
+
data.tar.gz: ac7380e81492952a72a5ccd7b20a704f673e11645eb32b258216db40b1fa6cad
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 32f765faedfbaeb55e3b63572d13546d0afb7fb69f2a1cc102cf9f2c393c2a3e957be61a187cf5f7744c0e8f2d63655e281b932fecf26058c754c450aa1d8ef5
|
7
|
+
data.tar.gz: 590060108d1f0f68d945a9753372936cf1bc46b4efa4e53544e4b13fde0e7ecd49b0b7a4aae1f9858bc734b636b29e4fd0622ca60cb1648f838ee136e7cfb22a
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# Setup Ruby, install gems, cache gems, and run test suite
# - https://github.com/ruby/setup-ruby

name: Tests
on: [push, pull_request]

jobs:
  test:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # Quoted so the versions stay strings rather than floats.
        ruby: ['2.5', '2.6']
    name: Ruby ${{ matrix.ruby }} tests
    steps:
      - uses: actions/checkout@v2
      - uses: ruby/setup-ruby@v1
        with:
          # bundler-cache runs `bundle install` and caches the installed gems,
          # so no separate `bundle install` step is needed.
          bundler-cache: true
          ruby-version: ${{ matrix.ruby }}
      - run: bundle exec rake -t
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2020 David Verhasselt
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,67 @@
|
|
1
|
+
# PCRE2
|
2
|
+
|
3
|
+
This library provides a Ruby interface for the PCRE2 library, which supports more advanced regular expression functionality than the built-in Ruby `Regexp`.
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
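Install the PCRE2 library. The example below uses Homebrew on macOS; on Debian/Ubuntu, install the `libpcre2-dev` package instead: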
|
8
|
+
|
9
|
+
```bash
|
10
|
+
brew install pcre2
|
11
|
+
```
|
12
|
+
|
13
|
+
Add this line to your application's Gemfile:
|
14
|
+
|
15
|
+
```ruby
|
16
|
+
gem 'pcre2'
|
17
|
+
```
|
18
|
+
|
19
|
+
And then execute:
|
20
|
+
|
21
|
+
$ bundle install
|
22
|
+
|
23
|
+
Or install it yourself as:
|
24
|
+
|
25
|
+
$ gem install pcre2
|
26
|
+
|
27
|
+
## Usage
|
28
|
+
|
29
|
+
`PCRE2::Regexp` aims to act as much like Ruby's `Regexp` as possible. It has implemented a subset of the `Regexp` and `MatchData` APIs so it can be used as a drop-in replacement.
|
30
|
+
|
31
|
+
```ruby
|
32
|
+
regexp = PCRE2::Regexp.new("hello")
|
33
|
+
subject = "well hello there!"
|
34
|
+
matchdata = regexp.match(subject)
|
35
|
+
|
36
|
+
matchdata.offset(0) # => [5, 10] - start and end of the match
|
37
|
+
matchdata[0] # => "hello"
|
38
|
+
|
39
|
+
matchdata = regexp.match(subject, 11) # find next match
|
40
|
+
```
|
41
|
+
|
42
|
+
## Benchmark
|
43
|
+
|
44
|
+
You can run the benchmark that compares `PCRE2::Regexp` with Ruby's built-in `Regexp` as follows:
|
45
|
+
|
46
|
+
```bash
|
47
|
+
bundle exec rake benchmark
|
48
|
+
```
|
49
|
+
|
50
|
+
## Resources
|
51
|
+
|
52
|
+
- [PCRE2 Library](https://www.pcre.org/current/doc/html/)
|
53
|
+
- [PCRE2 demo](https://www.pcre.org/current/doc/html/pcre2demo.html)
|
54
|
+
|
55
|
+
## Development
|
56
|
+
|
57
|
+
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
58
|
+
|
59
|
+
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
60
|
+
|
61
|
+
## Contributing
|
62
|
+
|
63
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/dv/pcre2.
|
64
|
+
|
65
|
+
## License
|
66
|
+
|
67
|
+
The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
|
data/Rakefile
ADDED
data/benchmark.rake
ADDED
@@ -0,0 +1,90 @@
|
|
1
|
+
require "benchmark"
|
2
|
+
require "pcre2"
|
3
|
+
|
4
|
+
desc "Run a benchmark to compare PCRE2 vs Ruby's built-in Regexp"
task :benchmark do
  # Runs the same "find every match" loop against Ruby's Regexp, a plain
  # PCRE2::Regexp and a JIT-compiled PCRE2::Regexp, and prints the timings.
  #
  # pattern - regular expression source shared by all three engines
  # string  - subject that is scanned repeatedly
  def benchmark!(pattern, string)
    # Walks through all matches of `re` in `string`, restarting one position
    # past the end of the previous match until no match is left.
    scan_all = ->(re) {
      pos = 0

      while matchdata = re.match(string, pos)
        pos = matchdata.offset(0)[1] + 1
      end
    }

    # Keep the garbage collector out of the measurements; the `ensure` makes
    # sure it is switched back on even when a benchmark raises.
    GC.disable
    begin
      Benchmark.bmbm do |benchmark|
        ruby_re = Regexp.new(pattern)
        pcre2_re = PCRE2::Regexp.new(pattern)
        pcre2_re_jit = PCRE2::Regexp.new(pattern).tap(&:jit!)

        benchmark.report("Ruby Regexp") do
          100000.times { scan_all.call(ruby_re) }
        end

        GC.start

        benchmark.report("PCRE2 Regexp") do
          100000.times { scan_all.call(pcre2_re) }
        end

        GC.start

        benchmark.report("PCRE2 Regexp - JIT enhanced") do
          100000.times { scan_all.call(pcre2_re_jit) }
        end
      end
    ensure
      GC.enable
    end

    puts
    puts
    puts
  end

  puts "Benchmark 1: Small pattern, big string"
  puts

  pattern = "hello"
  string = "abab" * 1000
  string += "hello"
  string += "abab" * 1000

  benchmark!(pattern, string)


  puts "Benchmark 2: Big pattern, big string"
  puts

  pattern = "hello" * 50
  string = "abab" * 1000
  string += "hello"
  string += "abab" * 1000
  string += pattern
  string += "abab" * 1000

  benchmark!(pattern, string)


  puts "Benchmark 3: Small pattern, small string"
  puts

  pattern = "hello"
  string = "abababab" + "hello" + "abababab"

  benchmark!(pattern, string)


  # This is the fourth benchmark; it was previously mislabelled "Benchmark 3".
  puts "Benchmark 4: Multiple matches"
  puts

  pattern = "hello"
  string = ""

  20.times do
    string += "abab" * 5
    string += "hello"
    string += "abab" * 5
  end

  benchmark!(pattern, string)
end
|
data/bin/console
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "bundler/setup"
|
4
|
+
require "pcre2"
|
5
|
+
|
6
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
7
|
+
# with your gem easier. You can also use a different console, if you like.
|
8
|
+
|
9
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
10
|
+
# require "pry"
|
11
|
+
# Pry.start
|
12
|
+
|
13
|
+
require "irb"
|
14
|
+
IRB.start(__FILE__)
|
data/bin/setup
ADDED
data/lib/pcre2.rb
ADDED
data/lib/pcre2/error.rb
ADDED
@@ -0,0 +1,9 @@
|
|
1
|
+
# Error raised for failures reported by the PCRE2 library or by this wrapper.
class PCRE2::Error < StandardError
  # Builds an error whose message combines the numeric PCRE2 error code, the
  # text PCRE2::Lib.get_error_message returns for it and, optionally, extra
  # context supplied by the caller.
  #
  # error_code    - value forwarded to PCRE2::Lib.get_error_message
  # extra_message - optional context appended after " - "
  #
  # Returns a new (not yet raised) instance of the receiving class.
  def self.from_error_code(error_code, extra_message = nil)
    parts = ["Error #{error_code}: ", PCRE2::Lib.get_error_message(error_code)]
    parts << " - #{extra_message}" if extra_message

    new(parts.join)
  end
end
|
data/lib/pcre2/lib.rb
ADDED
@@ -0,0 +1,168 @@
|
|
1
|
+
require "ffi"
|
2
|
+
|
3
|
+
module PCRE2::Lib
|
4
|
+
RETURN_CODE_NO_ERROR = 100
|
5
|
+
|
6
|
+
extend FFI::Library
|
7
|
+
|
8
|
+
ffi_lib 'pcre2-8' # Able to do 16 or 32 too
|
9
|
+
|
10
|
+
PCRE2_SIZE = typedef :size_t, :PCRE2_SIZE
|
11
|
+
PCRE2_SPTR = typedef :pointer, :PCRE2_SPTR
|
12
|
+
PCRE2_UCHAR8 = typedef :uint8_t, :PCRE2_UCHAR8
|
13
|
+
PCRE2_UCHAR16 = typedef :uint16_t, :PCRE2_UCHAR16
|
14
|
+
PCRE2_UCHAR32 = typedef :uint32_t, :PCRE2_UCHAR32
|
15
|
+
|
16
|
+
# For 8-bit PCRE
|
17
|
+
PCRE2_UCHAR = typedef :PCRE2_UCHAR8, :PCRE2_UCHAR
|
18
|
+
|
19
|
+
# int pcre2_get_error_message(int errorcode, PCRE2_UCHAR *buffer, PCRE2_SIZE bufflen);
|
20
|
+
attach_function :pcre2_get_error_message_8, [ :int, :pointer, :PCRE2_SIZE ], :int
|
21
|
+
|
22
|
+
# pcre2_code *pcre2_compile(PCRE2_SPTR pattern, PCRE2_SIZE length, uint32_t options, int *errorcode, PCRE2_SIZE *erroroffset, pcre2_compile_context *ccontext);
|
23
|
+
attach_function :pcre2_compile_8, [ :PCRE2_SPTR, :PCRE2_SIZE, :uint32_t, :pointer, :pointer, :pointer ], :pointer
|
24
|
+
attach_function :pcre2_code_free_8, [ :pointer ], :void
|
25
|
+
|
26
|
+
# int pcre2_pattern_info(const pcre2_code *code, uint32_t what, void *where);
|
27
|
+
attach_function :pcre2_pattern_info_8, [ :pointer, :uint32_t, :pointer ], :int
|
28
|
+
|
29
|
+
# pcre2_match_data *pcre2_match_data_create_from_pattern( const pcre2_code *code, pcre2_general_context *gcontext);
|
30
|
+
attach_function :pcre2_match_data_create_from_pattern_8, [ :pointer, :pointer ], :pointer
|
31
|
+
attach_function :pcre2_match_data_free_8, [ :pointer ], :void
|
32
|
+
|
33
|
+
# int pcre2_match(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length, PCRE2_SIZE startoffset, uint32_t options, pcre2_match_data *match_data, pcre2_match_context *mcontext);
|
34
|
+
attach_function :pcre2_match_8, [ :pointer, :PCRE2_SPTR, :PCRE2_SIZE, :PCRE2_SIZE, :uint32_t, :pointer, :pointer ], :int
|
35
|
+
|
36
|
+
attach_function :pcre2_get_ovector_count_8, [ :pointer ], :uint32_t
|
37
|
+
attach_function :pcre2_get_ovector_pointer_8, [ :pointer ], :pointer
|
38
|
+
|
39
|
+
# int pcre2_jit_compile(pcre2_code *code, uint32_t options)
|
40
|
+
attach_function :pcre2_jit_compile_8, [ :pointer, :uint32_t ], :int
|
41
|
+
|
42
|
+
|
43
|
+
# Looks up the textual message for a PCRE2 error code.
#
# error_code - an Integer, or an FFI::MemoryPointer from which the code is
#              read with read_int
#
# Returns the message String; when PCRE2 reports PCRE2_ERROR_BADDATA a
# placeholder String naming the unknown code is returned instead.
# Raises PCRE2::Error when PCRE2 reports PCRE2_ERROR_NOMEMORY for the
# 120-unit message buffer.
def self.get_error_message(error_code)
  error_code = error_code.read_int if error_code.kind_of?(FFI::MemoryPointer)

  message_buffer = FFI::MemoryPointer.new(PCRE2_UCHAR, 120)
  status = pcre2_get_error_message_8(error_code, message_buffer, message_buffer.size)

  if status == PCRE2::PCRE2_ERROR_BADDATA
    return "Error number #{error_code} unknown"
  end

  if status == PCRE2::PCRE2_ERROR_NOMEMORY
    raise PCRE2::Error, "Buffer of #{message_buffer.size} is not large enough to contain message"
  end

  message_buffer.read_string
end
|
60
|
+
|
61
|
+
# Some utility functions to help make the above more palatable
|
62
|
+
# Compiles a pattern string with pcre2_compile (8-bit library).
#
# pattern - the regular expression source String
# options - (possibly nested) Array of PCRE2 option bit constants; they are
#           flattened and OR-ed together
#
# Returns an FFI::AutoPointer wrapping the compiled code; pcre2_code_free_8
# is registered as its releaser.
# Raises PCRE2::Error (including the error offset) when compilation fails.
def self.compile_pattern(pattern, options = [])
  pattern_string_ptr = FFI::MemoryPointer.from_string(pattern)
  error_code_ptr     = FFI::MemoryPointer.new(:int, 1)
  error_offset_ptr   = FFI::MemoryPointer.new(PCRE2_SIZE, 1)
  options            = options.flatten.inject(0) { |memo, option| memo | option }

  # The 8-bit library expects the length in code units (bytes). String#size
  # counts characters and would cut off patterns containing multibyte
  # characters, so the byte size is passed instead.
  pattern_ptr = PCRE2::Lib.pcre2_compile_8(pattern_string_ptr, pattern.bytesize, options, error_code_ptr, error_offset_ptr, nil)

  if pattern_ptr.null?
    error_code   = error_code_ptr.read_int
    error_offset = error_offset_ptr.read(PCRE2_SIZE)

    raise PCRE2::Error.from_error_code(error_code, "while compiling pattern #{pattern} @ #{error_offset}")
  end

  FFI::AutoPointer.new(pattern_ptr, PCRE2::Lib.method(:pcre2_code_free_8))
end
|
79
|
+
|
80
|
+
# Allocates a PCRE2 match data block for the given compiled pattern via
# pcre2_match_data_create_from_pattern.
#
# pattern_ptr - pointer to the compiled pattern
#
# Returns an FFI::AutoPointer with pcre2_match_data_free_8 registered as
# its releaser.
def self.create_match_data_for_pattern(pattern_ptr)
  FFI::AutoPointer.new(
    PCRE2::Lib.pcre2_match_data_create_from_pattern_8(pattern_ptr, nil),
    PCRE2::Lib.method(:pcre2_match_data_free_8)
  )
end
|
84
|
+
|
85
|
+
# Runs pcre2_match for a compiled pattern against a subject string.
#
# pattern_ptr    - pointer to the compiled pattern
# body           - the subject String
# position       - byte offset in the subject at which to start (nil => 0)
# match_data_ptr - optional existing match data block; one is created from
#                  the pattern when omitted
#
# Returns [result_count, match_data_ptr]; result_count is 0 when nothing
# matched, otherwise the positive return code of pcre2_match.
# Raises PCRE2::Error when the match data block is too small for all
# captures (return code 0) or when PCRE2 reports any error other than
# PCRE2_ERROR_NOMATCH.
def self.match(pattern_ptr, body, position: 0, match_data_ptr: nil)
  position ||= 0
  match_data_ptr ||= create_match_data_for_pattern(pattern_ptr)

  # Length of the subject in code units (bytes). MemoryPointer.from_string
  # appends a NUL terminator, so the pointer's own size is one byte too
  # large and would make that "\0" part of the subject being matched.
  subject_length = body.bytesize

  # Nothing can match beyond the end of the subject. Report "no match", as
  # Ruby's Regexp#match does, instead of surfacing a bad-offset error.
  return [0, match_data_ptr] if position > subject_length

  body_ptr = FFI::MemoryPointer.from_string(body)

  return_code =
    PCRE2::Lib.pcre2_match_8(
      pattern_ptr,
      body_ptr,
      subject_length,
      position,
      0,
      match_data_ptr,
      nil
    )

  if return_code == 0
    raise PCRE2::Error, "Not enough memory in MatchData to store all captures"
  elsif return_code == PCRE2::PCRE2_ERROR_NOMATCH
    [0, match_data_ptr]
  elsif return_code < 0
    raise PCRE2::Error.from_error_code(return_code)
  else
    [return_code, match_data_ptr]
  end
end
|
117
|
+
|
118
|
+
# Reads [start, end] offset pairs out of a match data block's ovector.
#
# match_data_ptr - pointer to the match data block
# pair_count     - number of pairs to read; when nil the count reported by
#                  pcre2_get_ovector_count is used
#
# Returns an Array of two-element Arrays of size_t values.
def self.get_ovector_pairs(match_data_ptr, pair_count)
  pair_count = PCRE2::Lib.pcre2_get_ovector_count_8(match_data_ptr) if pair_count.nil?

  ovector_ptr = PCRE2::Lib.pcre2_get_ovector_pointer_8(match_data_ptr)
  width       = FFI.type_size(:size_t)

  (0...pair_count).map do |index|
    # Pair `index` occupies slots 2*index and 2*index + 1 of the ovector.
    start_slot = index * 2
    end_slot   = start_slot + 1

    [
      ovector_ptr.get(:size_t, start_slot * width),
      ovector_ptr.get(:size_t, end_slot * width)
    ]
  end
end
|
133
|
+
|
134
|
+
# Extracts the named capture groups of a compiled pattern from PCRE2's
# name table (PCRE2_INFO_NAMECOUNT / NAMEENTRYSIZE / NAMETABLE).
#
# pattern_ptr - pointer to the compiled pattern
#
# Returns a Hash of name => [group number, ...]. A name can map to several
# group numbers when the table holds duplicate names. The Hash's default
# block yields a fresh empty Array for unknown names.
# Raises RuntimeError when any pcre2_pattern_info call returns non-zero.
def self.named_captures(pattern_ptr)
  named_captures_count = FFI::MemoryPointer.new(:uint32_t, 1)
  name_entry_size      = FFI::MemoryPointer.new(:uint32_t, 1)
  name_table_ptr       = FFI::MemoryPointer.new(:pointer, 1)

  if PCRE2::Lib.pcre2_pattern_info_8(pattern_ptr, PCRE2::PCRE2_INFO_NAMECOUNT, named_captures_count) != 0
    raise "Something went wrong"
  end

  if PCRE2::Lib.pcre2_pattern_info_8(pattern_ptr, PCRE2::PCRE2_INFO_NAMEENTRYSIZE, name_entry_size) != 0
    raise "Something went wrong"
  end

  if PCRE2::Lib.pcre2_pattern_info_8(pattern_ptr, PCRE2::PCRE2_INFO_NAMETABLE, name_table_ptr) != 0
    raise "Something went wrong"
  end

  named_captures_count = named_captures_count.read_uint32
  name_entry_size      = name_entry_size.read_uint32
  name_table_ptr       = name_table_ptr.read_pointer

  names_and_positions =
    named_captures_count.times.map do |i|
      entry_ptr = name_table_ptr + (i * name_entry_size)

      # Each entry starts with the group number as two bytes, most
      # significant first, followed by the NUL-terminated name. The bytes
      # must be read unsigned: a signed read turns any byte >= 128 negative
      # and yields a wrong group number.
      ovector_position = (entry_ptr.get_uint8(0) << 8) | entry_ptr.get_uint8(1)
      match_name       = (entry_ptr + 2).read_string_to_null

      [match_name, ovector_position]
    end

  # Convert an array of [name, position] into a Hash of name => [position (, position, ...)], with possible duplicate names
  names_and_positions.each_with_object(Hash.new {[]} ) { |(name, position), hash| hash[name] <<= position }
end
|
168
|
+
end
|
@@ -0,0 +1,349 @@
|
|
1
|
+
#
|
2
|
+
# Use replace:
|
3
|
+
#
|
4
|
+
# "#define ([^\W]+) \W* (.*)/"" -> "\1 = \2"
|
5
|
+
# "(0x[^u]+)u" -> "\1"
|
6
|
+
#
|
7
|
+
module PCRE2
|
8
|
+
# The following option bits can be passed to pcre2_compile(), pcre2_match(),
|
9
|
+
# or pcre2_dfa_match(). PCRE2_NO_UTF_CHECK affects only the function to which it
|
10
|
+
# is passed. Put these bits at the most significant end of the options word so
|
11
|
+
# others can be added next to them
|
12
|
+
|
13
|
+
PCRE2_ANCHORED = 0x80000000
|
14
|
+
PCRE2_NO_UTF_CHECK = 0x40000000
|
15
|
+
PCRE2_ENDANCHORED = 0x20000000
|
16
|
+
|
17
|
+
# The following option bits can be passed only to pcre2_compile(). However,
|
18
|
+
# they may affect compilation, JIT compilation, and/or interpretive execution.
|
19
|
+
# The following tags indicate which:
|
20
|
+
# C alters what is compiled by pcre2_compile()
|
21
|
+
# J alters what is compiled by pcre2_jit_compile()
|
22
|
+
# M is inspected during pcre2_match() execution
|
23
|
+
# D is inspected during pcre2_dfa_match() execution
|
24
|
+
|
25
|
+
PCRE2_ALLOW_EMPTY_CLASS = 0x00000001 # C
|
26
|
+
PCRE2_ALT_BSUX = 0x00000002 # C
|
27
|
+
PCRE2_AUTO_CALLOUT = 0x00000004 # C
|
28
|
+
PCRE2_CASELESS = 0x00000008 # C
|
29
|
+
PCRE2_DOLLAR_ENDONLY = 0x00000010 # J M D
|
30
|
+
PCRE2_DOTALL = 0x00000020 # C
|
31
|
+
PCRE2_DUPNAMES = 0x00000040 # C
|
32
|
+
PCRE2_EXTENDED = 0x00000080 # C
|
33
|
+
PCRE2_FIRSTLINE = 0x00000100 # J M D
|
34
|
+
PCRE2_MATCH_UNSET_BACKREF = 0x00000200 # C J M
|
35
|
+
PCRE2_MULTILINE = 0x00000400 # C
|
36
|
+
PCRE2_NEVER_UCP = 0x00000800 # C
|
37
|
+
PCRE2_NEVER_UTF = 0x00001000 # C
|
38
|
+
PCRE2_NO_AUTO_CAPTURE = 0x00002000 # C
|
39
|
+
PCRE2_NO_AUTO_POSSESS = 0x00004000 # C
|
40
|
+
PCRE2_NO_DOTSTAR_ANCHOR = 0x00008000 # C
|
41
|
+
PCRE2_NO_START_OPTIMIZE = 0x00010000 # J M D
|
42
|
+
PCRE2_UCP = 0x00020000 # C J M D
|
43
|
+
PCRE2_UNGREEDY = 0x00040000 # C
|
44
|
+
PCRE2_UTF = 0x00080000 # C J M D
|
45
|
+
PCRE2_NEVER_BACKSLASH_C = 0x00100000 # C
|
46
|
+
PCRE2_ALT_CIRCUMFLEX = 0x00200000 # J M D
|
47
|
+
PCRE2_ALT_VERBNAMES = 0x00400000 # C
|
48
|
+
PCRE2_USE_OFFSET_LIMIT = 0x00800000 # J M D
|
49
|
+
PCRE2_EXTENDED_MORE = 0x01000000 # C
|
50
|
+
PCRE2_LITERAL = 0x02000000 # C
|
51
|
+
|
52
|
+
# An additional compile options word is available in the compile context.
|
53
|
+
|
54
|
+
PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES = 0x00000001 # C
|
55
|
+
PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL = 0x00000002 # C
|
56
|
+
PCRE2_EXTRA_MATCH_WORD = 0x00000004 # C
|
57
|
+
PCRE2_EXTRA_MATCH_LINE = 0x00000008 # C
|
58
|
+
PCRE2_EXTRA_ESCAPED_CR_IS_LF = 0x00000010 # C
|
59
|
+
PCRE2_EXTRA_ALT_BSUX = 0x00000020 # C
|
60
|
+
|
61
|
+
# These are for pcre2_jit_compile().
|
62
|
+
|
63
|
+
PCRE2_JIT_COMPLETE = 0x00000001 # For full matching
|
64
|
+
PCRE2_JIT_PARTIAL_SOFT = 0x00000002
|
65
|
+
PCRE2_JIT_PARTIAL_HARD = 0x00000004
|
66
|
+
PCRE2_JIT_INVALID_UTF = 0x00000100
|
67
|
+
|
68
|
+
# These are for pcre2_match(), pcre2_dfa_match(), pcre2_jit_match(), and
|
69
|
+
# pcre2_substitute(). Some are allowed only for one of the functions, and in
|
70
|
+
# these cases it is noted below. Note that PCRE2_ANCHORED, PCRE2_ENDANCHORED and
|
71
|
+
# PCRE2_NO_UTF_CHECK can also be passed to these functions (though
|
72
|
+
# pcre2_jit_match() ignores the latter since it bypasses all sanity checks).
|
73
|
+
|
74
|
+
PCRE2_NOTBOL = 0x00000001
|
75
|
+
PCRE2_NOTEOL = 0x00000002
|
76
|
+
PCRE2_NOTEMPTY = 0x00000004 # ) These two must be kept
|
77
|
+
PCRE2_NOTEMPTY_ATSTART = 0x00000008 # ) adjacent to each other.
|
78
|
+
PCRE2_PARTIAL_SOFT = 0x00000010
|
79
|
+
PCRE2_PARTIAL_HARD = 0x00000020
|
80
|
+
PCRE2_DFA_RESTART = 0x00000040 # pcre2_dfa_match() only
|
81
|
+
PCRE2_DFA_SHORTEST = 0x00000080 # pcre2_dfa_match() only
|
82
|
+
PCRE2_SUBSTITUTE_GLOBAL = 0x00000100 # pcre2_substitute() only
|
83
|
+
PCRE2_SUBSTITUTE_EXTENDED = 0x00000200 # pcre2_substitute() only
|
84
|
+
PCRE2_SUBSTITUTE_UNSET_EMPTY = 0x00000400 # pcre2_substitute() only
|
85
|
+
PCRE2_SUBSTITUTE_UNKNOWN_UNSET = 0x00000800 # pcre2_substitute() only
|
86
|
+
PCRE2_SUBSTITUTE_OVERFLOW_LENGTH = 0x00001000 # pcre2_substitute() only
|
87
|
+
PCRE2_NO_JIT = 0x00002000 # Not for pcre2_dfa_match()
|
88
|
+
PCRE2_COPY_MATCHED_SUBJECT = 0x00004000
|
89
|
+
|
90
|
+
# Options for pcre2_pattern_convert().
|
91
|
+
|
92
|
+
PCRE2_CONVERT_UTF = 0x00000001
|
93
|
+
PCRE2_CONVERT_NO_UTF_CHECK = 0x00000002
|
94
|
+
PCRE2_CONVERT_POSIX_BASIC = 0x00000004
|
95
|
+
PCRE2_CONVERT_POSIX_EXTENDED = 0x00000008
|
96
|
+
PCRE2_CONVERT_GLOB = 0x00000010
|
97
|
+
PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR = 0x00000030
|
98
|
+
PCRE2_CONVERT_GLOB_NO_STARSTAR = 0x00000050
|
99
|
+
|
100
|
+
# Newline and \R settings, for use in compile contexts. The newline values
|
101
|
+
# must be kept in step with values set in config.h and both sets must all be
|
102
|
+
# greater than zero.
|
103
|
+
|
104
|
+
PCRE2_NEWLINE_CR = 1
|
105
|
+
PCRE2_NEWLINE_LF = 2
|
106
|
+
PCRE2_NEWLINE_CRLF = 3
|
107
|
+
PCRE2_NEWLINE_ANY = 4
|
108
|
+
PCRE2_NEWLINE_ANYCRLF = 5
|
109
|
+
PCRE2_NEWLINE_NUL = 6
|
110
|
+
PCRE2_BSR_UNICODE = 1
|
111
|
+
PCRE2_BSR_ANYCRLF = 2
|
112
|
+
|
113
|
+
# Error codes for pcre2_compile(). Some of these are also used by
|
114
|
+
# pcre2_pattern_convert().
|
115
|
+
|
116
|
+
PCRE2_ERROR_END_BACKSLASH = 101
|
117
|
+
PCRE2_ERROR_END_BACKSLASH_C = 102
|
118
|
+
PCRE2_ERROR_UNKNOWN_ESCAPE = 103
|
119
|
+
PCRE2_ERROR_QUANTIFIER_OUT_OF_ORDER = 104
|
120
|
+
PCRE2_ERROR_QUANTIFIER_TOO_BIG = 105
|
121
|
+
PCRE2_ERROR_MISSING_SQUARE_BRACKET = 106
|
122
|
+
PCRE2_ERROR_ESCAPE_INVALID_IN_CLASS = 107
|
123
|
+
PCRE2_ERROR_CLASS_RANGE_ORDER = 108
|
124
|
+
PCRE2_ERROR_QUANTIFIER_INVALID = 109
|
125
|
+
PCRE2_ERROR_INTERNAL_UNEXPECTED_REPEAT = 110
|
126
|
+
PCRE2_ERROR_INVALID_AFTER_PARENS_QUERY = 111
|
127
|
+
PCRE2_ERROR_POSIX_CLASS_NOT_IN_CLASS = 112
|
128
|
+
PCRE2_ERROR_POSIX_NO_SUPPORT_COLLATING = 113
|
129
|
+
PCRE2_ERROR_MISSING_CLOSING_PARENTHESIS = 114
|
130
|
+
PCRE2_ERROR_BAD_SUBPATTERN_REFERENCE = 115
|
131
|
+
PCRE2_ERROR_NULL_PATTERN = 116
|
132
|
+
PCRE2_ERROR_BAD_OPTIONS = 117
|
133
|
+
PCRE2_ERROR_MISSING_COMMENT_CLOSING = 118
|
134
|
+
PCRE2_ERROR_PARENTHESES_NEST_TOO_DEEP = 119
|
135
|
+
PCRE2_ERROR_PATTERN_TOO_LARGE = 120
|
136
|
+
PCRE2_ERROR_HEAP_FAILED = 121
|
137
|
+
PCRE2_ERROR_UNMATCHED_CLOSING_PARENTHESIS = 122
|
138
|
+
PCRE2_ERROR_INTERNAL_CODE_OVERFLOW = 123
|
139
|
+
PCRE2_ERROR_MISSING_CONDITION_CLOSING = 124
|
140
|
+
PCRE2_ERROR_LOOKBEHIND_NOT_FIXED_LENGTH = 125
|
141
|
+
PCRE2_ERROR_ZERO_RELATIVE_REFERENCE = 126
|
142
|
+
PCRE2_ERROR_TOO_MANY_CONDITION_BRANCHES = 127
|
143
|
+
PCRE2_ERROR_CONDITION_ASSERTION_EXPECTED = 128
|
144
|
+
PCRE2_ERROR_BAD_RELATIVE_REFERENCE = 129
|
145
|
+
PCRE2_ERROR_UNKNOWN_POSIX_CLASS = 130
|
146
|
+
PCRE2_ERROR_INTERNAL_STUDY_ERROR = 131
|
147
|
+
PCRE2_ERROR_UNICODE_NOT_SUPPORTED = 132
|
148
|
+
PCRE2_ERROR_PARENTHESES_STACK_CHECK = 133
|
149
|
+
PCRE2_ERROR_CODE_POINT_TOO_BIG = 134
|
150
|
+
PCRE2_ERROR_LOOKBEHIND_TOO_COMPLICATED = 135
|
151
|
+
PCRE2_ERROR_LOOKBEHIND_INVALID_BACKSLASH_C = 136
|
152
|
+
PCRE2_ERROR_UNSUPPORTED_ESCAPE_SEQUENCE = 137
|
153
|
+
PCRE2_ERROR_CALLOUT_NUMBER_TOO_BIG = 138
|
154
|
+
PCRE2_ERROR_MISSING_CALLOUT_CLOSING = 139
|
155
|
+
PCRE2_ERROR_ESCAPE_INVALID_IN_VERB = 140
|
156
|
+
PCRE2_ERROR_UNRECOGNIZED_AFTER_QUERY_P = 141
|
157
|
+
PCRE2_ERROR_MISSING_NAME_TERMINATOR = 142
|
158
|
+
PCRE2_ERROR_DUPLICATE_SUBPATTERN_NAME = 143
|
159
|
+
PCRE2_ERROR_INVALID_SUBPATTERN_NAME = 144
|
160
|
+
PCRE2_ERROR_UNICODE_PROPERTIES_UNAVAILABLE = 145
|
161
|
+
PCRE2_ERROR_MALFORMED_UNICODE_PROPERTY = 146
|
162
|
+
PCRE2_ERROR_UNKNOWN_UNICODE_PROPERTY = 147
|
163
|
+
PCRE2_ERROR_SUBPATTERN_NAME_TOO_LONG = 148
|
164
|
+
PCRE2_ERROR_TOO_MANY_NAMED_SUBPATTERNS = 149
|
165
|
+
PCRE2_ERROR_CLASS_INVALID_RANGE = 150
|
166
|
+
PCRE2_ERROR_OCTAL_BYTE_TOO_BIG = 151
|
167
|
+
PCRE2_ERROR_INTERNAL_OVERRAN_WORKSPACE = 152
|
168
|
+
PCRE2_ERROR_INTERNAL_MISSING_SUBPATTERN = 153
|
169
|
+
PCRE2_ERROR_DEFINE_TOO_MANY_BRANCHES = 154
|
170
|
+
PCRE2_ERROR_BACKSLASH_O_MISSING_BRACE = 155
|
171
|
+
PCRE2_ERROR_INTERNAL_UNKNOWN_NEWLINE = 156
|
172
|
+
PCRE2_ERROR_BACKSLASH_G_SYNTAX = 157
|
173
|
+
PCRE2_ERROR_PARENS_QUERY_R_MISSING_CLOSING = 158
|
174
|
+
|
175
|
+
# Error 159 is obsolete and should now never occur
|
176
|
+
|
177
|
+
PCRE2_ERROR_VERB_ARGUMENT_NOT_ALLOWED = 159
|
178
|
+
PCRE2_ERROR_VERB_UNKNOWN = 160
|
179
|
+
PCRE2_ERROR_SUBPATTERN_NUMBER_TOO_BIG = 161
|
180
|
+
PCRE2_ERROR_SUBPATTERN_NAME_EXPECTED = 162
|
181
|
+
PCRE2_ERROR_INTERNAL_PARSED_OVERFLOW = 163
|
182
|
+
PCRE2_ERROR_INVALID_OCTAL = 164
|
183
|
+
PCRE2_ERROR_SUBPATTERN_NAMES_MISMATCH = 165
|
184
|
+
PCRE2_ERROR_MARK_MISSING_ARGUMENT = 166
|
185
|
+
PCRE2_ERROR_INVALID_HEXADECIMAL = 167
|
186
|
+
PCRE2_ERROR_BACKSLASH_C_SYNTAX = 168
|
187
|
+
PCRE2_ERROR_BACKSLASH_K_SYNTAX = 169
|
188
|
+
PCRE2_ERROR_INTERNAL_BAD_CODE_LOOKBEHINDS = 170
|
189
|
+
PCRE2_ERROR_BACKSLASH_N_IN_CLASS = 171
|
190
|
+
PCRE2_ERROR_CALLOUT_STRING_TOO_LONG = 172
|
191
|
+
PCRE2_ERROR_UNICODE_DISALLOWED_CODE_POINT = 173
|
192
|
+
PCRE2_ERROR_UTF_IS_DISABLED = 174
|
193
|
+
PCRE2_ERROR_UCP_IS_DISABLED = 175
|
194
|
+
PCRE2_ERROR_VERB_NAME_TOO_LONG = 176
|
195
|
+
PCRE2_ERROR_BACKSLASH_U_CODE_POINT_TOO_BIG = 177
|
196
|
+
PCRE2_ERROR_MISSING_OCTAL_OR_HEX_DIGITS = 178
|
197
|
+
PCRE2_ERROR_VERSION_CONDITION_SYNTAX = 179
|
198
|
+
PCRE2_ERROR_INTERNAL_BAD_CODE_AUTO_POSSESS = 180
|
199
|
+
PCRE2_ERROR_CALLOUT_NO_STRING_DELIMITER = 181
|
200
|
+
PCRE2_ERROR_CALLOUT_BAD_STRING_DELIMITER = 182
|
201
|
+
PCRE2_ERROR_BACKSLASH_C_CALLER_DISABLED = 183
|
202
|
+
PCRE2_ERROR_QUERY_BARJX_NEST_TOO_DEEP = 184
|
203
|
+
PCRE2_ERROR_BACKSLASH_C_LIBRARY_DISABLED = 185
|
204
|
+
PCRE2_ERROR_PATTERN_TOO_COMPLICATED = 186
|
205
|
+
PCRE2_ERROR_LOOKBEHIND_TOO_LONG = 187
|
206
|
+
PCRE2_ERROR_PATTERN_STRING_TOO_LONG = 188
|
207
|
+
PCRE2_ERROR_INTERNAL_BAD_CODE = 189
|
208
|
+
PCRE2_ERROR_INTERNAL_BAD_CODE_IN_SKIP = 190
|
209
|
+
PCRE2_ERROR_NO_SURROGATES_IN_UTF16 = 191
|
210
|
+
PCRE2_ERROR_BAD_LITERAL_OPTIONS = 192
|
211
|
+
PCRE2_ERROR_SUPPORTED_ONLY_IN_UNICODE = 193
|
212
|
+
PCRE2_ERROR_INVALID_HYPHEN_IN_OPTIONS = 194
|
213
|
+
PCRE2_ERROR_ALPHA_ASSERTION_UNKNOWN = 195
|
214
|
+
PCRE2_ERROR_SCRIPT_RUN_NOT_AVAILABLE = 196
|
215
|
+
|
216
|
+
# "Expected" matching error codes: no match and partial match.
|
217
|
+
|
218
|
+
PCRE2_ERROR_NOMATCH = (-1)
|
219
|
+
PCRE2_ERROR_PARTIAL = (-2)
|
220
|
+
|
221
|
+
# Error codes for UTF-8 validity checks
|
222
|
+
|
223
|
+
PCRE2_ERROR_UTF8_ERR1 = (-3)
|
224
|
+
PCRE2_ERROR_UTF8_ERR2 = (-4)
|
225
|
+
PCRE2_ERROR_UTF8_ERR3 = (-5)
|
226
|
+
PCRE2_ERROR_UTF8_ERR4 = (-6)
|
227
|
+
PCRE2_ERROR_UTF8_ERR5 = (-7)
|
228
|
+
PCRE2_ERROR_UTF8_ERR6 = (-8)
|
229
|
+
PCRE2_ERROR_UTF8_ERR7 = (-9)
|
230
|
+
PCRE2_ERROR_UTF8_ERR8 = (-10)
|
231
|
+
PCRE2_ERROR_UTF8_ERR9 = (-11)
|
232
|
+
PCRE2_ERROR_UTF8_ERR10 = (-12)
|
233
|
+
PCRE2_ERROR_UTF8_ERR11 = (-13)
|
234
|
+
PCRE2_ERROR_UTF8_ERR12 = (-14)
|
235
|
+
PCRE2_ERROR_UTF8_ERR13 = (-15)
|
236
|
+
PCRE2_ERROR_UTF8_ERR14 = (-16)
|
237
|
+
PCRE2_ERROR_UTF8_ERR15 = (-17)
|
238
|
+
PCRE2_ERROR_UTF8_ERR16 = (-18)
|
239
|
+
PCRE2_ERROR_UTF8_ERR17 = (-19)
|
240
|
+
PCRE2_ERROR_UTF8_ERR18 = (-20)
|
241
|
+
PCRE2_ERROR_UTF8_ERR19 = (-21)
|
242
|
+
PCRE2_ERROR_UTF8_ERR20 = (-22)
|
243
|
+
PCRE2_ERROR_UTF8_ERR21 = (-23)
|
244
|
+
|
245
|
+
# Error codes for UTF-16 validity checks
|
246
|
+
|
247
|
+
PCRE2_ERROR_UTF16_ERR1 = (-24)
|
248
|
+
PCRE2_ERROR_UTF16_ERR2 = (-25)
|
249
|
+
PCRE2_ERROR_UTF16_ERR3 = (-26)
|
250
|
+
|
251
|
+
# Error codes for UTF-32 validity checks
|
252
|
+
|
253
|
+
PCRE2_ERROR_UTF32_ERR1 = (-27)
|
254
|
+
PCRE2_ERROR_UTF32_ERR2 = (-28)
|
255
|
+
|
256
|
+
# Miscellaneous error codes for pcre2[_dfa]_match(), substring extraction
|
257
|
+
# functions, context functions, and serializing functions. They are in numerical
|
258
|
+
# order. Originally they were in alphabetical order too, but now that PCRE2 is
|
259
|
+
# released, the numbers must not be changed.
|
260
|
+
|
261
|
+
PCRE2_ERROR_BADDATA = (-29)
|
262
|
+
PCRE2_ERROR_MIXEDTABLES = (-30) # Name was changed
|
263
|
+
PCRE2_ERROR_BADMAGIC = (-31)
|
264
|
+
PCRE2_ERROR_BADMODE = (-32)
|
265
|
+
PCRE2_ERROR_BADOFFSET = (-33)
|
266
|
+
PCRE2_ERROR_BADOPTION = (-34)
|
267
|
+
PCRE2_ERROR_BADREPLACEMENT = (-35)
|
268
|
+
PCRE2_ERROR_BADUTFOFFSET = (-36)
|
269
|
+
PCRE2_ERROR_CALLOUT = (-37) # Never used by PCRE2 itself
|
270
|
+
PCRE2_ERROR_DFA_BADRESTART = (-38)
|
271
|
+
PCRE2_ERROR_DFA_RECURSE = (-39)
|
272
|
+
PCRE2_ERROR_DFA_UCOND = (-40)
|
273
|
+
PCRE2_ERROR_DFA_UFUNC = (-41)
|
274
|
+
PCRE2_ERROR_DFA_UITEM = (-42)
|
275
|
+
PCRE2_ERROR_DFA_WSSIZE = (-43)
|
276
|
+
PCRE2_ERROR_INTERNAL = (-44)
|
277
|
+
PCRE2_ERROR_JIT_BADOPTION = (-45)
|
278
|
+
PCRE2_ERROR_JIT_STACKLIMIT = (-46)
|
279
|
+
PCRE2_ERROR_MATCHLIMIT = (-47)
|
280
|
+
PCRE2_ERROR_NOMEMORY = (-48)
|
281
|
+
PCRE2_ERROR_NOSUBSTRING = (-49)
|
282
|
+
PCRE2_ERROR_NOUNIQUESUBSTRING = (-50)
|
283
|
+
PCRE2_ERROR_NULL = (-51)
|
284
|
+
PCRE2_ERROR_RECURSELOOP = (-52)
|
285
|
+
PCRE2_ERROR_DEPTHLIMIT = (-53)
|
286
|
+
PCRE2_ERROR_RECURSIONLIMIT = (-53) # Obsolete synonym
|
287
|
+
PCRE2_ERROR_UNAVAILABLE = (-54)
|
288
|
+
PCRE2_ERROR_UNSET = (-55)
|
289
|
+
PCRE2_ERROR_BADOFFSETLIMIT = (-56)
|
290
|
+
PCRE2_ERROR_BADREPESCAPE = (-57)
|
291
|
+
PCRE2_ERROR_REPMISSINGBRACE = (-58)
|
292
|
+
PCRE2_ERROR_BADSUBSTITUTION = (-59)
|
293
|
+
PCRE2_ERROR_BADSUBSPATTERN = (-60)
|
294
|
+
PCRE2_ERROR_TOOMANYREPLACE = (-61)
|
295
|
+
PCRE2_ERROR_BADSERIALIZEDDATA = (-62)
|
296
|
+
PCRE2_ERROR_HEAPLIMIT = (-63)
|
297
|
+
PCRE2_ERROR_CONVERT_SYNTAX = (-64)
|
298
|
+
PCRE2_ERROR_INTERNAL_DUPMATCH = (-65)
|
299
|
+
|
300
|
+
# Request types for pcre2_pattern_info()
|
301
|
+
|
302
|
+
PCRE2_INFO_ALLOPTIONS = 0
|
303
|
+
PCRE2_INFO_ARGOPTIONS = 1
|
304
|
+
PCRE2_INFO_BACKREFMAX = 2
|
305
|
+
PCRE2_INFO_BSR = 3
|
306
|
+
PCRE2_INFO_CAPTURECOUNT = 4
|
307
|
+
PCRE2_INFO_FIRSTCODEUNIT = 5
|
308
|
+
PCRE2_INFO_FIRSTCODETYPE = 6
|
309
|
+
PCRE2_INFO_FIRSTBITMAP = 7
|
310
|
+
PCRE2_INFO_HASCRORLF = 8
|
311
|
+
PCRE2_INFO_JCHANGED = 9
|
312
|
+
PCRE2_INFO_JITSIZE = 10
|
313
|
+
PCRE2_INFO_LASTCODEUNIT = 11
|
314
|
+
PCRE2_INFO_LASTCODETYPE = 12
|
315
|
+
PCRE2_INFO_MATCHEMPTY = 13
|
316
|
+
PCRE2_INFO_MATCHLIMIT = 14
|
317
|
+
PCRE2_INFO_MAXLOOKBEHIND = 15
|
318
|
+
PCRE2_INFO_MINLENGTH = 16
|
319
|
+
PCRE2_INFO_NAMECOUNT = 17
|
320
|
+
PCRE2_INFO_NAMEENTRYSIZE = 18
|
321
|
+
PCRE2_INFO_NAMETABLE = 19
|
322
|
+
PCRE2_INFO_NEWLINE = 20
|
323
|
+
PCRE2_INFO_DEPTHLIMIT = 21
|
324
|
+
PCRE2_INFO_RECURSIONLIMIT = 21 # Obsolete synonym
|
325
|
+
PCRE2_INFO_SIZE = 22
|
326
|
+
PCRE2_INFO_HASBACKSLASHC = 23
|
327
|
+
PCRE2_INFO_FRAMESIZE = 24
|
328
|
+
PCRE2_INFO_HEAPLIMIT = 25
|
329
|
+
PCRE2_INFO_EXTRAOPTIONS = 26
|
330
|
+
|
331
|
+
# Request types for pcre2_config().
|
332
|
+
|
333
|
+
PCRE2_CONFIG_BSR = 0
|
334
|
+
PCRE2_CONFIG_JIT = 1
|
335
|
+
PCRE2_CONFIG_JITTARGET = 2
|
336
|
+
PCRE2_CONFIG_LINKSIZE = 3
|
337
|
+
PCRE2_CONFIG_MATCHLIMIT = 4
|
338
|
+
PCRE2_CONFIG_NEWLINE = 5
|
339
|
+
PCRE2_CONFIG_PARENSLIMIT = 6
|
340
|
+
PCRE2_CONFIG_DEPTHLIMIT = 7
|
341
|
+
PCRE2_CONFIG_RECURSIONLIMIT = 7 # Obsolete synonym
|
342
|
+
PCRE2_CONFIG_STACKRECURSE = 8 # Obsolete
|
343
|
+
PCRE2_CONFIG_UNICODE = 9
|
344
|
+
PCRE2_CONFIG_UNICODE_VERSION = 10
|
345
|
+
PCRE2_CONFIG_VERSION = 11
|
346
|
+
PCRE2_CONFIG_HEAPLIMIT = 12
|
347
|
+
PCRE2_CONFIG_NEVER_BACKSLASH_C = 13
|
348
|
+
PCRE2_CONFIG_COMPILED_WIDTHS = 14
|
349
|
+
end
|
data/lib/pcre2/matchdata.rb
ADDED
@@ -0,0 +1,41 @@
|
|
1
|
+
# Holds the outcome of one match: the regexp that produced it, the subject
# string, and a list of [start, end] offset pairs (one pair per group).
#
# capture_pairs and captures both drop the first pair, so index 0 is
# presumably the whole match and indexes 1.. the capture groups.
class PCRE2::MatchData
  attr_reader :regexp, :pairs, :string

  # regexp - the object that produced this match; must respond to
  #          #named_captures (used to resolve group names in #[]).
  # string - the subject string the offsets in +pairs+ refer to.
  # pairs  - indexable collection of [start, end] offset pairs.
  def initialize(regexp, string, pairs)
    @regexp = regexp
    @string = string
    @pairs = pairs
  end

  # Returns the text matched by a group.
  #
  # key - a Numeric group index, or a group name (anything else is converted
  #       with #to_s and looked up in regexp.named_captures).
  #
  # Returns the matched substring, or nil when there is no pair at that index.
  # Raises IndexError when +key+ is a name that the regexp does not define
  # (previously this surfaced as a NoMethodError on nil).
  def [](key)
    index = key.is_a?(Numeric) ? key : group_index_for(key)

    pair = pairs[index]
    string_from_pair(*pair) if pair
  end

  # Returns the [start, end] offset pair stored for group +n+.
  def offset(n)
    pairs[n]
  end

  # Returns every offset pair except the first one.
  def capture_pairs
    pairs[1..-1]
  end

  # Returns the substring for every pair, in order (first pair included).
  def to_a
    pairs.map { |pair| string_from_pair(*pair) }
  end

  # Returns the substrings for every pair except the first one.
  def captures
    to_a[1..-1]
  end

  private

  # Resolves a group name to the first index listed for it in
  # regexp.named_captures.
  #
  # Raises IndexError when the name has no entry (or an empty entry), mirroring
  # the error Ruby's own MatchData#[] raises for unknown group names.
  def group_index_for(name)
    indexes = regexp.named_captures[name.to_s]
    index = indexes && indexes.first
    raise IndexError, "undefined group name reference: #{name}" if index.nil?

    index
  end

  # Cuts the substring between two offsets out of the subject string.
  #
  # NOTE(review): String#slice counts characters; if the offsets coming from
  # PCRE2 are byte offsets this is wrong for multibyte strings - confirm what
  # PCRE2::Lib.get_ovector_pairs returns.
  def string_from_pair(start, ending)
    string.slice(start, ending - start)
  end
end
|
data/lib/pcre2/regexp.rb
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
# A pattern compiled through PCRE2::Lib, together with its original source.
class PCRE2::Regexp
  attr_reader :source, :pattern_ptr

  # pattern - the pattern source; kept verbatim in #source.
  # options - any further arguments are collected into an array and handed to
  #           PCRE2::Lib.compile_pattern unchanged.
  def initialize(pattern, *options)
    @source = pattern
    @pattern_ptr = PCRE2::Lib.compile_pattern(pattern, options)
  end

  # Requests JIT compilation of the pattern for complete, partial-soft and
  # partial-hard matching. Returns true when pcre2_jit_compile_8 returns 0.
  def jit!
    jit_flags = [
      PCRE2::PCRE2_JIT_COMPLETE,
      PCRE2::PCRE2_JIT_PARTIAL_SOFT,
      PCRE2::PCRE2_JIT_PARTIAL_HARD
    ].reduce(:|)

    PCRE2::Lib.pcre2_jit_compile_8(pattern_ptr, jit_flags) == 0
  end

  # Runs the pattern against +str+, passing +pos+ through as the :position
  # keyword of PCRE2::Lib.match.
  #
  # Returns nil when the reported result count is 0, otherwise a
  # PCRE2::MatchData built from the offset pairs of the match.
  def match(str, pos = nil)
    count, data_ptr = PCRE2::Lib.match(@pattern_ptr, str, position: pos)
    return nil if count == 0

    offsets = PCRE2::Lib.get_ovector_pairs(data_ptr, count)
    PCRE2::MatchData.new(self, str, offsets)
  end

  # Named-capture table as reported by PCRE2::Lib.named_captures; the lookup
  # is memoized after the first truthy result.
  def named_captures
    @named_captures ||= PCRE2::Lib.named_captures(pattern_ptr)
  end

  # The keys of #named_captures.
  def names
    named_captures.keys
  end
end
|
data/pcre2.gemspec
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
require_relative 'lib/pcre2/version'
|
2
|
+
|
3
|
+
Gem::Specification.new do |spec|
  # --- Identity -------------------------------------------------------------
  spec.name    = 'pcre2'
  spec.version = PCRE2::VERSION
  spec.authors = ['David Verhasselt']
  spec.email   = ['david@crowdway.com']

  # --- Description ----------------------------------------------------------
  spec.summary     = 'Use the PCRE2 library inside your Ruby projects'
  spec.description = 'Wraps the PCRE2 library using FFI so it and the advanced functionality it provides can be used in Ruby projects'
  spec.homepage    = 'https://github.com/dv/pcre2'
  spec.license     = 'MIT'
  spec.required_ruby_version = Gem::Requirement.new('>= 2.3.0')

  # All three metadata links currently point at the homepage.
  %w[homepage_uri source_code_uri changelog_uri].each do |link|
    spec.metadata[link] = spec.homepage
  end

  # --- Packaged files -------------------------------------------------------
  # Ship every file tracked by git (listed from the gemspec's own directory),
  # leaving out anything under test/, spec/ or features/.
  gem_root = File.expand_path('..', __FILE__)
  test_paths = %r{^(test|spec|features)/}
  spec.files = Dir.chdir(gem_root) do
    tracked = `git ls-files -z`.split("\x0")
    tracked.reject { |path| path.match(test_paths) }
  end

  # Executables are the basenames of packaged files that live under exe/.
  spec.bindir        = 'exe'
  spec.executables   = spec.files.grep(%r{^exe/}).map { |path| File.basename(path) }
  spec.require_paths = ['lib']

  # --- Runtime dependencies -------------------------------------------------
  # NOTE(review): no version constraint is given for ffi - consider bounding it.
  spec.add_dependency 'ffi'
end
|
metadata
ADDED
@@ -0,0 +1,79 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: pcre2
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- David Verhasselt
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2020-08-05 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: ffi
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
description: Wraps the PCRE2 library using FFI so it and the advanced functionality
|
28
|
+
it provides can be used in Ruby projects
|
29
|
+
email:
|
30
|
+
- david@crowdway.com
|
31
|
+
executables: []
|
32
|
+
extensions: []
|
33
|
+
extra_rdoc_files: []
|
34
|
+
files:
|
35
|
+
- ".github/workflows/tests.yml"
|
36
|
+
- ".gitignore"
|
37
|
+
- ".rspec"
|
38
|
+
- Gemfile
|
39
|
+
- LICENSE.txt
|
40
|
+
- README.md
|
41
|
+
- Rakefile
|
42
|
+
- benchmark.rake
|
43
|
+
- bin/console
|
44
|
+
- bin/setup
|
45
|
+
- lib/pcre2.rb
|
46
|
+
- lib/pcre2/error.rb
|
47
|
+
- lib/pcre2/lib.rb
|
48
|
+
- lib/pcre2/lib/constants.rb
|
49
|
+
- lib/pcre2/matchdata.rb
|
50
|
+
- lib/pcre2/regexp.rb
|
51
|
+
- lib/pcre2/version.rb
|
52
|
+
- pcre2.gemspec
|
53
|
+
homepage: https://github.com/dv/pcre2
|
54
|
+
licenses:
|
55
|
+
- MIT
|
56
|
+
metadata:
|
57
|
+
homepage_uri: https://github.com/dv/pcre2
|
58
|
+
source_code_uri: https://github.com/dv/pcre2
|
59
|
+
changelog_uri: https://github.com/dv/pcre2
|
60
|
+
post_install_message:
|
61
|
+
rdoc_options: []
|
62
|
+
require_paths:
|
63
|
+
- lib
|
64
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: 2.3.0
|
69
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
70
|
+
requirements:
|
71
|
+
- - ">="
|
72
|
+
- !ruby/object:Gem::Version
|
73
|
+
version: '0'
|
74
|
+
requirements: []
|
75
|
+
rubygems_version: 3.0.3
|
76
|
+
signing_key:
|
77
|
+
specification_version: 4
|
78
|
+
summary: Use the PCRE2 library inside your Ruby projects
|
79
|
+
test_files: []
|