ultragrep 0.1.0 → 0.10.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/bin/ultragrep_build_indexes +45 -0
- data/lib/ultragrep.rb +47 -61
- data/lib/ultragrep/config.rb +6 -0
- data/lib/ultragrep/log_collector.rb +67 -0
- data/lib/ultragrep/version.rb +1 -1
- data/src/Makefile +24 -0
- data/{ext/ultragrep → src}/extconf.rb +0 -0
- data/src/pcre.h +668 -0
- data/src/request.h +13 -0
- data/src/ug_build_index.c +109 -0
- data/src/ug_cat.c +188 -0
- data/src/ug_guts.c +199 -0
- data/src/ug_gzip.c +242 -0
- data/src/ug_gzip.h +8 -0
- data/src/ug_index.c +62 -0
- data/src/ug_index.h +23 -0
- data/src/ug_lua.c +119 -0
- data/src/ug_lua.h +10 -0
- metadata +25 -28
- data/ext/ultragrep/Makefile +0 -39
- data/ext/ultragrep/rails_req.c +0 -102
- data/ext/ultragrep/rails_req.h +0 -6
- data/ext/ultragrep/req_matcher.h +0 -17
- data/ext/ultragrep/request.c +0 -41
- data/ext/ultragrep/request.h +0 -22
- data/ext/ultragrep/ug_build_index.c +0 -99
- data/ext/ultragrep/ug_cat.c +0 -46
- data/ext/ultragrep/ug_guts.c +0 -138
- data/ext/ultragrep/ug_index.c +0 -83
- data/ext/ultragrep/ug_index.h +0 -27
- data/ext/ultragrep/work_req.c +0 -200
- data/ext/ultragrep/work_req.h +0 -6
- data/ext/ultragrep/zran.c +0 -291
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 90fb78cb0ec3891e79a7206f6a81623e9c37578b
|
4
|
+
data.tar.gz: 4826c6158c14c474c204567170bf07e7e1318bd8
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 352033e050f129927155baeebf378cd8cb8a1ee42073ec800811869fdbdebf98fc18765ae0062fe3bb1e4c83a2994fdd3e019ae3d853745ba0e9e037d7f411d2
|
7
|
+
data.tar.gz: cfc2726df0c3955d58d6e8784d7f5457931d51f028d76068628520fcd8b2cf4b1a4bdaf2e21df9965ed85f5b61b21c0b68026c45ca9091d7112b02c5cf3876e9
|
@@ -0,0 +1,45 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
$LOAD_PATH << File.join(File.dirname(__FILE__), '..', 'lib')
|
3
|
+
|
4
|
+
require "optparse"
|
5
|
+
require "ultragrep/config"
|
6
|
+
require "ultragrep/log_collector"
|
7
|
+
|
8
|
+
options = {:files => [], :range_start => 0, :range_end => 2**64}
|
9
|
+
|
10
|
+
parser = OptionParser.new do |parser|
|
11
|
+
parser.banner = <<-BANNER.gsub(/^ {6,}/, "")
|
12
|
+
Usage: ultragrep_build_indexes -t type [OPTIONS]
|
13
|
+
|
14
|
+
Options are:
|
15
|
+
BANNER
|
16
|
+
parser.on("--help", "-h", "This text"){ puts parser; exit 0 }
|
17
|
+
parser.on("--config", "-c FILE", String, "Config file location (default: #{Ultragrep::Config::DEFAULT_LOCATIONS.join(", ")})") { |config| options[:config] = config }
|
18
|
+
parser.on("--type", "-t TYPE", String, "log file class to archive") { |config| options[:type] = config }
|
19
|
+
end
|
20
|
+
|
21
|
+
parser.parse!(ARGV)
|
22
|
+
if !options[:type]
|
23
|
+
puts parser
|
24
|
+
exit 1
|
25
|
+
end
|
26
|
+
|
27
|
+
def index_for_fname(name)
|
28
|
+
File.dirname(name) + "/.#{File.basename(name)}.idx"
|
29
|
+
end
|
30
|
+
|
31
|
+
def ug_build_index
|
32
|
+
File.dirname(__FILE__) + "/../src/ug_build_index"
|
33
|
+
end
|
34
|
+
|
35
|
+
config = Ultragrep::Config.new(options[:config])
|
36
|
+
collector = Ultragrep::LogCollector.new(config.log_path_glob(options[:type]), options)
|
37
|
+
files = collector.collect_files
|
38
|
+
files.flatten.each do |f|
|
39
|
+
next if f =~ /\.gz$/ && File.exist?(index_for_fname(f))
|
40
|
+
# double check that the file still exists; sands may have shifted
|
41
|
+
next unless File.exist?(f)
|
42
|
+
system("#{ug_build_index} #{config['types'][options[:type]]['lua']} #{f}")
|
43
|
+
puts("#{ug_build_index} #{config['types'][options[:type]]['lua']} #{f}")
|
44
|
+
end
|
45
|
+
|
data/lib/ultragrep.rb
CHANGED
@@ -5,11 +5,11 @@ require 'socket'
|
|
5
5
|
require 'yaml'
|
6
6
|
|
7
7
|
require 'ultragrep/config'
|
8
|
+
require 'ultragrep/log_collector'
|
8
9
|
|
9
10
|
module Ultragrep
|
10
11
|
HOUR = 60 * 60
|
11
12
|
DAY = 24 * HOUR
|
12
|
-
DATE_FROM_FILENAME = /(\d+)(\.\w+)?$/
|
13
13
|
|
14
14
|
class RequestPrinter
|
15
15
|
def initialize(verbose)
|
@@ -78,7 +78,8 @@ module Ultragrep
|
|
78
78
|
end
|
79
79
|
|
80
80
|
class RequestPerformancePrinter < RequestPrinter
|
81
|
-
def format_request(parsed_up_to,
|
81
|
+
def format_request(parsed_up_to, req)
|
82
|
+
text = req.join
|
82
83
|
return unless text =~ /.*Processing ([^ ]+) .*Completed in (\d+)ms/m
|
83
84
|
action = $1
|
84
85
|
time = $2
|
@@ -93,6 +94,8 @@ module Ultragrep
|
|
93
94
|
:range_start => Time.now.to_i - (Time.now.to_i % DAY),
|
94
95
|
:range_end => Time.now.to_i,
|
95
96
|
}
|
97
|
+
key_value = []
|
98
|
+
warn_about_missing_quotes_in_time_argument(argv)
|
96
99
|
|
97
100
|
parser = OptionParser.new do |parser|
|
98
101
|
parser.banner = <<-BANNER.gsub(/^ {6,}/, "")
|
@@ -112,9 +115,14 @@ module Ultragrep
|
|
112
115
|
parser.on("--config", "-c FILE", String, "Config file location (default: #{Config::DEFAULT_LOCATIONS.join(", ")})") { |config| options[:config] = config }
|
113
116
|
parser.on("--progress", "-p", "show grep progress to STDERR") { options[:verbose] = true }
|
114
117
|
parser.on("--verbose", "-v", "DEPRECATED") do
|
115
|
-
$stderr.puts("The --verbose option is
|
116
|
-
|
118
|
+
$stderr.puts("The --verbose option is gone. please use -p or --progress instead")
|
119
|
+
exit 0
|
117
120
|
end
|
121
|
+
parser.on("--not REGEXP", "the next given regular expression's match status should invert") do |regexp|
|
122
|
+
options[:not_regexps] ||= []
|
123
|
+
options[:not_regexps] << regexp
|
124
|
+
end
|
125
|
+
|
118
126
|
parser.on("--tail", "-t", "Tail requests, show matching requests as they arrive") do
|
119
127
|
options[:tail] = true
|
120
128
|
options[:range_end] = Time.now.to_i + 100 * DAY
|
@@ -169,22 +177,32 @@ module Ultragrep
|
|
169
177
|
|
170
178
|
def ultragrep(options)
|
171
179
|
lower_priority
|
172
|
-
|
173
180
|
config = options.fetch(:config)
|
174
181
|
file_type = options.fetch(:type, config.default_file_type)
|
175
|
-
|
182
|
+
if !config.types[file_type]
|
183
|
+
$stderr.puts("No such log type: #{file_type} -- available types are #{config.types.keys.join(',')}")
|
184
|
+
exit 1
|
185
|
+
end
|
186
|
+
|
187
|
+
lua = config.types[file_type]["lua"]
|
188
|
+
collector = Ultragrep::LogCollector.new(config.log_path_glob(file_type), options)
|
189
|
+
file_lists = collector.collect_files
|
176
190
|
|
177
191
|
request_printer = options.fetch(:printer)
|
178
192
|
request_printer.run
|
179
193
|
|
180
|
-
|
181
|
-
|
194
|
+
print_regex_info(options) if options[:verbose]
|
195
|
+
|
196
|
+
regexps = options[:regexps].map { |r| "+" + r }
|
197
|
+
regexps += options[:not_regexps].map { |r| "!" + r } if options[:not_regexps]
|
198
|
+
|
199
|
+
quoted_regexps = quote_shell_words(regexps)
|
182
200
|
|
183
201
|
file_lists.each do |files|
|
184
202
|
print_search_list(files) if options[:verbose]
|
185
203
|
|
186
204
|
children_pipes = files.map do |file|
|
187
|
-
[worker(file,
|
205
|
+
[worker(file, lua, quoted_regexps, options), file]
|
188
206
|
end
|
189
207
|
|
190
208
|
children_pipes.each do |pipe, _|
|
@@ -204,11 +222,9 @@ module Ultragrep
|
|
204
222
|
|
205
223
|
private
|
206
224
|
|
207
|
-
def worker(file,
|
208
|
-
core = "#{ug_guts} #{
|
209
|
-
command = if file =~ /\.
|
210
|
-
"gzip -dcf #{file}"
|
211
|
-
elsif file =~ /\.bz2$/
|
225
|
+
def worker(file, lua, quoted_regexps, options)
|
226
|
+
core = "#{ug_guts} -l #{lua} -s #{options[:range_start]} -e #{options[:range_end]} #{quoted_regexps}" # add -k and -m here
|
227
|
+
command = if file =~ /\.bz2$/
|
212
228
|
"bzip2 -dcf #{file}"
|
213
229
|
elsif file =~ /^tail/
|
214
230
|
"#{file}"
|
@@ -229,7 +245,7 @@ module Ultragrep
|
|
229
245
|
parsed_up_to = $1.to_i
|
230
246
|
|
231
247
|
request_printer.set_read_up_to(pipe, parsed_up_to)
|
232
|
-
this_request = [parsed_up_to, ["\n# #{filename}"]]
|
248
|
+
this_request = [parsed_up_to, ["\n# #{filename}\n"]]
|
233
249
|
elsif line =~ /^---/
|
234
250
|
# end of request
|
235
251
|
this_request[1] << line if this_request
|
@@ -250,8 +266,13 @@ module Ultragrep
|
|
250
266
|
end
|
251
267
|
end
|
252
268
|
|
253
|
-
def print_regex_info(
|
254
|
-
|
269
|
+
def print_regex_info(options)
|
270
|
+
msg = "searching for regexps: #{options[:regexps].join(',')}"
|
271
|
+
if options[:not_regexps]
|
272
|
+
msg += " and not #{options[:not_regexps].join(',')}"
|
273
|
+
end
|
274
|
+
msg += " from #{range_description(options)}"
|
275
|
+
$stderr.puts(msg)
|
255
276
|
end
|
256
277
|
|
257
278
|
def range_description(options)
|
@@ -267,26 +288,6 @@ module Ultragrep
|
|
267
288
|
$stderr.puts("searching #{formatted_list}")
|
268
289
|
end
|
269
290
|
|
270
|
-
def file_list(globs, options)
|
271
|
-
file_list = Dir.glob(globs)
|
272
|
-
|
273
|
-
file_lists = if options[:tail]
|
274
|
-
# TODO fix before we open source -- this is a hard-coded file format.
|
275
|
-
tail_list = file_list.map do |f|
|
276
|
-
today = Time.now.strftime("%Y%m%d")
|
277
|
-
"tail -f #{f}" if f =~ /-#{today}$/
|
278
|
-
end.compact
|
279
|
-
[tail_list]
|
280
|
-
else
|
281
|
-
filter_and_group_files(file_list, options)
|
282
|
-
end
|
283
|
-
|
284
|
-
nothing_found!(globs, options) if file_lists.empty?
|
285
|
-
|
286
|
-
$stderr.puts("Grepping #{file_lists.map { |f| f.join(" ") }.join("\n\n\n")}") if options[:verbose]
|
287
|
-
file_lists
|
288
|
-
end
|
289
|
-
|
290
291
|
def encode_utf8!(line)
|
291
292
|
line.encode!('UTF-16', 'UTF-8', :invalid => :replace, :replace => '')
|
292
293
|
line.encode!('UTF-8', 'UTF-16')
|
@@ -303,28 +304,6 @@ module Ultragrep
|
|
303
304
|
system("renice -n 19 -p #$$ >/dev/null 2>&1")
|
304
305
|
end
|
305
306
|
|
306
|
-
def filter_and_group_files(files, options)
|
307
|
-
files = filter_files_by_host(files, options[:host_filter])
|
308
|
-
files = filter_files_by_date(files, options.fetch(:range_start)..options.fetch(:range_end))
|
309
|
-
files.group_by { |f| f[DATE_FROM_FILENAME, 1] }.values
|
310
|
-
end
|
311
|
-
|
312
|
-
def filter_files_by_host(files, host_filter)
|
313
|
-
return files unless host_filter
|
314
|
-
files.select { |file| host_filter.include?(file.split("/")[-2]) }
|
315
|
-
end
|
316
|
-
|
317
|
-
def filter_files_by_date(files, range)
|
318
|
-
files.select do |file|
|
319
|
-
logfile_date = Time.parse(file[DATE_FROM_FILENAME, 1]).to_i
|
320
|
-
range_overlap?(range, logfile_date..(logfile_date + DAY - 1))
|
321
|
-
end
|
322
|
-
end
|
323
|
-
|
324
|
-
def range_overlap?(a, b)
|
325
|
-
a.first <= b.last && b.first <= a.last
|
326
|
-
end
|
327
|
-
|
328
307
|
def parse_time(string)
|
329
308
|
if string =~ /^\d+$/ && string !~ /^20/
|
330
309
|
string.to_i
|
@@ -338,11 +317,18 @@ module Ultragrep
|
|
338
317
|
end
|
339
318
|
|
340
319
|
def ug_guts
|
341
|
-
File.expand_path("../../
|
320
|
+
File.expand_path("../../src/ug_guts", __FILE__)
|
342
321
|
end
|
343
322
|
|
344
323
|
def ug_cat
|
345
|
-
File.expand_path("../../
|
324
|
+
File.expand_path("../../src/ug_cat", __FILE__)
|
325
|
+
end
|
326
|
+
|
327
|
+
def warn_about_missing_quotes_in_time_argument(argv)
|
328
|
+
sep = "---"
|
329
|
+
if found = argv.join(sep)[/\d+-\d+-\d+#{sep}\d+:\d+:\d+/]
|
330
|
+
warn "WARN: Put time inside quotes like this '#{found.split(sep).join(" ")}'"
|
331
|
+
end
|
346
332
|
end
|
347
333
|
end
|
348
334
|
end
|
data/lib/ultragrep/config.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
|
1
3
|
module Ultragrep
|
2
4
|
class Config
|
3
5
|
DEFAULT_LOCATIONS = [".ultragrep.yml", "#{ENV['HOME']}/.ultragrep.yml", "/etc/ultragrep.yml"]
|
@@ -23,6 +25,10 @@ module Ultragrep
|
|
23
25
|
@data[val]
|
24
26
|
end
|
25
27
|
|
28
|
+
def to_s
|
29
|
+
@data.to_s
|
30
|
+
end
|
31
|
+
|
26
32
|
def fetch(*args)
|
27
33
|
@data.fetch(*args)
|
28
34
|
end
|
@@ -0,0 +1,67 @@
|
|
1
|
+
require 'time'
|
2
|
+
|
3
|
+
module Ultragrep
|
4
|
+
class LogCollector
|
5
|
+
# this constant is pretty implementation-specific. fix at will.
|
6
|
+
DATE_FROM_FILENAME = /(\d+)(\.\w+)?$/
|
7
|
+
|
8
|
+
HOUR = 60 * 60
|
9
|
+
DAY = 24 * HOUR
|
10
|
+
|
11
|
+
def initialize(globs, options)
|
12
|
+
@globs, @options = globs, options
|
13
|
+
end
|
14
|
+
|
15
|
+
def collect_files
|
16
|
+
file_list = Dir.glob(@globs)
|
17
|
+
file_lists = if @options[:tail]
|
18
|
+
# TODO fix before we open source -- this is a hard-coded file format.
|
19
|
+
tail_list = file_list.map do |f|
|
20
|
+
today = Time.now.strftime("%Y%m%d")
|
21
|
+
"tail -f #{f}" if f =~ /-#{today}$/
|
22
|
+
end.compact
|
23
|
+
[tail_list]
|
24
|
+
else
|
25
|
+
filter_and_group_files(file_list)
|
26
|
+
end
|
27
|
+
|
28
|
+
return nil if file_lists.empty?
|
29
|
+
|
30
|
+
$stderr.puts("Grepping #{file_lists.map { |f| f.join(" ") }.join("\n\n\n")}") if @options[:verbose]
|
31
|
+
file_lists
|
32
|
+
end
|
33
|
+
|
34
|
+
def filter_and_group_files(files)
|
35
|
+
files = filter_files_by_host(files)
|
36
|
+
files = filter_files_by_date(files, @options.fetch(:range_start)..@options.fetch(:range_end))
|
37
|
+
files.group_by { |f| f[DATE_FROM_FILENAME, 1] }.values
|
38
|
+
end
|
39
|
+
|
40
|
+
def filter_files_by_host(files)
|
41
|
+
return files unless @options[:host_filter]
|
42
|
+
files.select { |file| @options[:host_filter].include?(file.split("/")[-2]) }
|
43
|
+
end
|
44
|
+
|
45
|
+
def filter_files_by_date(files, range)
|
46
|
+
files.select do |file|
|
47
|
+
filename_date = file[DATE_FROM_FILENAME, 1]
|
48
|
+
if filename_date.nil?
|
49
|
+
$stderr.puts("Could not parse date out of #{file}, skipping.")
|
50
|
+
next
|
51
|
+
end
|
52
|
+
|
53
|
+
begin
|
54
|
+
logfile_date = Time.parse(filename_date).to_i
|
55
|
+
rescue
|
56
|
+
$stderr.puts("Could not parse date out of #{file}, skipping.")
|
57
|
+
next
|
58
|
+
end
|
59
|
+
range_overlap?(range, logfile_date..(logfile_date + DAY - 1))
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
63
|
+
def range_overlap?(a, b)
|
64
|
+
a.first <= b.last && b.first <= a.last
|
65
|
+
end
|
66
|
+
end
|
67
|
+
end
|
data/lib/ultragrep/version.rb
CHANGED
data/src/Makefile
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
LUA_CFLAGS = $(shell pkg-config --cflags --silence-errors lua)
|
2
|
+
LUA_CFLAGS += $(shell pkg-config --cflags --silence-errors lua5.2)
|
3
|
+
LUA_LDFLAGS = $(shell pkg-config --libs --silence-errors lua)
|
4
|
+
LUA_LDFLAGS += $(shell pkg-config --libs --silence-errors lua5.2)
|
5
|
+
CFLAGS=-Wall -O3 -g $(LUA_CFLAGS)
|
6
|
+
LDFLAGS=$(LUA_LDFLAGS) -lpcre
|
7
|
+
all: ug_guts ug_cat ug_build_index
|
8
|
+
install: all
|
9
|
+
|
10
|
+
ug_guts.o: ug_guts.c
|
11
|
+
ug_index.o: ug_index.h ug_index.c
|
12
|
+
ug_build_index.o: ug_build_index.c ug_index.h
|
13
|
+
|
14
|
+
ug_guts: ug_guts.o ug_lua.o Makefile
|
15
|
+
gcc -o ug_guts ug_guts.o ug_lua.o ${LDFLAGS}
|
16
|
+
|
17
|
+
ug_build_index: ug_build_index.o ug_index.o Makefile ug_gzip.o ug_lua.o
|
18
|
+
gcc -o ug_build_index ug_lua.o ug_index.o ug_build_index.o ug_gzip.o -lz ${LDFLAGS}
|
19
|
+
|
20
|
+
ug_cat: ug_cat.o ug_index.o Makefile
|
21
|
+
gcc -o ug_cat ug_cat.o ug_index.o -lz ${LDFLAGS}
|
22
|
+
|
23
|
+
clean:
|
24
|
+
rm -rf *.o ug_guts ug_build_index ug_cat
|
File without changes
|
data/src/pcre.h
ADDED
@@ -0,0 +1,668 @@
|
|
1
|
+
/*
|
2
|
+
Copied from pcre so we do not need to hand-install and copy it on OSX Mavericks.
|
3
|
+
Could be fixed by xcode-select --install but that seems to be broken too.
|
4
|
+
*/
|
5
|
+
|
6
|
+
/*************************************************
|
7
|
+
* Perl-Compatible Regular Expressions *
|
8
|
+
*************************************************/
|
9
|
+
|
10
|
+
/* This is the public header file for the PCRE library, to be #included by
|
11
|
+
applications that call the PCRE functions.
|
12
|
+
|
13
|
+
Copyright (c) 1997-2013 University of Cambridge
|
14
|
+
|
15
|
+
-----------------------------------------------------------------------------
|
16
|
+
Redistribution and use in source and binary forms, with or without
|
17
|
+
modification, are permitted provided that the following conditions are met:
|
18
|
+
|
19
|
+
* Redistributions of source code must retain the above copyright notice,
|
20
|
+
this list of conditions and the following disclaimer.
|
21
|
+
|
22
|
+
* Redistributions in binary form must reproduce the above copyright
|
23
|
+
notice, this list of conditions and the following disclaimer in the
|
24
|
+
documentation and/or other materials provided with the distribution.
|
25
|
+
|
26
|
+
* Neither the name of the University of Cambridge nor the names of its
|
27
|
+
contributors may be used to endorse or promote products derived from
|
28
|
+
this software without specific prior written permission.
|
29
|
+
|
30
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
31
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
32
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
33
|
+
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
34
|
+
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
35
|
+
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
36
|
+
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
37
|
+
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
38
|
+
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
39
|
+
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
40
|
+
POSSIBILITY OF SUCH DAMAGE.
|
41
|
+
-----------------------------------------------------------------------------
|
42
|
+
*/
|
43
|
+
|
44
|
+
#ifndef _PCRE_H
|
45
|
+
#define _PCRE_H
|
46
|
+
|
47
|
+
/* The current PCRE version information. */
|
48
|
+
|
49
|
+
#define PCRE_MAJOR 8
|
50
|
+
#define PCRE_MINOR 33
|
51
|
+
#define PCRE_PRERELEASE
|
52
|
+
#define PCRE_DATE 2013-05-28
|
53
|
+
|
54
|
+
/* When an application links to a PCRE DLL in Windows, the symbols that are
|
55
|
+
imported have to be identified as such. When building PCRE, the appropriate
|
56
|
+
export setting is defined in pcre_internal.h, which includes this file. So we
|
57
|
+
don't change existing definitions of PCRE_EXP_DECL and PCRECPP_EXP_DECL. */
|
58
|
+
|
59
|
+
#if defined(_WIN32) && !defined(PCRE_STATIC)
|
60
|
+
# ifndef PCRE_EXP_DECL
|
61
|
+
# define PCRE_EXP_DECL extern __declspec(dllimport)
|
62
|
+
# endif
|
63
|
+
# ifdef __cplusplus
|
64
|
+
# ifndef PCRECPP_EXP_DECL
|
65
|
+
# define PCRECPP_EXP_DECL extern __declspec(dllimport)
|
66
|
+
# endif
|
67
|
+
# ifndef PCRECPP_EXP_DEFN
|
68
|
+
# define PCRECPP_EXP_DEFN __declspec(dllimport)
|
69
|
+
# endif
|
70
|
+
# endif
|
71
|
+
#endif
|
72
|
+
|
73
|
+
/* By default, we use the standard "extern" declarations. */
|
74
|
+
|
75
|
+
#ifndef PCRE_EXP_DECL
|
76
|
+
# ifdef __cplusplus
|
77
|
+
# define PCRE_EXP_DECL extern "C"
|
78
|
+
# else
|
79
|
+
# define PCRE_EXP_DECL extern
|
80
|
+
# endif
|
81
|
+
#endif
|
82
|
+
|
83
|
+
#ifdef __cplusplus
|
84
|
+
# ifndef PCRECPP_EXP_DECL
|
85
|
+
# define PCRECPP_EXP_DECL extern
|
86
|
+
# endif
|
87
|
+
# ifndef PCRECPP_EXP_DEFN
|
88
|
+
# define PCRECPP_EXP_DEFN
|
89
|
+
# endif
|
90
|
+
#endif
|
91
|
+
|
92
|
+
/* Have to include stdlib.h in order to ensure that size_t is defined;
|
93
|
+
it is needed here for malloc. */
|
94
|
+
|
95
|
+
#include <stdlib.h>
|
96
|
+
|
97
|
+
/* Allow for C++ users */
|
98
|
+
|
99
|
+
#ifdef __cplusplus
|
100
|
+
extern "C" {
|
101
|
+
#endif
|
102
|
+
|
103
|
+
/* Public options. Some are compile-time only, some are run-time only, and some
|
104
|
+
are both. Most of the compile-time options are saved with the compiled regex so
|
105
|
+
that they can be inspected during studying (and therefore JIT compiling). Note
|
106
|
+
that pcre_study() has its own set of options. Originally, all the options
|
107
|
+
defined here used distinct bits. However, almost all the bits in a 32-bit word
|
108
|
+
are now used, so in order to conserve them, option bits that were previously
|
109
|
+
only recognized at matching time (i.e. by pcre_exec() or pcre_dfa_exec()) may
|
110
|
+
also be used for compile-time options that affect only compiling and are not
|
111
|
+
relevant for studying or JIT compiling.
|
112
|
+
|
113
|
+
Some options for pcre_compile() change its behaviour but do not affect the
|
114
|
+
behaviour of the execution functions. Other options are passed through to the
|
115
|
+
execution functions and affect their behaviour, with or without affecting the
|
116
|
+
behaviour of pcre_compile().
|
117
|
+
|
118
|
+
Options that can be passed to pcre_compile() are tagged Cx below, with these
|
119
|
+
variants:
|
120
|
+
|
121
|
+
C1 Affects compile only
|
122
|
+
C2 Does not affect compile; affects exec, dfa_exec
|
123
|
+
C3 Affects compile, exec, dfa_exec
|
124
|
+
C4 Affects compile, exec, dfa_exec, study
|
125
|
+
C5 Affects compile, exec, study
|
126
|
+
|
127
|
+
Options that can be set for pcre_exec() and/or pcre_dfa_exec() are flagged with
|
128
|
+
E and D, respectively. They take precedence over C3, C4, and C5 settings passed
|
129
|
+
from pcre_compile(). Those that are compatible with JIT execution are flagged
|
130
|
+
with J. */
|
131
|
+
|
132
|
+
#define PCRE_CASELESS 0x00000001 /* C1 */
|
133
|
+
#define PCRE_MULTILINE 0x00000002 /* C1 */
|
134
|
+
#define PCRE_DOTALL 0x00000004 /* C1 */
|
135
|
+
#define PCRE_EXTENDED 0x00000008 /* C1 */
|
136
|
+
#define PCRE_ANCHORED 0x00000010 /* C4 E D */
|
137
|
+
#define PCRE_DOLLAR_ENDONLY 0x00000020 /* C2 */
|
138
|
+
#define PCRE_EXTRA 0x00000040 /* C1 */
|
139
|
+
#define PCRE_NOTBOL 0x00000080 /* E D J */
|
140
|
+
#define PCRE_NOTEOL 0x00000100 /* E D J */
|
141
|
+
#define PCRE_UNGREEDY 0x00000200 /* C1 */
|
142
|
+
#define PCRE_NOTEMPTY 0x00000400 /* E D J */
|
143
|
+
#define PCRE_UTF8 0x00000800 /* C4 ) */
|
144
|
+
#define PCRE_UTF16 0x00000800 /* C4 ) Synonyms */
|
145
|
+
#define PCRE_UTF32 0x00000800 /* C4 ) */
|
146
|
+
#define PCRE_NO_AUTO_CAPTURE 0x00001000 /* C1 */
|
147
|
+
#define PCRE_NO_UTF8_CHECK 0x00002000 /* C1 E D J ) */
|
148
|
+
#define PCRE_NO_UTF16_CHECK 0x00002000 /* C1 E D J ) Synonyms */
|
149
|
+
#define PCRE_NO_UTF32_CHECK 0x00002000 /* C1 E D J ) */
|
150
|
+
#define PCRE_AUTO_CALLOUT 0x00004000 /* C1 */
|
151
|
+
#define PCRE_PARTIAL_SOFT 0x00008000 /* E D J ) Synonyms */
|
152
|
+
#define PCRE_PARTIAL 0x00008000 /* E D J ) */
|
153
|
+
|
154
|
+
/* This pair use the same bit. */
|
155
|
+
#define PCRE_NEVER_UTF 0x00010000 /* C1 ) Overlaid */
|
156
|
+
#define PCRE_DFA_SHORTEST 0x00010000 /* D ) Overlaid */
|
157
|
+
|
158
|
+
#define PCRE_DFA_RESTART 0x00020000 /* D */
|
159
|
+
#define PCRE_FIRSTLINE 0x00040000 /* C3 */
|
160
|
+
#define PCRE_DUPNAMES 0x00080000 /* C1 */
|
161
|
+
#define PCRE_NEWLINE_CR 0x00100000 /* C3 E D */
|
162
|
+
#define PCRE_NEWLINE_LF 0x00200000 /* C3 E D */
|
163
|
+
#define PCRE_NEWLINE_CRLF 0x00300000 /* C3 E D */
|
164
|
+
#define PCRE_NEWLINE_ANY 0x00400000 /* C3 E D */
|
165
|
+
#define PCRE_NEWLINE_ANYCRLF 0x00500000 /* C3 E D */
|
166
|
+
#define PCRE_BSR_ANYCRLF 0x00800000 /* C3 E D */
|
167
|
+
#define PCRE_BSR_UNICODE 0x01000000 /* C3 E D */
|
168
|
+
#define PCRE_JAVASCRIPT_COMPAT 0x02000000 /* C5 */
|
169
|
+
#define PCRE_NO_START_OPTIMIZE 0x04000000 /* C2 E D ) Synonyms */
|
170
|
+
#define PCRE_NO_START_OPTIMISE 0x04000000 /* C2 E D ) */
|
171
|
+
#define PCRE_PARTIAL_HARD 0x08000000 /* E D J */
|
172
|
+
#define PCRE_NOTEMPTY_ATSTART 0x10000000 /* E D J */
|
173
|
+
#define PCRE_UCP 0x20000000 /* C3 */
|
174
|
+
|
175
|
+
/* Exec-time and get/set-time error codes */
|
176
|
+
|
177
|
+
#define PCRE_ERROR_NOMATCH (-1)
|
178
|
+
#define PCRE_ERROR_NULL (-2)
|
179
|
+
#define PCRE_ERROR_BADOPTION (-3)
|
180
|
+
#define PCRE_ERROR_BADMAGIC (-4)
|
181
|
+
#define PCRE_ERROR_UNKNOWN_OPCODE (-5)
|
182
|
+
#define PCRE_ERROR_UNKNOWN_NODE (-5) /* For backward compatibility */
|
183
|
+
#define PCRE_ERROR_NOMEMORY (-6)
|
184
|
+
#define PCRE_ERROR_NOSUBSTRING (-7)
|
185
|
+
#define PCRE_ERROR_MATCHLIMIT (-8)
|
186
|
+
#define PCRE_ERROR_CALLOUT (-9) /* Never used by PCRE itself */
|
187
|
+
#define PCRE_ERROR_BADUTF8 (-10) /* Same for 8/16/32 */
|
188
|
+
#define PCRE_ERROR_BADUTF16 (-10) /* Same for 8/16/32 */
|
189
|
+
#define PCRE_ERROR_BADUTF32 (-10) /* Same for 8/16/32 */
|
190
|
+
#define PCRE_ERROR_BADUTF8_OFFSET (-11) /* Same for 8/16 */
|
191
|
+
#define PCRE_ERROR_BADUTF16_OFFSET (-11) /* Same for 8/16 */
|
192
|
+
#define PCRE_ERROR_PARTIAL (-12)
|
193
|
+
#define PCRE_ERROR_BADPARTIAL (-13)
|
194
|
+
#define PCRE_ERROR_INTERNAL (-14)
|
195
|
+
#define PCRE_ERROR_BADCOUNT (-15)
|
196
|
+
#define PCRE_ERROR_DFA_UITEM (-16)
|
197
|
+
#define PCRE_ERROR_DFA_UCOND (-17)
|
198
|
+
#define PCRE_ERROR_DFA_UMLIMIT (-18)
|
199
|
+
#define PCRE_ERROR_DFA_WSSIZE (-19)
|
200
|
+
#define PCRE_ERROR_DFA_RECURSE (-20)
|
201
|
+
#define PCRE_ERROR_RECURSIONLIMIT (-21)
|
202
|
+
#define PCRE_ERROR_NULLWSLIMIT (-22) /* No longer actually used */
|
203
|
+
#define PCRE_ERROR_BADNEWLINE (-23)
|
204
|
+
#define PCRE_ERROR_BADOFFSET (-24)
|
205
|
+
#define PCRE_ERROR_SHORTUTF8 (-25)
|
206
|
+
#define PCRE_ERROR_SHORTUTF16 (-25) /* Same for 8/16 */
|
207
|
+
#define PCRE_ERROR_RECURSELOOP (-26)
|
208
|
+
#define PCRE_ERROR_JIT_STACKLIMIT (-27)
|
209
|
+
#define PCRE_ERROR_BADMODE (-28)
|
210
|
+
#define PCRE_ERROR_BADENDIANNESS (-29)
|
211
|
+
#define PCRE_ERROR_DFA_BADRESTART (-30)
|
212
|
+
#define PCRE_ERROR_JIT_BADOPTION (-31)
|
213
|
+
#define PCRE_ERROR_BADLENGTH (-32)
|
214
|
+
#define PCRE_ERROR_UNSET (-33)
|
215
|
+
|
216
|
+
/* Specific error codes for UTF-8 validity checks */
|
217
|
+
|
218
|
+
#define PCRE_UTF8_ERR0 0
|
219
|
+
#define PCRE_UTF8_ERR1 1
|
220
|
+
#define PCRE_UTF8_ERR2 2
|
221
|
+
#define PCRE_UTF8_ERR3 3
|
222
|
+
#define PCRE_UTF8_ERR4 4
|
223
|
+
#define PCRE_UTF8_ERR5 5
|
224
|
+
#define PCRE_UTF8_ERR6 6
|
225
|
+
#define PCRE_UTF8_ERR7 7
|
226
|
+
#define PCRE_UTF8_ERR8 8
|
227
|
+
#define PCRE_UTF8_ERR9 9
|
228
|
+
#define PCRE_UTF8_ERR10 10
|
229
|
+
#define PCRE_UTF8_ERR11 11
|
230
|
+
#define PCRE_UTF8_ERR12 12
|
231
|
+
#define PCRE_UTF8_ERR13 13
|
232
|
+
#define PCRE_UTF8_ERR14 14
|
233
|
+
#define PCRE_UTF8_ERR15 15
|
234
|
+
#define PCRE_UTF8_ERR16 16
|
235
|
+
#define PCRE_UTF8_ERR17 17
|
236
|
+
#define PCRE_UTF8_ERR18 18
|
237
|
+
#define PCRE_UTF8_ERR19 19
|
238
|
+
#define PCRE_UTF8_ERR20 20
|
239
|
+
#define PCRE_UTF8_ERR21 21
|
240
|
+
#define PCRE_UTF8_ERR22 22 /* Unused (was non-character) */
|
241
|
+
|
242
|
+
/* Specific error codes for UTF-16 validity checks */
|
243
|
+
|
244
|
+
#define PCRE_UTF16_ERR0 0
|
245
|
+
#define PCRE_UTF16_ERR1 1
|
246
|
+
#define PCRE_UTF16_ERR2 2
|
247
|
+
#define PCRE_UTF16_ERR3 3
|
248
|
+
#define PCRE_UTF16_ERR4 4 /* Unused (was non-character) */
|
249
|
+
|
250
|
+
/* Specific error codes for UTF-32 validity checks */
|
251
|
+
|
252
|
+
#define PCRE_UTF32_ERR0 0
|
253
|
+
#define PCRE_UTF32_ERR1 1
|
254
|
+
#define PCRE_UTF32_ERR2 2 /* Unused (was non-character) */
|
255
|
+
#define PCRE_UTF32_ERR3 3
|
256
|
+
|
257
|
+
/* Request types for pcre_fullinfo() */
|
258
|
+
|
259
|
+
#define PCRE_INFO_OPTIONS 0
|
260
|
+
#define PCRE_INFO_SIZE 1
|
261
|
+
#define PCRE_INFO_CAPTURECOUNT 2
|
262
|
+
#define PCRE_INFO_BACKREFMAX 3
|
263
|
+
#define PCRE_INFO_FIRSTBYTE 4
|
264
|
+
#define PCRE_INFO_FIRSTCHAR 4 /* For backwards compatibility */
|
265
|
+
#define PCRE_INFO_FIRSTTABLE 5
|
266
|
+
#define PCRE_INFO_LASTLITERAL 6
|
267
|
+
#define PCRE_INFO_NAMEENTRYSIZE 7
|
268
|
+
#define PCRE_INFO_NAMECOUNT 8
|
269
|
+
#define PCRE_INFO_NAMETABLE 9
|
270
|
+
#define PCRE_INFO_STUDYSIZE 10
|
271
|
+
#define PCRE_INFO_DEFAULT_TABLES 11
|
272
|
+
#define PCRE_INFO_OKPARTIAL 12
|
273
|
+
#define PCRE_INFO_JCHANGED 13
|
274
|
+
#define PCRE_INFO_HASCRORLF 14
|
275
|
+
#define PCRE_INFO_MINLENGTH 15
|
276
|
+
#define PCRE_INFO_JIT 16
|
277
|
+
#define PCRE_INFO_JITSIZE 17
|
278
|
+
#define PCRE_INFO_MAXLOOKBEHIND 18
|
279
|
+
#define PCRE_INFO_FIRSTCHARACTER 19
|
280
|
+
#define PCRE_INFO_FIRSTCHARACTERFLAGS 20
|
281
|
+
#define PCRE_INFO_REQUIREDCHAR 21
|
282
|
+
#define PCRE_INFO_REQUIREDCHARFLAGS 22
|
283
|
+
#define PCRE_INFO_MATCHLIMIT 23
|
284
|
+
#define PCRE_INFO_RECURSIONLIMIT 24
|
285
|
+
|
286
|
+
/* Request types for pcre_config(). Do not re-arrange, in order to remain
|
287
|
+
compatible. */
|
288
|
+
|
289
|
+
#define PCRE_CONFIG_UTF8 0
|
290
|
+
#define PCRE_CONFIG_NEWLINE 1
|
291
|
+
#define PCRE_CONFIG_LINK_SIZE 2
|
292
|
+
#define PCRE_CONFIG_POSIX_MALLOC_THRESHOLD 3
|
293
|
+
#define PCRE_CONFIG_MATCH_LIMIT 4
|
294
|
+
#define PCRE_CONFIG_STACKRECURSE 5
|
295
|
+
#define PCRE_CONFIG_UNICODE_PROPERTIES 6
|
296
|
+
#define PCRE_CONFIG_MATCH_LIMIT_RECURSION 7
|
297
|
+
#define PCRE_CONFIG_BSR 8
|
298
|
+
#define PCRE_CONFIG_JIT 9
|
299
|
+
#define PCRE_CONFIG_UTF16 10
|
300
|
+
#define PCRE_CONFIG_JITTARGET 11
|
301
|
+
#define PCRE_CONFIG_UTF32 12
|
302
|
+
|
303
|
+
/* Request types for pcre_study(). Do not re-arrange, in order to remain
|
304
|
+
compatible. */
|
305
|
+
|
306
|
+
#define PCRE_STUDY_JIT_COMPILE 0x0001
|
307
|
+
#define PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE 0x0002
|
308
|
+
#define PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE 0x0004
|
309
|
+
#define PCRE_STUDY_EXTRA_NEEDED 0x0008
|
310
|
+
|
311
|
+
/* Bit flags for the pcre[16|32]_extra structure. Do not re-arrange or redefine
|
312
|
+
these bits, just add new ones on the end, in order to remain compatible. */
|
313
|
+
|
314
|
+
#define PCRE_EXTRA_STUDY_DATA 0x0001
|
315
|
+
#define PCRE_EXTRA_MATCH_LIMIT 0x0002
|
316
|
+
#define PCRE_EXTRA_CALLOUT_DATA 0x0004
|
317
|
+
#define PCRE_EXTRA_TABLES 0x0008
|
318
|
+
#define PCRE_EXTRA_MATCH_LIMIT_RECURSION 0x0010
|
319
|
+
#define PCRE_EXTRA_MARK 0x0020
|
320
|
+
#define PCRE_EXTRA_EXECUTABLE_JIT 0x0040
|
321
|
+
|
322
|
+
/* Types */
|
323
|
+
|
324
|
+
struct real_pcre; /* declaration; the definition is private */
|
325
|
+
typedef struct real_pcre pcre;
|
326
|
+
|
327
|
+
struct real_pcre16; /* declaration; the definition is private */
|
328
|
+
typedef struct real_pcre16 pcre16;
|
329
|
+
|
330
|
+
struct real_pcre32; /* declaration; the definition is private */
|
331
|
+
typedef struct real_pcre32 pcre32;
|
332
|
+
|
333
|
+
struct real_pcre_jit_stack; /* declaration; the definition is private */
|
334
|
+
typedef struct real_pcre_jit_stack pcre_jit_stack;
|
335
|
+
|
336
|
+
struct real_pcre16_jit_stack; /* declaration; the definition is private */
|
337
|
+
typedef struct real_pcre16_jit_stack pcre16_jit_stack;
|
338
|
+
|
339
|
+
struct real_pcre32_jit_stack; /* declaration; the definition is private */
|
340
|
+
typedef struct real_pcre32_jit_stack pcre32_jit_stack;
|
341
|
+
|
342
|
+
/* If PCRE is compiled with 16 bit character support, PCRE_UCHAR16 must contain
|
343
|
+
a 16 bit wide signed data type. Otherwise it can be a dummy data type since
|
344
|
+
pcre16 functions are not implemented. There is a check for this in pcre_internal.h. */
|
345
|
+
#ifndef PCRE_UCHAR16
|
346
|
+
#define PCRE_UCHAR16 unsigned short
|
347
|
+
#endif
|
348
|
+
|
349
|
+
#ifndef PCRE_SPTR16
|
350
|
+
#define PCRE_SPTR16 const PCRE_UCHAR16 *
|
351
|
+
#endif
|
352
|
+
|
353
|
+
/* If PCRE is compiled with 32 bit character support, PCRE_UCHAR32 must contain
|
354
|
+
a 32 bit wide signed data type. Otherwise it can be a dummy data type since
|
355
|
+
pcre32 functions are not implemented. There is a check for this in pcre_internal.h. */
|
356
|
+
#ifndef PCRE_UCHAR32
|
357
|
+
#define PCRE_UCHAR32 unsigned int
|
358
|
+
#endif
|
359
|
+
|
360
|
+
#ifndef PCRE_SPTR32
|
361
|
+
#define PCRE_SPTR32 const PCRE_UCHAR32 *
|
362
|
+
#endif
|
363
|
+
|
364
|
+
/* When PCRE is compiled as a C++ library, the subject pointer type can be
|
365
|
+
replaced with a custom type. For conventional use, the public interface is a
|
366
|
+
const char *. */
|
367
|
+
|
368
|
+
#ifndef PCRE_SPTR
|
369
|
+
#define PCRE_SPTR const char *
|
370
|
+
#endif
|
371
|
+
|
372
|
+
/* The structure for passing additional data to pcre_exec(). This is defined in
|
373
|
+
such a way as to be extensible. Always add new fields at the end, in order to
|
374
|
+
remain compatible. */
|
375
|
+
|
376
|
+
typedef struct pcre_extra {
|
377
|
+
unsigned long int flags; /* Bits for which fields are set */
|
378
|
+
void *study_data; /* Opaque data from pcre_study() */
|
379
|
+
unsigned long int match_limit; /* Maximum number of calls to match() */
|
380
|
+
void *callout_data; /* Data passed back in callouts */
|
381
|
+
const unsigned char *tables; /* Pointer to character tables */
|
382
|
+
unsigned long int match_limit_recursion; /* Max recursive calls to match() */
|
383
|
+
unsigned char **mark; /* For passing back a mark pointer */
|
384
|
+
void *executable_jit; /* Contains a pointer to a compiled jit code */
|
385
|
+
} pcre_extra;
|
386
|
+
|
387
|
+
/* Same structure as above, but with 16 bit char pointers. */
|
388
|
+
|
389
|
+
typedef struct pcre16_extra {
|
390
|
+
unsigned long int flags; /* Bits for which fields are set */
|
391
|
+
void *study_data; /* Opaque data from pcre_study() */
|
392
|
+
unsigned long int match_limit; /* Maximum number of calls to match() */
|
393
|
+
void *callout_data; /* Data passed back in callouts */
|
394
|
+
const unsigned char *tables; /* Pointer to character tables */
|
395
|
+
unsigned long int match_limit_recursion; /* Max recursive calls to match() */
|
396
|
+
PCRE_UCHAR16 **mark; /* For passing back a mark pointer */
|
397
|
+
void *executable_jit; /* Contains a pointer to a compiled jit code */
|
398
|
+
} pcre16_extra;
|
399
|
+
|
400
|
+
/* Same structure as above, but with 32 bit char pointers. */
|
401
|
+
|
402
|
+
typedef struct pcre32_extra {
|
403
|
+
unsigned long int flags; /* Bits for which fields are set */
|
404
|
+
void *study_data; /* Opaque data from pcre_study() */
|
405
|
+
unsigned long int match_limit; /* Maximum number of calls to match() */
|
406
|
+
void *callout_data; /* Data passed back in callouts */
|
407
|
+
const unsigned char *tables; /* Pointer to character tables */
|
408
|
+
unsigned long int match_limit_recursion; /* Max recursive calls to match() */
|
409
|
+
PCRE_UCHAR32 **mark; /* For passing back a mark pointer */
|
410
|
+
void *executable_jit; /* Contains a pointer to a compiled jit code */
|
411
|
+
} pcre32_extra;
|
412
|
+
|
413
|
+
/* The structure for passing out data via the pcre_callout_function. We use a
|
414
|
+
structure so that new fields can be added on the end in future versions,
|
415
|
+
without changing the API of the function, thereby allowing old clients to work
|
416
|
+
without modification. */
|
417
|
+
|
418
|
+
typedef struct pcre_callout_block {
|
419
|
+
int version; /* Identifies version of block */
|
420
|
+
/* ------------------------ Version 0 ------------------------------- */
|
421
|
+
int callout_number; /* Number compiled into pattern */
|
422
|
+
int *offset_vector; /* The offset vector */
|
423
|
+
PCRE_SPTR subject; /* The subject being matched */
|
424
|
+
int subject_length; /* The length of the subject */
|
425
|
+
int start_match; /* Offset to start of this match attempt */
|
426
|
+
int current_position; /* Where we currently are in the subject */
|
427
|
+
int capture_top; /* Max current capture */
|
428
|
+
int capture_last; /* Most recently closed capture */
|
429
|
+
void *callout_data; /* Data passed in with the call */
|
430
|
+
/* ------------------- Added for Version 1 -------------------------- */
|
431
|
+
int pattern_position; /* Offset to next item in the pattern */
|
432
|
+
int next_item_length; /* Length of next item in the pattern */
|
433
|
+
/* ------------------- Added for Version 2 -------------------------- */
|
434
|
+
const unsigned char *mark; /* Pointer to current mark or NULL */
|
435
|
+
/* ------------------------------------------------------------------ */
|
436
|
+
} pcre_callout_block;
|
437
|
+
|
438
|
+
/* Same structure as above, but with 16 bit char pointers. */
|
439
|
+
|
440
|
+
typedef struct pcre16_callout_block {
|
441
|
+
int version; /* Identifies version of block */
|
442
|
+
/* ------------------------ Version 0 ------------------------------- */
|
443
|
+
int callout_number; /* Number compiled into pattern */
|
444
|
+
int *offset_vector; /* The offset vector */
|
445
|
+
PCRE_SPTR16 subject; /* The subject being matched */
|
446
|
+
int subject_length; /* The length of the subject */
|
447
|
+
int start_match; /* Offset to start of this match attempt */
|
448
|
+
int current_position; /* Where we currently are in the subject */
|
449
|
+
int capture_top; /* Max current capture */
|
450
|
+
int capture_last; /* Most recently closed capture */
|
451
|
+
void *callout_data; /* Data passed in with the call */
|
452
|
+
/* ------------------- Added for Version 1 -------------------------- */
|
453
|
+
int pattern_position; /* Offset to next item in the pattern */
|
454
|
+
int next_item_length; /* Length of next item in the pattern */
|
455
|
+
/* ------------------- Added for Version 2 -------------------------- */
|
456
|
+
const PCRE_UCHAR16 *mark; /* Pointer to current mark or NULL */
|
457
|
+
/* ------------------------------------------------------------------ */
|
458
|
+
} pcre16_callout_block;
|
459
|
+
|
460
|
+
/* Same structure as above, but with 32 bit char pointers. */
|
461
|
+
|
462
|
+
typedef struct pcre32_callout_block {
|
463
|
+
int version; /* Identifies version of block */
|
464
|
+
/* ------------------------ Version 0 ------------------------------- */
|
465
|
+
int callout_number; /* Number compiled into pattern */
|
466
|
+
int *offset_vector; /* The offset vector */
|
467
|
+
PCRE_SPTR32 subject; /* The subject being matched */
|
468
|
+
int subject_length; /* The length of the subject */
|
469
|
+
int start_match; /* Offset to start of this match attempt */
|
470
|
+
int current_position; /* Where we currently are in the subject */
|
471
|
+
int capture_top; /* Max current capture */
|
472
|
+
int capture_last; /* Most recently closed capture */
|
473
|
+
void *callout_data; /* Data passed in with the call */
|
474
|
+
/* ------------------- Added for Version 1 -------------------------- */
|
475
|
+
int pattern_position; /* Offset to next item in the pattern */
|
476
|
+
int next_item_length; /* Length of next item in the pattern */
|
477
|
+
/* ------------------- Added for Version 2 -------------------------- */
|
478
|
+
const PCRE_UCHAR32 *mark; /* Pointer to current mark or NULL */
|
479
|
+
/* ------------------------------------------------------------------ */
|
480
|
+
} pcre32_callout_block;
|
481
|
+
|
482
|
+
/* Indirection for store get and free functions. These can be set to
|
483
|
+
alternative malloc/free functions if required. Special ones are used in the
|
484
|
+
non-recursive case for "frames". There is also an optional callout function
|
485
|
+
that is triggered by the (?C) regex item. For Virtual Pascal, these definitions
|
486
|
+
have to take another form. */
|
487
|
+
|
488
|
+
#ifndef VPCOMPAT
|
489
|
+
PCRE_EXP_DECL void *(*pcre_malloc)(size_t);
|
490
|
+
PCRE_EXP_DECL void (*pcre_free)(void *);
|
491
|
+
PCRE_EXP_DECL void *(*pcre_stack_malloc)(size_t);
|
492
|
+
PCRE_EXP_DECL void (*pcre_stack_free)(void *);
|
493
|
+
PCRE_EXP_DECL int (*pcre_callout)(pcre_callout_block *);
|
494
|
+
|
495
|
+
PCRE_EXP_DECL void *(*pcre16_malloc)(size_t);
|
496
|
+
PCRE_EXP_DECL void (*pcre16_free)(void *);
|
497
|
+
PCRE_EXP_DECL void *(*pcre16_stack_malloc)(size_t);
|
498
|
+
PCRE_EXP_DECL void (*pcre16_stack_free)(void *);
|
499
|
+
PCRE_EXP_DECL int (*pcre16_callout)(pcre16_callout_block *);
|
500
|
+
|
501
|
+
PCRE_EXP_DECL void *(*pcre32_malloc)(size_t);
|
502
|
+
PCRE_EXP_DECL void (*pcre32_free)(void *);
|
503
|
+
PCRE_EXP_DECL void *(*pcre32_stack_malloc)(size_t);
|
504
|
+
PCRE_EXP_DECL void (*pcre32_stack_free)(void *);
|
505
|
+
PCRE_EXP_DECL int (*pcre32_callout)(pcre32_callout_block *);
|
506
|
+
#else /* VPCOMPAT */
|
507
|
+
PCRE_EXP_DECL void *pcre_malloc(size_t);
|
508
|
+
PCRE_EXP_DECL void pcre_free(void *);
|
509
|
+
PCRE_EXP_DECL void *pcre_stack_malloc(size_t);
|
510
|
+
PCRE_EXP_DECL void pcre_stack_free(void *);
|
511
|
+
PCRE_EXP_DECL int pcre_callout(pcre_callout_block *);
|
512
|
+
|
513
|
+
PCRE_EXP_DECL void *pcre16_malloc(size_t);
|
514
|
+
PCRE_EXP_DECL void pcre16_free(void *);
|
515
|
+
PCRE_EXP_DECL void *pcre16_stack_malloc(size_t);
|
516
|
+
PCRE_EXP_DECL void pcre16_stack_free(void *);
|
517
|
+
PCRE_EXP_DECL int pcre16_callout(pcre16_callout_block *);
|
518
|
+
|
519
|
+
PCRE_EXP_DECL void *pcre32_malloc(size_t);
|
520
|
+
PCRE_EXP_DECL void pcre32_free(void *);
|
521
|
+
PCRE_EXP_DECL void *pcre32_stack_malloc(size_t);
|
522
|
+
PCRE_EXP_DECL void pcre32_stack_free(void *);
|
523
|
+
PCRE_EXP_DECL int pcre32_callout(pcre32_callout_block *);
|
524
|
+
#endif /* VPCOMPAT */
|
525
|
+
|
526
|
+
/* User defined callback which provides a stack just before the match starts. */
|
527
|
+
|
528
|
+
typedef pcre_jit_stack *(*pcre_jit_callback)(void *);
|
529
|
+
typedef pcre16_jit_stack *(*pcre16_jit_callback)(void *);
|
530
|
+
typedef pcre32_jit_stack *(*pcre32_jit_callback)(void *);
|
531
|
+
|
532
|
+
/* Exported PCRE functions */
|
533
|
+
|
534
|
+
PCRE_EXP_DECL pcre *pcre_compile(const char *, int, const char **, int *,
|
535
|
+
const unsigned char *);
|
536
|
+
PCRE_EXP_DECL pcre16 *pcre16_compile(PCRE_SPTR16, int, const char **, int *,
|
537
|
+
const unsigned char *);
|
538
|
+
PCRE_EXP_DECL pcre32 *pcre32_compile(PCRE_SPTR32, int, const char **, int *,
|
539
|
+
const unsigned char *);
|
540
|
+
PCRE_EXP_DECL pcre *pcre_compile2(const char *, int, int *, const char **,
|
541
|
+
int *, const unsigned char *);
|
542
|
+
PCRE_EXP_DECL pcre16 *pcre16_compile2(PCRE_SPTR16, int, int *, const char **,
|
543
|
+
int *, const unsigned char *);
|
544
|
+
PCRE_EXP_DECL pcre32 *pcre32_compile2(PCRE_SPTR32, int, int *, const char **,
|
545
|
+
int *, const unsigned char *);
|
546
|
+
PCRE_EXP_DECL int pcre_config(int, void *);
|
547
|
+
PCRE_EXP_DECL int pcre16_config(int, void *);
|
548
|
+
PCRE_EXP_DECL int pcre32_config(int, void *);
|
549
|
+
PCRE_EXP_DECL int pcre_copy_named_substring(const pcre *, const char *,
|
550
|
+
int *, int, const char *, char *, int);
|
551
|
+
PCRE_EXP_DECL int pcre16_copy_named_substring(const pcre16 *, PCRE_SPTR16,
|
552
|
+
int *, int, PCRE_SPTR16, PCRE_UCHAR16 *, int);
|
553
|
+
PCRE_EXP_DECL int pcre32_copy_named_substring(const pcre32 *, PCRE_SPTR32,
|
554
|
+
int *, int, PCRE_SPTR32, PCRE_UCHAR32 *, int);
|
555
|
+
PCRE_EXP_DECL int pcre_copy_substring(const char *, int *, int, int,
|
556
|
+
char *, int);
|
557
|
+
PCRE_EXP_DECL int pcre16_copy_substring(PCRE_SPTR16, int *, int, int,
|
558
|
+
PCRE_UCHAR16 *, int);
|
559
|
+
PCRE_EXP_DECL int pcre32_copy_substring(PCRE_SPTR32, int *, int, int,
|
560
|
+
PCRE_UCHAR32 *, int);
|
561
|
+
PCRE_EXP_DECL int pcre_dfa_exec(const pcre *, const pcre_extra *,
|
562
|
+
const char *, int, int, int, int *, int , int *, int);
|
563
|
+
PCRE_EXP_DECL int pcre16_dfa_exec(const pcre16 *, const pcre16_extra *,
|
564
|
+
PCRE_SPTR16, int, int, int, int *, int , int *, int);
|
565
|
+
PCRE_EXP_DECL int pcre32_dfa_exec(const pcre32 *, const pcre32_extra *,
|
566
|
+
PCRE_SPTR32, int, int, int, int *, int , int *, int);
|
567
|
+
PCRE_EXP_DECL int pcre_exec(const pcre *, const pcre_extra *, PCRE_SPTR,
|
568
|
+
int, int, int, int *, int);
|
569
|
+
PCRE_EXP_DECL int pcre16_exec(const pcre16 *, const pcre16_extra *,
|
570
|
+
PCRE_SPTR16, int, int, int, int *, int);
|
571
|
+
PCRE_EXP_DECL int pcre32_exec(const pcre32 *, const pcre32_extra *,
|
572
|
+
PCRE_SPTR32, int, int, int, int *, int);
|
573
|
+
PCRE_EXP_DECL int pcre_jit_exec(const pcre *, const pcre_extra *,
|
574
|
+
PCRE_SPTR, int, int, int, int *, int,
|
575
|
+
pcre_jit_stack *);
|
576
|
+
PCRE_EXP_DECL int pcre16_jit_exec(const pcre16 *, const pcre16_extra *,
|
577
|
+
PCRE_SPTR16, int, int, int, int *, int,
|
578
|
+
pcre16_jit_stack *);
|
579
|
+
PCRE_EXP_DECL int pcre32_jit_exec(const pcre32 *, const pcre32_extra *,
|
580
|
+
PCRE_SPTR32, int, int, int, int *, int,
|
581
|
+
pcre32_jit_stack *);
|
582
|
+
PCRE_EXP_DECL void pcre_free_substring(const char *);
|
583
|
+
PCRE_EXP_DECL void pcre16_free_substring(PCRE_SPTR16);
|
584
|
+
PCRE_EXP_DECL void pcre32_free_substring(PCRE_SPTR32);
|
585
|
+
PCRE_EXP_DECL void pcre_free_substring_list(const char **);
|
586
|
+
PCRE_EXP_DECL void pcre16_free_substring_list(PCRE_SPTR16 *);
|
587
|
+
PCRE_EXP_DECL void pcre32_free_substring_list(PCRE_SPTR32 *);
|
588
|
+
PCRE_EXP_DECL int pcre_fullinfo(const pcre *, const pcre_extra *, int,
|
589
|
+
void *);
|
590
|
+
PCRE_EXP_DECL int pcre16_fullinfo(const pcre16 *, const pcre16_extra *, int,
|
591
|
+
void *);
|
592
|
+
PCRE_EXP_DECL int pcre32_fullinfo(const pcre32 *, const pcre32_extra *, int,
|
593
|
+
void *);
|
594
|
+
PCRE_EXP_DECL int pcre_get_named_substring(const pcre *, const char *,
|
595
|
+
int *, int, const char *, const char **);
|
596
|
+
PCRE_EXP_DECL int pcre16_get_named_substring(const pcre16 *, PCRE_SPTR16,
|
597
|
+
int *, int, PCRE_SPTR16, PCRE_SPTR16 *);
|
598
|
+
PCRE_EXP_DECL int pcre32_get_named_substring(const pcre32 *, PCRE_SPTR32,
|
599
|
+
int *, int, PCRE_SPTR32, PCRE_SPTR32 *);
|
600
|
+
PCRE_EXP_DECL int pcre_get_stringnumber(const pcre *, const char *);
|
601
|
+
PCRE_EXP_DECL int pcre16_get_stringnumber(const pcre16 *, PCRE_SPTR16);
|
602
|
+
PCRE_EXP_DECL int pcre32_get_stringnumber(const pcre32 *, PCRE_SPTR32);
|
603
|
+
PCRE_EXP_DECL int pcre_get_stringtable_entries(const pcre *, const char *,
|
604
|
+
char **, char **);
|
605
|
+
PCRE_EXP_DECL int pcre16_get_stringtable_entries(const pcre16 *, PCRE_SPTR16,
|
606
|
+
PCRE_UCHAR16 **, PCRE_UCHAR16 **);
|
607
|
+
PCRE_EXP_DECL int pcre32_get_stringtable_entries(const pcre32 *, PCRE_SPTR32,
|
608
|
+
PCRE_UCHAR32 **, PCRE_UCHAR32 **);
|
609
|
+
PCRE_EXP_DECL int pcre_get_substring(const char *, int *, int, int,
|
610
|
+
const char **);
|
611
|
+
PCRE_EXP_DECL int pcre16_get_substring(PCRE_SPTR16, int *, int, int,
|
612
|
+
PCRE_SPTR16 *);
|
613
|
+
PCRE_EXP_DECL int pcre32_get_substring(PCRE_SPTR32, int *, int, int,
|
614
|
+
PCRE_SPTR32 *);
|
615
|
+
PCRE_EXP_DECL int pcre_get_substring_list(const char *, int *, int,
|
616
|
+
const char ***);
|
617
|
+
PCRE_EXP_DECL int pcre16_get_substring_list(PCRE_SPTR16, int *, int,
|
618
|
+
PCRE_SPTR16 **);
|
619
|
+
PCRE_EXP_DECL int pcre32_get_substring_list(PCRE_SPTR32, int *, int,
|
620
|
+
PCRE_SPTR32 **);
|
621
|
+
PCRE_EXP_DECL const unsigned char *pcre_maketables(void);
|
622
|
+
PCRE_EXP_DECL const unsigned char *pcre16_maketables(void);
|
623
|
+
PCRE_EXP_DECL const unsigned char *pcre32_maketables(void);
|
624
|
+
PCRE_EXP_DECL int pcre_refcount(pcre *, int);
|
625
|
+
PCRE_EXP_DECL int pcre16_refcount(pcre16 *, int);
|
626
|
+
PCRE_EXP_DECL int pcre32_refcount(pcre32 *, int);
|
627
|
+
PCRE_EXP_DECL pcre_extra *pcre_study(const pcre *, int, const char **);
|
628
|
+
PCRE_EXP_DECL pcre16_extra *pcre16_study(const pcre16 *, int, const char **);
|
629
|
+
PCRE_EXP_DECL pcre32_extra *pcre32_study(const pcre32 *, int, const char **);
|
630
|
+
PCRE_EXP_DECL void pcre_free_study(pcre_extra *);
|
631
|
+
PCRE_EXP_DECL void pcre16_free_study(pcre16_extra *);
|
632
|
+
PCRE_EXP_DECL void pcre32_free_study(pcre32_extra *);
|
633
|
+
PCRE_EXP_DECL const char *pcre_version(void);
|
634
|
+
PCRE_EXP_DECL const char *pcre16_version(void);
|
635
|
+
PCRE_EXP_DECL const char *pcre32_version(void);
|
636
|
+
|
637
|
+
/* Utility functions for byte order swaps. */
|
638
|
+
PCRE_EXP_DECL int pcre_pattern_to_host_byte_order(pcre *, pcre_extra *,
|
639
|
+
const unsigned char *);
|
640
|
+
PCRE_EXP_DECL int pcre16_pattern_to_host_byte_order(pcre16 *, pcre16_extra *,
|
641
|
+
const unsigned char *);
|
642
|
+
PCRE_EXP_DECL int pcre32_pattern_to_host_byte_order(pcre32 *, pcre32_extra *,
|
643
|
+
const unsigned char *);
|
644
|
+
PCRE_EXP_DECL int pcre16_utf16_to_host_byte_order(PCRE_UCHAR16 *,
|
645
|
+
PCRE_SPTR16, int, int *, int);
|
646
|
+
PCRE_EXP_DECL int pcre32_utf32_to_host_byte_order(PCRE_UCHAR32 *,
|
647
|
+
PCRE_SPTR32, int, int *, int);
|
648
|
+
|
649
|
+
/* JIT compiler related functions. */
|
650
|
+
|
651
|
+
PCRE_EXP_DECL pcre_jit_stack *pcre_jit_stack_alloc(int, int);
|
652
|
+
PCRE_EXP_DECL pcre16_jit_stack *pcre16_jit_stack_alloc(int, int);
|
653
|
+
PCRE_EXP_DECL pcre32_jit_stack *pcre32_jit_stack_alloc(int, int);
|
654
|
+
PCRE_EXP_DECL void pcre_jit_stack_free(pcre_jit_stack *);
|
655
|
+
PCRE_EXP_DECL void pcre16_jit_stack_free(pcre16_jit_stack *);
|
656
|
+
PCRE_EXP_DECL void pcre32_jit_stack_free(pcre32_jit_stack *);
|
657
|
+
PCRE_EXP_DECL void pcre_assign_jit_stack(pcre_extra *,
|
658
|
+
pcre_jit_callback, void *);
|
659
|
+
PCRE_EXP_DECL void pcre16_assign_jit_stack(pcre16_extra *,
|
660
|
+
pcre16_jit_callback, void *);
|
661
|
+
PCRE_EXP_DECL void pcre32_assign_jit_stack(pcre32_extra *,
|
662
|
+
pcre32_jit_callback, void *);
|
663
|
+
|
664
|
+
#ifdef __cplusplus
|
665
|
+
} /* extern "C" */
|
666
|
+
#endif
|
667
|
+
|
668
|
+
#endif /* End of pcre.h */
|