regextest 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +11 -0
- data/.rspec +2 -0
- data/.travis.yml +3 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +25 -0
- data/README.md +88 -0
- data/Rakefile +55 -0
- data/bin/console +14 -0
- data/bin/regextest +4 -0
- data/bin/setup +7 -0
- data/contrib/Onigmo/RE.txt +522 -0
- data/contrib/Onigmo/UnicodeProps.txt +728 -0
- data/contrib/Onigmo/testpy.py +1319 -0
- data/contrib/unicode/Blocks.txt +298 -0
- data/contrib/unicode/CaseFolding.txt +1414 -0
- data/contrib/unicode/DerivedAge.txt +1538 -0
- data/contrib/unicode/DerivedCoreProperties.txt +11029 -0
- data/contrib/unicode/PropList.txt +1525 -0
- data/contrib/unicode/PropertyAliases.txt +193 -0
- data/contrib/unicode/PropertyValueAliases.txt +1420 -0
- data/contrib/unicode/README.txt +25 -0
- data/contrib/unicode/Scripts.txt +2539 -0
- data/contrib/unicode/UnicodeData.txt +29215 -0
- data/lib/pre-case-folding.rb +101 -0
- data/lib/pre-posix-char-class.rb +150 -0
- data/lib/pre-unicode.rb +116 -0
- data/lib/regextest.rb +268 -0
- data/lib/regextest/back.rb +58 -0
- data/lib/regextest/back/element.rb +151 -0
- data/lib/regextest/back/main.rb +356 -0
- data/lib/regextest/back/result.rb +498 -0
- data/lib/regextest/back/test-case.rb +268 -0
- data/lib/regextest/back/work-thread.rb +119 -0
- data/lib/regextest/common.rb +63 -0
- data/lib/regextest/front.rb +60 -0
- data/lib/regextest/front/anchor.rb +45 -0
- data/lib/regextest/front/back-refer.rb +120 -0
- data/lib/regextest/front/bracket-parser.rb +400 -0
- data/lib/regextest/front/bracket-parser.y +117 -0
- data/lib/regextest/front/bracket-scanner.rb +124 -0
- data/lib/regextest/front/bracket.rb +64 -0
- data/lib/regextest/front/builtin-functions.rb +31 -0
- data/lib/regextest/front/case-folding.rb +18 -0
- data/lib/regextest/front/char-class.rb +243 -0
- data/lib/regextest/front/empty.rb +43 -0
- data/lib/regextest/front/letter.rb +327 -0
- data/lib/regextest/front/manage-parentheses.rb +74 -0
- data/lib/regextest/front/parenthesis.rb +153 -0
- data/lib/regextest/front/parser.rb +1366 -0
- data/lib/regextest/front/parser.y +271 -0
- data/lib/regextest/front/range.rb +60 -0
- data/lib/regextest/front/repeat.rb +90 -0
- data/lib/regextest/front/repeatable.rb +77 -0
- data/lib/regextest/front/scanner.rb +187 -0
- data/lib/regextest/front/selectable.rb +65 -0
- data/lib/regextest/front/sequence.rb +73 -0
- data/lib/regextest/front/unicode.rb +1272 -0
- data/lib/regextest/regex-option.rb +144 -0
- data/lib/regextest/regexp.rb +44 -0
- data/lib/regextest/version.rb +5 -0
- data/lib/tst-reg-test.rb +159 -0
- data/regextest.gemspec +26 -0
- metadata +162 -0
@@ -0,0 +1,101 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# Copyright (C) 2016 Mikio Ikoma
|
4
|
+
|
5
|
+
require "pp"
|
6
|
+
|
7
|
+
# A script for generating case-folding of Unicode
|
8
|
+
# This uses tables of Unicode.org, i.e. contrib/unicode/CaseFolding.txt
|
9
|
+
|
10
|
+
# Generator for the case-folding table used by regextest.
#
# Reads a case-folding table in the Unicode.org CaseFolding.txt format, keeps
# only the pairs that Ruby's own Regexp engine accepts under the (?ai:...)
# options, and writes them out as a Ruby source file that defines
# Regextest::Front::CaseFolding.
class RegextestPreCaseFolding
  # Entry point: read the Unicode table and emit the Ruby source.
  #
  # @param input_file [String] path of the CaseFolding.txt table to read
  # @param output_file [String] path of the Ruby source file to (over)write
  def self.generate(input_file, output_file)
    # Get valid casefoldings from unicode table
    case_foldings = read_unicode_case_folding(input_file)
    puts_unicode_case_folding(output_file, case_foldings)
  end

  # Get list of case-folding pairs from Unicode.org table.
  #
  # Every accepted pair is registered in both directions, so the result maps
  # [code_point] => [mapping, ...] as well as mapping => [[code_point], ...].
  #
  # @param file [String] path of the table
  # @return [Hash{Array<Integer> => Array<Array<Integer>>}]
  # @raise [RuntimeError] if a data line does not have the expected layout
  def self.read_unicode_case_folding(file)
    case_foldings = {}
    read_unicode_line(file) do |line|
      md = line.match(/^(\h{4,6});\s*([CFST]); ([ \h]+);/)
      raise "not matched line: #{line}" unless md

      code_point = md[1].to_i(16)
      mapping = md[3].split(" ").map { |elem| elem.to_i(16) }
      code_point_string = [code_point].pack("U*")
      mapping_string = mapping.pack("U*")

      # Keep the pair only when Ruby itself matches the mapping against the
      # code point case-insensitively; other table entries are dropped.
      next unless /(?ai:#{Regexp.escape(code_point_string)})/.match(mapping_string)

      (case_foldings[[code_point]] ||= []).push(mapping)
      (case_foldings[mapping] ||= []).push([code_point])
    end
    case_foldings
  end

  # Common process for parsing tables of Unicode.org: yields every line that
  # is neither empty nor a '#' comment.
  #
  # @param file [String] path of the table
  # @yieldparam line [String] one data line, without its line terminator
  def self.read_unicode_line(file)
    # "BOM|UTF-8" makes Ruby drop a leading byte-order mark, if there is one
    content = File.open(file, "r:BOM|UTF-8") { |f| f.read }
    content.split(/\r?\n/).each do |line|
      next if line.empty? || line.start_with?("#")
      yield(line)
    end
  end

  # Write the Ruby source that defines Regextest::Front::CaseFolding.
  #
  # @param case_folding_file [String] path of the file to (over)write
  # @param case_folding [Hash] table built by read_unicode_case_folding
  def self.puts_unicode_case_folding(case_folding_file, case_folding)
    template = <<-END_OF_TEMPLATE
      # encoding: utf-8
      # DO NOT Modify This File Since Automatically Generated

      # Range of Unicode
      class Regextest::Front::CaseFolding
        # return case foldings
        def self.ignore_case(letter_array)
          CASE_FOLDING_HASH[letter_array]
        end

        # case folding hash [codepoint] => [[mapping_1], ...]
        CASE_FOLDING_HASH =
          #{case_folding.inspect}
      end

      # Test suite (execute when this file is specified in command line)
      if __FILE__ == $0
      end
    END_OF_TEMPLATE
    # remove the indentation the heredoc body carries inside this method
    template.gsub!(/^      /, "")
    File.open(case_folding_file, "w") do |fp|
      fp.puts template
    end
  end
end
|
85
|
+
|
86
|
+
# Regenerate the case-folding source from the Unicode.org table.
input_file  = "./contrib/unicode/CaseFolding.txt"
output_file = "./lib/regextest/front/case-folding.rb"

RegextestPreCaseFolding.generate(input_file, output_file)

# test code: load the freshly generated table and check one well-known pair
# ("A" => "a"); exit with status 1 when it is missing.
require "regextest"
require "#{output_file}"

if Regextest::Front::CaseFolding.ignore_case([65]) == [[97]]
  puts "OK"
else
  puts "NG"
  exit(1)
end
|
@@ -0,0 +1,150 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# Copyright (C) 2016 Mikio Ikoma
|
4
|
+
|
5
|
+
# CURRENTLY NOT USED
|
6
|
+
|
7
|
+
require "pp"
|
8
|
+
|
9
|
+
# Get the definitions of the POSIX character classes from the Onigmo manual.
#
# Locates the text that follows "Unicode Case:" inside the numbered
# "Character class" chapter and stores one entry per class name, with the
# whitespace of the definition removed, e.g.
#   hash["alnum"] = "Letter|Mark|Decimal_Number"
#
# @param file [String] path of the Onigmo manual (RE.txt)
# @param hash [Hash{String => String}] receives the definitions (modified in place)
# @raise [RuntimeError] if the chapter is not found or a class name is repeated
def get_onigmo_posix_char_class(file, hash)
  content = File.read(file)
  md = content.match(/\r?\n\d\.\s+Character\s+class.+? Unicode Case:(.+?)\r?\n\r?\n\r?\n/m)
  raise "#{file} format is unmatched" unless md

  posix_def = md[1]
  posix_def.gsub!(/\r?\n+/m, "\n")   # squeeze runs of line breaks
  posix_def.gsub!(/\|\r?\n/m, "|")   # re-join definitions continued after a "|"
  posix_def.split(/\r?\n/).each do |line|
    # an indented line splits into ["", name, definition words...]
    _indent, name, *definition = line.split(/\s+/)
    next unless name && name.match(/^\w+$/) && !definition.empty?

    raise "Duplicated symbol #{name}" if hash[name]
    hash[name] = definition.join("")
  end
end
|
26
|
+
|
27
|
+
# Get the list of Unicode character classes from the Onigmo manual.
#
# A line starting with "*" opens a section; its title (from the third
# character on) becomes the section name with every run of non-word
# characters replaced by "_". Every following non-empty line is a property
# name that is stored as hash[property] = :Section_Name.
#
# @param file [String] path of the property list (UnicodeProps.txt)
# @param hash [Hash{String => Symbol}] receives the properties (modified in place)
# @raise [RuntimeError] if a property name is already present in hash
def get_onigmo_unicode_propety_class(file, hash)
  class_name = nil
  File.read(file).split(/\r?\n/).each do |line|
    if line.start_with?("*")
      # to_s: a bare "*" has no title and would otherwise yield nil
      class_name = line[2..-1].to_s.gsub(/\W+/, "_")
      class_name.chop! if class_name.end_with?("_")
      next
    end
    # skip blank lines and everything that precedes the first section
    next if class_name.nil? || line.empty?

    prop_name = line.sub(/^\s+/, "")
    raise "Duplicated symbol #{prop_name}" if hash[prop_name]
    hash[prop_name] = class_name.to_sym
  end
end
|
43
|
+
|
44
|
+
# Dump the POSIX class definitions found in the Onigmo manual.
# The directory is spelled "Onigmo" in the gem (contrib/Onigmo/RE.txt), so the
# path must use that spelling to work on case-sensitive file systems.
hash = {}
get_onigmo_posix_char_class("../contrib/Onigmo/RE.txt", hash)
# get_onigmo_unicode_propety_class("../contrib/Onigmo/UnicodeProps.txt", hash)
pp hash
# The script stops here: nothing below this line is ever executed.
exit
|
49
|
+
|
50
|
+
|
51
|
+
|
52
|
+
|
53
|
+
|
54
|
+
# Common handling of the line syntax shared by the Unicode definition files:
# yields every line that is neither empty nor a '#' comment.
#
# The file is read as UTF-8 and a leading byte-order mark is dropped, so a
# comment on the very first line is recognised as such.
#
# @param file [String] path of the definition file
# @yieldparam line [String] one data line, without its line terminator
def read_unicode_line(file)
  File.read(file, mode: "r:BOM|UTF-8").split(/\r?\n/).each do |line|
    next if line.empty? || line.start_with?("#")
    yield(line)
  end
end
|
62
|
+
|
63
|
+
# Read the scripts file.
#
# Every data line ("XXXX[..YYYY] ; Script # Gc ...") contributes its code
# point range to three keys of +ranges+: the script name, the token that
# follows '#' (with "L&" stored as "LC") and the first letter of that token.
# A range that starts right after the last range stored under a key extends
# that range instead of adding a new one.
#
# @param scripts_file [String] path of Scripts.txt
# @param ranges [Hash{String => Array<Array(Integer, Integer)>}] modified in place
# @raise [RuntimeError] if a data line does not have the expected layout
def read_scripts(scripts_file, ranges)
  read_unicode_line(scripts_file) do |line|
    md = line.match(/^(\h{4,6})(?:\.\.(\h{4,6}))?\s+;\s+(\w+)\s+#\s+(\S+)\s+/)
    raise "syntax error: #{line}" unless md

    range_start = md[1].hex
    range_end = md[2] ? md[2].hex : range_start   # single code point

    script = md[3]
    category = (md[4] == "L&") ? "LC" : md[4]
    major_category = category[0..0]

    #puts "range: [#{range_start}:#{range_end}]\t#{script}\t#{category}"
    [script, category, major_category].each do |key|
      spans = (ranges[key] ||= [])
      if !spans.empty? && range_start == spans[-1][1] + 1
        spans[-1][1] = range_end            # contiguous: grow the last range
      else
        spans.push [range_start, range_end]
      end
    end
  end
end
|
93
|
+
|
94
|
+
# Read the blocks file.
#
# Every "XXXX..YYYY; Block Name" line is stored as
#   ranges["In_Block_Name"] = [[0xXXXX, 0xYYYY]]
# where each non-word character of the block name is replaced by "_".
# Lines of any other shape are skipped without an error.
#
# @param blocks_file [String] path of Blocks.txt
# @param ranges [Hash{String => Array<Array(Integer, Integer)>}] modified in place
# @raise [RuntimeError] if the resulting key is already present in ranges
def read_blocks(blocks_file, ranges)
  read_unicode_line(blocks_file) do |line|
    md = line.match(/^(\h{4,6})\.\.(\h{4,6})\s*;\s+(.+)$/)
    next unless md

    block_name = "In_" + md[3].gsub(/\W/, "_")
    raise "block name #{block_name} is already used" if ranges[block_name]

    ranges[block_name] = [[md[1].hex, md[2].hex]]
  end
end
|
109
|
+
|
110
|
+
# Output the Ruby source (built from TRange objects) that corresponds to the
# scripts and blocks of Unicode.
#
# The generated file defines Regextest::Front::ParseUnicode::Unicode.ranges,
# which returns a hash with one CharClass per key of +ranges+.
#
# @param unicode_file [String] path of the file to (over)write
# @param ranges [Hash{String => Array<Array(Integer, Integer)>}] ranges per class name
def puts_unicode_ranges(unicode_file, ranges)
  # One assignment per class; indented to sit inside the method body of the
  # template below (template indentation + 6 columns).
  ranges_source = ranges.map { |class_name, class_ranges|
    t_ranges = class_ranges.map { |range| "TRange.new(#{range[0]}, #{range[1]})" }.join(", ")
    (" " * 10) + "hash[\"#{class_name}\"] = CharClass.new([" + t_ranges + "])"
  }.join("\n")

  template = <<-END_OF_TEMPLATE
    # encoding: utf-8
    # DO NOT Modify This File Since Automatically Generated

    # Unicodeのレンジ
    module Regextest::Front::ParseUnicode
      class Unicode
        # ハッシュの生成
        def self.ranges()
          hash = {}
#{ranges_source}
          hash
        end
      end
    end

    # Test suite (execute when this file is specified in command line)
    if __FILE__ == $0
    end
  END_OF_TEMPLATE
  # remove the indentation the heredoc body carries inside this method
  template.gsub!(/^    /, "")
  File.open(unicode_file, "w") do |fp|
    fp.puts template
  end
end
|
145
|
+
|
146
|
+
# Build the script/block range table and write it out.
# NOTE(review): the unconditional `exit` earlier in this file means these
# statements are currently never reached.
ranges = {}
read_scripts("./unicode/Scripts.txt", ranges)
read_blocks("./unicode/Blocks.txt", ranges)
puts_unicode_ranges('tst-reg-parse-unicode', ranges)
# pp ranges
|
data/lib/pre-unicode.rb
ADDED
@@ -0,0 +1,116 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# Copyright (C) 2016 Mikio Ikoma
|
4
|
+
|
5
|
+
require "pp"
|
6
|
+
|
7
|
+
# A script for generating character class of Unicode
|
8
|
+
# It does not use tables of Unicode.org,
|
9
|
+
# but uses the result of Ruby Regexp execution
|
10
|
+
|
11
|
+
# Generator for lib/regextest/front/unicode.rb.
#
# Takes the property names listed in the Onigmo manual, asks Ruby's Regexp
# engine which code points each \p{...} property matches, and writes the
# resulting ranges out as Ruby source defining Regextest::Front::Unicode.
class RegextestPreUnicode
  # Entry point: read the property list, compute the ranges, write the source.
  def self.generate
    # Get valid properties of Ruby
    onig_properties = read_onig_properties("./contrib/Onigmo/UnicodeProps.txt")
    ranges = get_ranges_of_properties(onig_properties)
    puts_unicode_ranges('lib/regextest/front/unicode.rb', ranges)
  end

  # Get list of Unicode classes from Onigmo manual.
  #
  # A line starting with "*" opens a section; every following non-empty line
  # is a property name (stored downcased). Names that Ruby rejects as a
  # \p{...} property are reported with a warning and left out.
  #
  # @param file [String] path of UnicodeProps.txt
  # @return [Hash{String => Hash}] name => { class:, reg:, ranges: [] }
  # @raise [RuntimeError] if a property name occurs twice
  def self.read_onig_properties(file)
    class_name = nil
    properties = {}
    File.read(file).split(/\r?\n/).each do |line|
      # Type or property
      if line.start_with?("*")
        # to_s: a bare "*" has no title and would otherwise yield nil
        class_name = line[2..-1].to_s.gsub(/\W+/, "_")
        class_name.chop! if class_name.end_with?("_")
        next
      end
      # skip blank lines and everything that precedes the first section
      next if class_name.nil? || line.empty?

      prop_name = line.sub(/^\s+/, "").downcase
      raise "Duplicated symbol #{prop_name}" if properties[prop_name]
      begin
        properties[prop_name] = { class: class_name, reg: /\p{#{prop_name}}+/, ranges: [] }
      rescue RegexpError
        # Somehow some property name fails. ignore as for now
        warn "Regexp error at /\\p{#{prop_name}}/"
      end
    end
    properties
  end

  # Compute, for every property, the code point ranges it matches.
  #
  # All code points except the surrogates are laid out in one string in
  # ascending order; each maximal run matched by a property's regexp then
  # becomes one range (first code point of the run .. last code point).
  #
  # @param properties [Hash] result of read_onig_properties; the :ranges
  #   arrays inside it are filled as a side effect
  # @return [Hash{String => Array<Range>}] ranges per property name
  def self.get_ranges_of_properties(properties)
    puts "\nGenerating Unicode table. It takes 1-2 minutes."

    # Build the whole string in one go
    # (concatenating string by string causes a performance problem)
    code_points = (0..0x10ffff).reject { |codepoint|
      # skip surrogate part
      codepoint >= 0xd800 && codepoint <= 0xdfff
    }
    whole_letters = code_points.pack("U*")

    # scan string generated for each class
    ranges = {}
    properties.each do |prop_name, value|
      whole_letters.scan(value[:reg]) do |matched|
        value[:ranges].push(matched[0].ord..matched[-1].ord)
      end
      # puts "#{prop_name}: #{value}"
      ranges[prop_name] = value[:ranges]
    end
    ranges
  end

  # Write the Ruby source that defines Regextest::Front::Unicode.
  #
  # @param unicode_file [String] path of the file to (over)write
  # @param ranges [Hash{String => Array<Range>}] ranges per property name
  def self.puts_unicode_ranges(unicode_file, ranges)
    # One "when" clause per property; indented to sit inside the case
    # statement of the template below (template indentation + 4 columns).
    ranges_source = ranges.map { |prop_name, prop_ranges|
      pairs = prop_ranges.map { |range| "[#{range.begin}, #{range.end}]" }.join(", ")
      (" " * 10) + "when \"#{prop_name}\"\n" +
        (" " * 12) + "([" + pairs + "])"
    }.join("\n")

    template = <<-END_OF_TEMPLATE
      # encoding: utf-8
      # DO NOT Modify This File Since Automatically Generated

      # Range of Unicode
      class Regextest::Front::Unicode
        # Generate hash of properties
        def self.property(class_name)
          case class_name.downcase
#{ranges_source}
          else
            raise "Internal error. Class name (\#{class_name}) not found"
          end
        end

        # enumerate char-set
        def self.enumerate(class_name)
          self.property(class_name).inject([]){|result,elem| result += (elem[0]..elem[1]).to_a}
        end
      end

      # Test suite (execute when this file is specified in command line)
      if __FILE__ == $0
      end
    END_OF_TEMPLATE
    # remove the indentation the heredoc body carries inside this method
    template.gsub!(/^      /, "")
    File.open(unicode_file, "w") do |fp|
      fp.puts template
    end
  end
end
|
115
|
+
|
116
|
+
# Regenerate lib/regextest/front/unicode.rb as soon as this script is loaded.
RegextestPreUnicode.generate
|
data/lib/regextest.rb
ADDED
@@ -0,0 +1,268 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# Copyright (C) 2016 Mikio Ikoma
|
4
|
+
|
5
|
+
# This routine defines Regextest class
|
6
|
+
# Empty definition up front; presumably so that the files required below can
# refer to the Regextest constant before the full class body further down
# is evaluated -- TODO confirm against those files.
class Regextest; end
|
7
|
+
|
8
|
+
# Required classes
|
9
|
+
require 'regextest/version'
|
10
|
+
require 'regextest/common'
|
11
|
+
require 'regextest/front'
|
12
|
+
require 'regextest/regex-option'
|
13
|
+
require 'regextest/back'
|
14
|
+
require 'regextest/regexp'
|
15
|
+
require 'timeout'
|
16
|
+
|
17
|
+
# Generates sample strings that match a given regular expression.
#
# The expression is parsed by Regextest::Front, passed on as a JSON object
# to Regextest::Back, and the string produced there is (unless verification
# is switched off) checked against Ruby's own Regexp.
class Regextest
  include Regextest::Common

  # exceptions
  class RegextestError < RuntimeError; end
  class RegextestFailedToGenerate < RuntimeError; end
  class RegextestTimeout < RuntimeError; end

  # Constructor of Regextest class
  # @param [String|Regexp] regex regular expression object (or string)
  # @param [Hash] options parameters for generating
  # @option options [Regextest::RegexOption] :reg_options Regex option parameter
  # @option options [Integer] :seed seed for randomization
  # @option options [TrueClass] :verification specify true (or do not specify) to verify generated string using ruby Regexp.
  # @option options [FalseClass] :verification specify false to skip verifying the generated string.
  # @return [Regextest] constructed object
  def initialize(regex, options = {})
    # Work on a private copy: the hash gets :reg_options / :reg_source added
    # below, which must neither leak into the caller's hash nor be shared
    # between instances.
    @parse_options = (options || {}).dup
    @parse_options[:reg_options] ||= Regextest::RegexOption.new
    @verification = (@parse_options[:verification] != false)
    @reg_string = nil
    @reg_exp = nil

    # Set seed for randomizing
    @seed = set_seed_for_randomizing(@parse_options[:seed])

    # Convert to source string if necessary
    set_regex(regex)

    # Parse string
    @front_end = Regextest::Front.new(@reg_string, @parse_options)

    # To json (use json format for backend)
    @json_obj = @front_end.get_json_obj

    # Prepare back-end process. (use generate method for generating string)
    @back_end = Regextest::Back.new(@json_obj)

    @result = nil
    @reason = nil
  end

  # @!attribute [r] reason
  #   Reason if failed to generate
  #   @return [Symbol, Hash] reason recorded by the last failure or warning
  #   @return [nil] return nil unless error
  attr_reader :reason

  # @!attribute [r] seed
  #   Seed for randomization
  #   @return [Integer] the seed given in the options, or the seed chosen
  #     at construction time when none was given
  attr_reader :seed

  # Generate string matched with specified regular expression
  # @return [MatchData] if matched and verified.
  # @return [String] if matched without verification (i.e. return unverified matched string).
  # @raise [Regextest::RegextestError] if the back end produced nothing and no randomness was involved
  # @raise [Regextest::RegextestFailedToGenerate] if every retry failed
  # @raise [RuntimeError] if the generated string is not matched by the regular expression
  # @raise [Regextest::RegextestTimeout] if detected timeout while verification. Option 'verification: false' may be workaround.
  def generate
    TstConstRetryMax.times do
      # generate string
      reset_random_called
      @result = @back_end.generate
      if !@result
        TstLog "NG: Failed to generate"
        @reason = :failed_to_generate
        # without any random choice a retry cannot give another outcome
        if !is_random?
          raise(RegextestError, "It is impossible to generate sample string of #{@reg_string}.")
        end
        next
      end

      result_string = @result.pre_match + @result.match + @result.post_match

      # verify generated string
      if @verification
        @result = verify(result_string) # returns a match-object
        # verify raises when nothing matches, so this branch only acts as a
        # safety net should it ever return nil
        if !@result
          TstLog "NG: Failed to verify"
          @reason = :failed_to_verify
          next
        end
        # break if @result is verified
      else
        @result = result_string # returns a string
      end
      break
    end

    if !@result
      raise(RegextestFailedToGenerate, "Regextest failed to generate sample string of #{@reg_string}.")
    end
    @result
  end

  # Get parsed result as JSON string
  # @return [String] parsed result as JSON string
  def to_json
    @front_end.get_json_string
  end

  #---------------#
  private

  # Set seed for randomizing
  # @param [Integer, nil] seed seed to install; nil picks a fresh one
  # @return [Integer] the seed that is now in use
  # @raise [RuntimeError] if seed is given but is not an Integer
  def set_seed_for_randomizing(seed)
    if seed
      raise "Invalid seed (#{seed}: #{seed.class}) specified" if !(Integer === seed)
    else
      # srand without argument returns the PREVIOUS seed, so pick the new
      # seed explicitly to be able to report the one really in use
      seed = Random.new_seed
    end
    srand seed
    seed
  end

  # Convert to source string if necessary
  # @param [String|Regexp] param regular expression (object or string)
  # @raise [RuntimeError] if param is neither a String nor a Regexp
  def set_regex(param)
    case param
    when String
      if param =~ /^\/(.*)\/([imx]*)$/
        # SECURITY(review): eval runs the whole string as Ruby code. Because
        # ^ and $ anchor at line boundaries, a multi-line string with one line
        # of the form /.../ passes this check. Do not feed untrusted strings.
        @reg_exp = eval(param)
        @reg_string = @reg_exp.source
      else
        new_param = check_builtin(param)
        @reg_string = new_param
        @reg_exp = /#{@reg_string}/
      end
      @parse_options[:reg_source] = @reg_string
    when Regexp
      @reg_exp = param
      @parse_options[:reg_options].set(@reg_exp.options) # inner regex options have priority
      @reg_string = @reg_exp.source
    else
      raise "Error: string or regular expression required"
    end
  end

  # add built-in functions if any
  #
  # Looks for calls of the form \g<_name_> (or \g'_name_') and appends the
  # text returned by Regextest::Front::BuiltinFunctions#find_func for each
  # distinct name to the expression.
  # @param [String] param source of the regular expression
  # @return [String] param, extended by the definitions found
  # @raise [RuntimeError] if a name is unknown to BuiltinFunctions
  def check_builtin(param)
    builtin_functions = {}
    param.scan(/\\g[\<\'](_\w+_)[\>\']/) do |func_name|
      builtin_functions[func_name[0]] = true
    end
    return param if builtin_functions.empty?

    # loaded only when an expression really uses a built-in function
    require 'regextest/front/builtin-functions'
    functions = Regextest::Front::BuiltinFunctions.new
    builtin_functions.keys.each do |func_name|
      func_string = functions.find_func(func_name)
      raise "invalid built-in function name (#{func_name})" unless func_string
      param = param + func_string
    end
    param
  end

  # Verifies the result
  # @param [String] result_string generated string (pre_match + match + post_match)
  # @return [MatchData] match of the regular expression against result_string
  # @raise [Regextest::RegextestTimeout] if matching exceeds TstConstTimeout seconds
  # @raise [RuntimeError] if result_string is not matched at all
  def verify(result_string)
    md = nil
    begin
      # Kernel#timeout is gone from current Ruby; use the module function
      Timeout.timeout(TstConstTimeout) {
        md = @reg_exp.match(result_string)
      }
    rescue Timeout::Error
      raise(RegextestTimeout,
        "Timeout(#{TstConstTimeout} sec) detected while verifying string(#{result_string}) matched with regex(#{@reg_exp}).")
    end

    if(md)
      # matched string sometime differs from expected one...
      # (only logged and recorded in @reason; the match is still returned)
      if(md.pre_match != @result.pre_match ||
         md.to_a[0] != @result.match ||
         md.post_match != @result.post_match)
        @reason = :invalid_match_string
        TstLog "WARN: Invalid matched string, expected <--> actual"
        TstLog " proc: #{md.pre_match.inspect} <--> #{@result.pre_match.inspect}"
        TstLog " body: #{md.to_a[0].inspect} <--> #{@result.match.inspect}"
        TstLog " succ: #{md.post_match.inspect} <--> #{@result.post_match.inspect}"
      end
    else
      @reason = { rc: :not_matched, string: result_string}
      raise("failed to generate. Not matched regex(#{@reg_string}) string(#{result_string.inspect})")
    end
    md
  end
end
|
206
|
+
|
207
|
+
# Test program
if __FILE__ == $0
  # ruby regextest.rb 'regular-expression'  => regular-expression
  # ruby regextest.rb '[ab]'                => a
  # The expression is read from standard input when no argument is given;
  # an optional second argument supplies regex option letters.
  include Regextest::Common

  begin

    # $< is the ARGF stream itself, so its text has to be read explicitly
    regex = ARGV[0] || $<.read.chomp
    # "reg" and "reg2" select built-in sample expressions
    if(regex == "reg")
      regex = /ab # comment
               [a-z]{5,10}
               cd /ix
    end
    if(regex == "reg2")
      regex = %r(
        (?<name> [a-zA-Z_:]+ ){0}
        (?<stag> < \g<name> > ){0}
        (?<content> ||\w+|\w+|\w+ (\g<element> | \w+)* ){0}
        (?<etag> </ \k<name+1> >){0}
        (?<element> \g<stag> \g<content>* \g<etag> ){0}
        \g<element>
      )x
    end

    begin
      if ARGV[1]
        # SECURITY(review): eval of command-line text; acceptable only because
        # this is a developer test driver run with trusted arguments.
        reg = eval "/#{regex}/#{ARGV[1]}"
      else
        reg = regex
      end
    rescue SyntaxError => ex
      warn "Ruby Regexp: Syntax error: " + ex.message
      reg = regex
    end

    prog = Regextest.new(reg)

    10.times do
      if(md = prog.generate)
        puts " " + TstMdPrint(md)  # md.string.inspect
      else
        puts "Failed to generate regex(#{reg})"
      end
    end

  rescue RegexpError => ex
    $stderr.puts "Parse error. #{ex.message}"
    exit(1)

  rescue Regextest::RegextestTimeout => ex
    $stderr.puts ex.message
    exit(1)

  rescue RuntimeError => ex
    # Error process. put error message and exit
    $stderr.puts ex.message
    exit(1)
  end

end
|
268
|
+
|