regextest 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +11 -0
  3. data/.rspec +2 -0
  4. data/.travis.yml +3 -0
  5. data/Gemfile +4 -0
  6. data/LICENSE.txt +25 -0
  7. data/README.md +88 -0
  8. data/Rakefile +55 -0
  9. data/bin/console +14 -0
  10. data/bin/regextest +4 -0
  11. data/bin/setup +7 -0
  12. data/contrib/Onigmo/RE.txt +522 -0
  13. data/contrib/Onigmo/UnicodeProps.txt +728 -0
  14. data/contrib/Onigmo/testpy.py +1319 -0
  15. data/contrib/unicode/Blocks.txt +298 -0
  16. data/contrib/unicode/CaseFolding.txt +1414 -0
  17. data/contrib/unicode/DerivedAge.txt +1538 -0
  18. data/contrib/unicode/DerivedCoreProperties.txt +11029 -0
  19. data/contrib/unicode/PropList.txt +1525 -0
  20. data/contrib/unicode/PropertyAliases.txt +193 -0
  21. data/contrib/unicode/PropertyValueAliases.txt +1420 -0
  22. data/contrib/unicode/README.txt +25 -0
  23. data/contrib/unicode/Scripts.txt +2539 -0
  24. data/contrib/unicode/UnicodeData.txt +29215 -0
  25. data/lib/pre-case-folding.rb +101 -0
  26. data/lib/pre-posix-char-class.rb +150 -0
  27. data/lib/pre-unicode.rb +116 -0
  28. data/lib/regextest.rb +268 -0
  29. data/lib/regextest/back.rb +58 -0
  30. data/lib/regextest/back/element.rb +151 -0
  31. data/lib/regextest/back/main.rb +356 -0
  32. data/lib/regextest/back/result.rb +498 -0
  33. data/lib/regextest/back/test-case.rb +268 -0
  34. data/lib/regextest/back/work-thread.rb +119 -0
  35. data/lib/regextest/common.rb +63 -0
  36. data/lib/regextest/front.rb +60 -0
  37. data/lib/regextest/front/anchor.rb +45 -0
  38. data/lib/regextest/front/back-refer.rb +120 -0
  39. data/lib/regextest/front/bracket-parser.rb +400 -0
  40. data/lib/regextest/front/bracket-parser.y +117 -0
  41. data/lib/regextest/front/bracket-scanner.rb +124 -0
  42. data/lib/regextest/front/bracket.rb +64 -0
  43. data/lib/regextest/front/builtin-functions.rb +31 -0
  44. data/lib/regextest/front/case-folding.rb +18 -0
  45. data/lib/regextest/front/char-class.rb +243 -0
  46. data/lib/regextest/front/empty.rb +43 -0
  47. data/lib/regextest/front/letter.rb +327 -0
  48. data/lib/regextest/front/manage-parentheses.rb +74 -0
  49. data/lib/regextest/front/parenthesis.rb +153 -0
  50. data/lib/regextest/front/parser.rb +1366 -0
  51. data/lib/regextest/front/parser.y +271 -0
  52. data/lib/regextest/front/range.rb +60 -0
  53. data/lib/regextest/front/repeat.rb +90 -0
  54. data/lib/regextest/front/repeatable.rb +77 -0
  55. data/lib/regextest/front/scanner.rb +187 -0
  56. data/lib/regextest/front/selectable.rb +65 -0
  57. data/lib/regextest/front/sequence.rb +73 -0
  58. data/lib/regextest/front/unicode.rb +1272 -0
  59. data/lib/regextest/regex-option.rb +144 -0
  60. data/lib/regextest/regexp.rb +44 -0
  61. data/lib/regextest/version.rb +5 -0
  62. data/lib/tst-reg-test.rb +159 -0
  63. data/regextest.gemspec +26 -0
  64. metadata +162 -0
@@ -0,0 +1,101 @@
1
+ # encoding: utf-8
2
+
3
+ # Copyright (C) 2016 Mikio Ikoma
4
+
5
+ require "pp"
6
+
7
+ # A script for generating case-folding of Unicode
8
+ # This uses tables of Unicode.org, i.e. CaseFolding.txt
9
+
10
# Generator of the Ruby source that defines Regextest::Front::CaseFolding.
# The case-folding pairs are taken from the Unicode.org CaseFolding.txt table.
class RegextestPreCaseFolding
  # Read the case-folding table and write the generated Ruby source.
  # @param input_file [String] path of the Unicode.org CaseFolding.txt table
  # @param output_file [String] path of the Ruby source file to (over)write
  def self.generate(input_file, output_file)
    # Get valid casefoldings from unicode table
    case_foldings = read_unicode_case_folding(input_file)
    puts_unicode_case_folding(output_file, case_foldings)
  end

  # Get list of case-folding pairs from Unicode.org table
  # @param file [String] path of CaseFolding.txt
  # @return [Hash] array of code points => list of arrays of code points;
  #   every accepted pair is registered in both directions
  # @raise [RuntimeError] if a non-comment line does not have the expected format
  def self.read_unicode_case_folding(file)
    case_foldings = {}
    read_unicode_line(file) do |line|
      md = line.match(/^(\h{4,6});\s*([CFST]); ([ \h]+);/)
      raise "not matched line: #{line}" unless md

      code_point = md[1].to_i(16)
      mapping = md[3].split(" ").map { |elem| elem.to_i(16) }
      code_point_string = [code_point].pack("U*")
      mapping_string = mapping.pack("U*")

      # Keep the pair only when Ruby's own Regexp (options a and i) matches the
      # mapping against the code point. The character is escaped so that it can
      # never be interpreted as a regex metacharacter.
      next unless /(?ai:#{Regexp.escape(code_point_string)})/.match(mapping_string)

      (case_foldings[[code_point]] ||= []).push mapping
      (case_foldings[mapping] ||= []).push [code_point]
    end
    # for debugging
    # case_foldings.each do |key, value|
    #   value.each { |elem| puts "#{key.pack("U*")} #{key}: #{elem.pack("U*")} #{elem}" }
    # end
    case_foldings
  end

  # common process for parsing tables of Unicode.org
  # Yields every line of the file except empty lines and lines starting with '#'.
  # @param file [String] path of the table
  def self.read_unicode_line(file)
    content = File.open(file, 'r:BOM|UTF-8') { |f| f.read } # ignore BOM header
    content.split(/\r?\n/).each do |line|
      next if line.empty? || line.start_with?('#')
      yield(line)
    end
  end

  # puts source to the case-folding file
  # @param case_folding_file [String] path of the Ruby source file to write
  # @param case_folding [Hash] table returned by read_unicode_case_folding
  def self.puts_unicode_case_folding(case_folding_file, case_folding)
    template = <<-END_OF_TEMPLATE
      # encoding: utf-8
      # DO NOT Modify This File Since Automatically Generated

      # Range of Unicode
      class Regextest::Front::CaseFolding
        # return case foldings
        def self.ignore_case(letter_array)
          CASE_FOLDING_HASH[letter_array]
        end

        # case folding hash [codepoint] => [[mapping_1], ...]
        CASE_FOLDING_HASH =
          #{case_folding.inspect}
      end

      # Test suite (execute when this file is specified in command line)
      if __FILE__ == $0
      end
    END_OF_TEMPLATE

    File.open(case_folding_file, "w") do |fp|
      # remove the indentation that only exists for this heredoc
      fp.puts template.gsub(/^ {6}/, "")
    end
  end
end
85
+
86
# Regenerate the case-folding table from the bundled Unicode data.
input_file = "./contrib/unicode/CaseFolding.txt"
output_file = "./lib/regextest/front/case-folding.rb"
RegextestPreCaseFolding.generate(input_file, output_file)

# test code: load the library and the generated table, then check one pair
require "regextest"
require output_file

folded = Regextest::Front::CaseFolding.ignore_case([65])
unless folded == [[97]]
  puts "NG"
  exit(1)
end
puts "OK"
@@ -0,0 +1,150 @@
1
+ # encoding: utf-8
2
+
3
+ # Copyright (C) 2016 Mikio Ikoma
4
+
5
+ # CURRENTLY NOT USED
6
+
7
+ require "pp"
8
+
9
# Get the definitions of the POSIX character classes from the Onigmo manual.
# Only the part of the "Character class" chapter that follows " Unicode Case:"
# (up to the first two consecutive blank lines) is analyzed.
# @param file [String] path of the manual
# @param hash [Hash] receives class name => definition with all whitespace removed
# @raise [RuntimeError] if the section is not found or a name is already in hash
def get_onigmo_posix_char_class(file, hash)
  content = IO.read(file)
  md = content.match(/\r?\n\d\.\s+Character\s+class.+? Unicode Case:(.+?)\r?\n\r?\n\r?\n/m)
  raise "#{file} format is unmatched" unless md

  # squeeze blank lines, then join definitions continued after a trailing "|"
  posix_def = md[1].gsub(/\r?\n+/m, "\n").gsub(/\|\r?\n/m, "|")
  posix_def.split(/\r?\n/).each do |line|
    # lines are indented, hence the first element of the split is empty
    _, symbol, *definition = line.split(/\s+/)
    next unless symbol && symbol.match(/^\w+$/) && !definition.empty?
    raise "Duplicated symbol #{symbol}" if hash[symbol]
    hash[symbol] = definition.join("")
  end
end
26
+
27
# Get the list of Unicode character classes from the Onigmo manual.
# A line starting with "*" opens a section; every following non-empty line
# is a property name that belongs to that section.
# @param file [String] path of the property list
# @param hash [Hash] receives property name => section name (Symbol)
# @raise [RuntimeError] if a property name is already in hash
def get_onigmo_unicode_propety_class(file, hash)
  class_name = nil
  IO.read(file).split(/\r?\n/).each do |line|
    if line.start_with?("*")
      # "* General Category:" => "General_Category"
      class_name = line[2..-1].gsub(/\W+/, "_").chomp("_")
    elsif class_name && !line.empty?
      prop_name = line.gsub(/^\s+/, "")
      raise "Duplicated symbol #{prop_name}" if hash[prop_name]
      hash[prop_name] = class_name.to_sym
    end
  end
end
43
+
44
# Print the POSIX character-class table parsed from the Onigmo manual.
# The package ships the manual under contrib/Onigmo (capital "O"), so the path
# must use that spelling to work on case-sensitive file systems.
hash = {}
get_onigmo_posix_char_class("../contrib/Onigmo/RE.txt", hash)
# get_onigmo_unicode_propety_class("../contrib/Onigmo/UnicodeProps.txt", hash)
pp hash
# The script stops here; top-level statements further down never run.
exit
49
+
50
+
51
+
52
+
53
+
54
# Common handling of the syntax shared by the Unicode definition files:
# yields every line except empty lines and lines starting with '#'.
# @param file [String] path of the definition file
def read_unicode_line(file)
  IO.read(file).split(/\r?\n/).each do |line|
    yield(line) unless line.empty? || line.start_with?('#')
  end
end
62
+
63
# Read the scripts file.
# Every range is registered under three keys: the script name, the category
# found in the trailing comment ("L&" is stored as "LC") and the first letter
# of that category. A range that starts right after the last stored range of
# a key extends that range instead of adding a new one.
# @param scripts_file [String] path of Scripts.txt
# @param ranges [Hash] receives key => array of [first, last] code point pairs
# @raise [RuntimeError] if a line does not have the expected format
def read_scripts(scripts_file, ranges)
  read_unicode_line(scripts_file) do |line|
    md = line.match(/^(\h{4,6})(?:\.\.(\h{4,6}))?\s+;\s+(\w+)\s+#\s+(\S+)\s+/)
    raise "syntax error: #{line}" unless md

    first = md[1].hex
    last = md[2] ? md[2].hex : first # a single code point is a one-element range
    category = (md[4] == "L&") ? "LC" : md[4]

    #puts "range: [#{first}:#{last}]\t#{md[3]}\t#{category}"
    [md[3], category, category[0..0]].each do |key|
      spans = ranges[key]
      if !spans
        ranges[key] = [[first, last]]
      elsif first == spans[-1][1] + 1
        spans[-1][1] = last
      else
        spans.push [first, last]
      end
    end
  end
end
93
+
94
# Read the blocks file.
# Each block is stored under "In_" + its name with non-word characters
# replaced by "_". Lines that do not look like a block definition are skipped.
# @param blocks_file [String] path of Blocks.txt
# @param ranges [Hash] receives block key => [[first, last]]
# @raise [RuntimeError] if the block key is already present in ranges
def read_blocks(blocks_file, ranges)
  read_unicode_line(blocks_file) do |line|
    md = line.match(/^(\h{4,6})\.\.(\h{4,6})\s*;\s+(.+)$/)
    next unless md

    block_name = "In_" + md[3].gsub(/\W/, "_")
    raise "block name #{block_name} is already used" if ranges[block_name]
    ranges[block_name] = [[md[1].hex, md[2].hex]]
  end
end
109
+
110
# Output the Ruby source (based on TRange) that corresponds to the scripts
# and blocks of Unicode.
# @param unicode_file [String] path of the Ruby source file to write
# @param ranges [Hash] name => array of [first, last] code point pairs
def puts_unicode_ranges(unicode_file, ranges)
  ranges_source = ranges.map { |class_name, spans|
    t_ranges = spans.map { |span| "TRange.new(#{span[0]}, #{span[1]})" }.join(", ")
    "#{' ' * 12}hash[\"#{class_name}\"] = CharClass.new([#{t_ranges}])"
  }.join("\n")

  template = <<-END_OF_TEMPLATE
      # encoding: utf-8
      # DO NOT Modify This File Since Automatically Generated

      # Unicodeのレンジ
      module Regextest::Front::ParseUnicode
        class Unicode
          # ハッシュの生成
          def self.ranges()
            hash = {}
#{ranges_source}
            hash
          end
        end
      end

      # Test suite (execute when this file is specified in command line)
      if __FILE__ == $0
      end
  END_OF_TEMPLATE

  File.open(unicode_file, "w") do |fp|
    # remove the indentation that only exists for this heredoc
    fp.puts template.gsub(/^ {6}/, "")
  end
end
145
+
146
# Collect the ranges of all scripts and blocks, then write the generated source.
# (These statements follow an unconditional `exit` earlier in this script.)
unicode_ranges = {}
read_scripts("./unicode/Scripts.txt", unicode_ranges)
read_blocks("./unicode/Blocks.txt", unicode_ranges)
puts_unicode_ranges('tst-reg-parse-unicode', unicode_ranges)
# pp unicode_ranges
@@ -0,0 +1,116 @@
1
+ # encoding: utf-8
2
+
3
+ # Copyright (C) 2016 Mikio Ikoma
4
+
5
+ require "pp"
6
+
7
+ # A script for generating character class of Unicode
8
+ # It does not use tables of Unicode.org,
9
+ # but uses the result of Ruby Regexp execution
10
+
11
# Generator of the Ruby source that defines Regextest::Front::Unicode.
# The code point ranges of each property are found by running Ruby's own
# Regexp (\p{...}) over all code points.
class RegextestPreUnicode
  # Read the property names, compute their ranges and write the source file.
  def self.generate
    # Get valid properties of Ruby
    onig_properties = read_onig_properties("./contrib/Onigmo/UnicodeProps.txt")
    puts_unicode_ranges('lib/regextest/front/unicode.rb',
                        get_ranges_of_properties(onig_properties))
  end

  # Get list of Unicode classes from Onigmo manual
  # @param file [String] path of the property list
  # @return [Hash] downcased property name => { class:, reg:, ranges: [] }
  # @raise [RuntimeError] if a property name appears twice
  def self.read_onig_properties(file)
    class_name = nil
    properties = {}
    IO.read(file).split(/\r?\n/).each do |line|
      # Type or property
      if line.start_with?("*")
        class_name = line[2..-1].gsub(/\W+/, "_").chomp("_")
        next
      end
      next if class_name.nil? || line.empty?

      prop_name = line.gsub(/^\s+/, "").downcase
      raise "Duplicated symbol #{prop_name}" if properties[prop_name]
      begin
        properties[prop_name] = { class: class_name, reg: /\p{#{prop_name}}+/, ranges: [] }
      rescue RegexpError
        # Some property names are rejected by Regexp; they are skipped for now
        warn "Regexp error at /\\p{#{prop_name}}/"
      end
    end
    properties
  end

  # Find the code point ranges matched by each property
  # @param properties [Hash] result of read_onig_properties (its :ranges are filled in)
  # @return [Hash] property name => array of Range of code points
  def self.get_ranges_of_properties(properties)
    puts "\nGenerating Unicode table. It takes 1-2 minutes."

    # one string that holds every code point except the surrogate part
    code_points = (0..0x10ffff).reject { |codepoint| codepoint.between?(0xd800, 0xdfff) }
    whole_letters = code_points.pack("U*")

    # scan the string for each property; every match is one contiguous run
    properties.each_with_object({}) do |(prop_name, value), ranges|
      whole_letters.scan(value[:reg]) do |matched|
        value[:ranges].push(matched[0].ord..matched[-1].ord)
      end
      # puts "#{prop_name}: #{value}"
      ranges[prop_name] = value[:ranges]
    end
  end

  # puts source to unicode.rb
  # @param unicode_file [String] path of the Ruby source file to write
  # @param ranges [Hash] property name => array of Range of code points
  def self.puts_unicode_ranges(unicode_file, ranges)
    ranges_source = ranges.map { |prop_name, prop_ranges|
      pairs = prop_ranges.map { |range| "[#{range.begin}, #{range.end}]" }.join(", ")
      "#{' ' * 14}when \"#{prop_name}\"\n#{' ' * 16}([#{pairs}])"
    }.join("\n")

    template = <<-END_OF_TEMPLATE
        # encoding: utf-8
        # DO NOT Modify This File Since Automatically Generated

        # Range of Unicode
        class Regextest::Front::Unicode
          # Generate hash of properties
          def self.property(class_name)
            case class_name.downcase
#{ranges_source}
              else
                raise "Internal error. Class name (#\{class_name\}) not found"
            end
          end

          # enumerate char-set
          def self.enumerate(class_name)
            self.property(class_name).inject([]){|result,elem| result += (elem[0]..elem[1]).to_a}
          end
        end

        # Test suite (execute when this file is specified in command line)
        if __FILE__ == $0
        end
    END_OF_TEMPLATE

    File.open(unicode_file, "w") do |fp|
      # remove the indentation that only exists for this heredoc
      fp.puts template.gsub(/^ {8}/, "")
    end
  end
end
115
+
116
# Script entry point: runs the generator, which writes lib/regextest/front/unicode.rb.
+ RegextestPreUnicode.generate
@@ -0,0 +1,268 @@
1
+ # encoding: utf-8
2
+
3
+ # Copyright (C) 2016 Mikio Ikoma
4
+
5
+ # This routine defines Regextest class
6
+ class Regextest; end
7
+
8
+ # Required classes
9
+ require 'regextest/version'
10
+ require 'regextest/common'
11
+ require 'regextest/front'
12
+ require 'regextest/regex-option'
13
+ require 'regextest/back'
14
+ require 'regextest/regexp'
15
+ require 'timeout'
16
+
17
# Generates sample strings that match a given regular expression.
# The expression is parsed by Regextest::Front, the strings are produced by
# Regextest::Back and (optionally) verified with Ruby's own Regexp.
class Regextest
  include Regextest::Common

  # exceptions
  class RegextestError < RuntimeError; end
  class RegextestFailedToGenerate < RuntimeError; end
  class RegextestTimeout < RuntimeError; end

  # Constructor of Regextest class
  # @param [String|Regexp] regex regular expression object (or string)
  # @param [Hash] options parameters for generating
  # @option options [Regextest::RegexOption] :reg_options Regex option parameter
  # @option options [Integer] :seed seed for randomization
  # @option options [Boolean] :verification true (default) to verify generated strings
  #   using ruby Regexp, false to skip the verification
  # @return [Regextest] constructed object
  # @note The options hash is kept in a class variable and modified in place
  #   (:reg_options is filled in; :reg_source is set when regex is a String).
  def initialize(regex, options = {})
    @@parse_options = options
    @@parse_options[:reg_options] ||= Regextest::RegexOption.new
    @verification = options[:verification] != false
    @reg_string = nil
    @reg_exp = nil

    # Set seed for randomizing
    @seed = set_seed_for_randomizing(@@parse_options[:seed])

    # Convert to source string if necessary
    set_regex(regex)

    # Parse string
    @front_end = Regextest::Front.new(@reg_string, @@parse_options)

    # To json (use json format for backend)
    @json_obj = @front_end.get_json_obj

    # Prepare back-end process. (use generate method for generating string)
    @back_end = Regextest::Back.new(@json_obj)

    @result = nil
    @reason = nil
  end

  # @!attribute [r] reason
  #   Reason of the latest failure or mismatch
  #   @return [Symbol, Hash] reason recorded by generate or by the verification
  #   @return [nil] return nil unless error
  attr_reader :reason

  # @!attribute [r] seed
  #   Seed for randomization
  #   @return [Integer] the given seed, or the seed chosen when none was given
  attr_reader :seed

  # Generate string matched with specified regular expression
  # @return [MatchData] if matched and verified.
  # @return [String] if matched without verification (i.e. return unverified matched string).
  # @raise [Regextest::RegextestError] if generation failed and is_random? is false
  # @raise [Regextest::RegextestFailedToGenerate] if no string was obtained within TstConstRetryMax tries
  # @raise [Regextest::RegextestTimeout] if detected timeout while verification. Option 'verification: false' may be workaround.
  # @raise [RuntimeError] if the generated string does not match the regular expression
  def generate
    TstConstRetryMax.times do
      # generate string
      reset_random_called
      @result = @back_end.generate
      unless @result
        TstLog "NG: Failed to generate"
        @reason = :failed_to_generate
        # presumably a retry cannot help when no random choice was involved
        unless is_random?
          raise(RegextestError, "It is impossible to generate sample string of #{@reg_string}.")
        end
        next
      end

      result_string = @result.pre_match + @result.match + @result.post_match

      # verify generated string
      if @verification
        @result = verify(result_string) # returns a match-object
        # defensive: verify raises rather than returning nil when nothing matched
        unless @result
          TstLog "NG: Failed to verify"
          @reason = :failed_to_verify
          next
        end
      else
        @result = result_string # returns a string
      end
      break
    end

    unless @result
      raise(RegextestFailedToGenerate, "Regextest failed to generate sample string of #{@reg_string}.")
    end
    @result
  end

  # Get parsed result as JSON string
  # @return [String] parsed result as JSON string
  def to_json
    @front_end.get_json_string
  end

  #---------------#
  private

  # Set seed for randomizing
  # @param seed [Integer, nil] requested seed
  # @return [Integer] the seed that is in effect after the call
  # @raise [RuntimeError] if seed is given but is not an Integer
  def set_seed_for_randomizing(seed)
    if seed
      raise "Invalid seed (#{seed}: #{seed.class}) specified" if !(Integer === seed)
    else
      # Kernel#srand returns the PREVIOUS seed, so a bare `srand` cannot tell
      # which seed is now in use. Choose the new seed explicitly instead.
      seed = Random.new_seed
    end
    srand seed
    seed
  end

  # Convert to source string if necessary
  # @param param [String, Regexp] regular expression given to the constructor
  # @raise [RuntimeError] if param is neither a String nor a Regexp
  def set_regex(param)
    case param
    when String
      # \A and \Z make sure the WHOLE string looks like a /.../imx literal
      # before it is evaluated (^ and $ would accept any single matching line).
      if param.match(/\A\/(.*)\/([imx]*)\Z/)
        # SECURITY NOTE(review): eval executes "#{...}" interpolations inside
        # the literal. Do not pass untrusted strings in /.../ form.
        @reg_exp = eval(param)
        @reg_string = @reg_exp.source
      else
        @reg_string = check_builtin(param)
        @reg_exp = /#{@reg_string}/
      end
      @@parse_options[:reg_source] = @reg_string
    when Regexp
      @reg_exp = param
      @@parse_options[:reg_options].set(@reg_exp.options) # inner regex options have priority
      @reg_string = @reg_exp.source
    else
      raise "Error: string or regular expression required"
    end
  end

  # add built-in functions if any
  # Built-in functions are referenced as \g<_name_> or \g'_name_'; the string
  # returned by find_func for each referenced name is appended to the expression.
  # @param param [String] source of the regular expression
  # @return [String] param, followed by the appended function strings if any
  # @raise [RuntimeError] if a referenced built-in function is unknown
  def check_builtin(param)
    func_names = param.scan(/\\g[\<\'](_\w+_)[\>\']/).flatten.uniq
    return param if func_names.empty?

    # loaded lazily, only when a built-in function is referenced
    require 'regextest/front/builtin-functions'
    functions = Regextest::Front::BuiltinFunctions.new
    func_names.each do |func_name|
      func_string = functions.find_func(func_name)
      raise "invalid built-in function name (#{func_name})" unless func_string
      param = param + func_string
    end
    param
  end

  # Verifies the result
  # @param result_string [String] generated string (pre-match + match + post-match)
  # @return [MatchData] result of matching the string with the regular expression
  # @raise [Regextest::RegextestTimeout] if matching takes longer than TstConstTimeout seconds
  # @raise [RuntimeError] if the string does not match at all
  def verify(result_string)
    md = nil
    begin
      # Timeout.timeout is used because the top-level `timeout` method is no
      # longer provided by current versions of the timeout library
      Timeout.timeout(TstConstTimeout) do
        md = @reg_exp.match(result_string)
      end
    rescue Timeout::Error
      raise(RegextestTimeout,
            "Timeout(#{TstConstTimeout} sec) detected while verifying string(#{result_string}) matched with regex(#{@reg_exp}).")
    end

    unless md
      @reason = { rc: :not_matched, string: result_string}
      raise("failed to generate. Not matched regex(#{@reg_string}) string(#{result_string.inspect})")
    end

    # matched string sometime differs from expected one...
    # (only logged and recorded in @reason; the match object is still returned)
    if(md.pre_match != @result.pre_match ||
       md.to_a[0] != @result.match ||
       md.post_match != @result.post_match)
      @reason = :invalid_match_string
      TstLog "WARN: Invalid matched string, expected <--> actual"
      TstLog " proc: #{md.pre_match.inspect} <--> #{@result.pre_match.inspect}"
      TstLog " body: #{md.to_a[0].inspect} <--> #{@result.match.inspect}"
      TstLog " succ: #{md.post_match.inspect} <--> #{@result.post_match.inspect}"
    end
    md
  end
end
206
+
207
# Test program (runs only when this file is executed directly)
if __FILE__ == $0
  # ruby regextest.rb 'regular-expression' => regular-expression
  # ruby regextest.rb '[ab]' => a
  include Regextest::Common

  begin
    # the words "reg" and "reg2" select the sample expressions below
    regex =
      case (given = ARGV[0] || $<)
      when "reg"
        /ab # comment
         [a-z]{5,10}
         cd /ix
      when "reg2"
        %r(
          (?<name> [a-zA-Z_:]+ ){0}
          (?<stag> < \g<name> > ){0}
          (?<content> ||\w+|\w+|\w+ (\g<element> | \w+)* ){0}
          (?<etag> </ \k<name+1> >){0}
          (?<element> \g<stag> \g<content>* \g<etag> ){0}
          \g<element>
        )x
      else
        given
      end

    # an optional second argument supplies the regex option letters
    reg =
      begin
        # NOTE(review): eval of command-line text; acceptable for a local test
        # program only
        ARGV[1] ? eval("/#{regex}/#{ARGV[1]}") : regex
      rescue SyntaxError => ex
        warn "Ruby Regexp: Syntax error: " + ex.message
        regex
      end

    prog = Regextest.new(reg)

    10.times do
      md = prog.generate
      puts(md ? " " + TstMdPrint(md) : "Failed to generate regex(#{reg})") # md.string.inspect
    end

  rescue RegexpError => ex
    $stderr.puts "Parse error. #{ex.message}"
    exit(1)

  rescue RuntimeError => ex
    # Error process. put error message and exit
    # (Regextest::RegextestTimeout is a RuntimeError and is reported the same way)
    $stderr.puts ex.message
    exit(1)
  end

end
268
+