regextest 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (64) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +11 -0
  3. data/.rspec +2 -0
  4. data/.travis.yml +3 -0
  5. data/Gemfile +4 -0
  6. data/LICENSE.txt +25 -0
  7. data/README.md +88 -0
  8. data/Rakefile +55 -0
  9. data/bin/console +14 -0
  10. data/bin/regextest +4 -0
  11. data/bin/setup +7 -0
  12. data/contrib/Onigmo/RE.txt +522 -0
  13. data/contrib/Onigmo/UnicodeProps.txt +728 -0
  14. data/contrib/Onigmo/testpy.py +1319 -0
  15. data/contrib/unicode/Blocks.txt +298 -0
  16. data/contrib/unicode/CaseFolding.txt +1414 -0
  17. data/contrib/unicode/DerivedAge.txt +1538 -0
  18. data/contrib/unicode/DerivedCoreProperties.txt +11029 -0
  19. data/contrib/unicode/PropList.txt +1525 -0
  20. data/contrib/unicode/PropertyAliases.txt +193 -0
  21. data/contrib/unicode/PropertyValueAliases.txt +1420 -0
  22. data/contrib/unicode/README.txt +25 -0
  23. data/contrib/unicode/Scripts.txt +2539 -0
  24. data/contrib/unicode/UnicodeData.txt +29215 -0
  25. data/lib/pre-case-folding.rb +101 -0
  26. data/lib/pre-posix-char-class.rb +150 -0
  27. data/lib/pre-unicode.rb +116 -0
  28. data/lib/regextest.rb +268 -0
  29. data/lib/regextest/back.rb +58 -0
  30. data/lib/regextest/back/element.rb +151 -0
  31. data/lib/regextest/back/main.rb +356 -0
  32. data/lib/regextest/back/result.rb +498 -0
  33. data/lib/regextest/back/test-case.rb +268 -0
  34. data/lib/regextest/back/work-thread.rb +119 -0
  35. data/lib/regextest/common.rb +63 -0
  36. data/lib/regextest/front.rb +60 -0
  37. data/lib/regextest/front/anchor.rb +45 -0
  38. data/lib/regextest/front/back-refer.rb +120 -0
  39. data/lib/regextest/front/bracket-parser.rb +400 -0
  40. data/lib/regextest/front/bracket-parser.y +117 -0
  41. data/lib/regextest/front/bracket-scanner.rb +124 -0
  42. data/lib/regextest/front/bracket.rb +64 -0
  43. data/lib/regextest/front/builtin-functions.rb +31 -0
  44. data/lib/regextest/front/case-folding.rb +18 -0
  45. data/lib/regextest/front/char-class.rb +243 -0
  46. data/lib/regextest/front/empty.rb +43 -0
  47. data/lib/regextest/front/letter.rb +327 -0
  48. data/lib/regextest/front/manage-parentheses.rb +74 -0
  49. data/lib/regextest/front/parenthesis.rb +153 -0
  50. data/lib/regextest/front/parser.rb +1366 -0
  51. data/lib/regextest/front/parser.y +271 -0
  52. data/lib/regextest/front/range.rb +60 -0
  53. data/lib/regextest/front/repeat.rb +90 -0
  54. data/lib/regextest/front/repeatable.rb +77 -0
  55. data/lib/regextest/front/scanner.rb +187 -0
  56. data/lib/regextest/front/selectable.rb +65 -0
  57. data/lib/regextest/front/sequence.rb +73 -0
  58. data/lib/regextest/front/unicode.rb +1272 -0
  59. data/lib/regextest/regex-option.rb +144 -0
  60. data/lib/regextest/regexp.rb +44 -0
  61. data/lib/regextest/version.rb +5 -0
  62. data/lib/tst-reg-test.rb +159 -0
  63. data/regextest.gemspec +26 -0
  64. metadata +162 -0
@@ -0,0 +1,101 @@
1
+ # encoding: utf-8
2
+
3
+ # Copyright (C) 2016 Mikio Ikoma
4
+
5
+ require "pp"
6
+
7
+ # A script for generating case-folding of Unicode
8
+ # It uses the Unicode.org table contrib/unicode/CaseFolding.txt.
9
+
10
# Generator for the case-folding table used by Regextest.
#
# Reads a CaseFolding.txt table (Unicode.org format) and writes a Ruby source
# file that defines Regextest::Front::CaseFolding.
class RegextestPreCaseFolding
  # Generate the case-folding source file.
  # @param input_file [String] path of the CaseFolding.txt table to read
  # @param output_file [String] path of the Ruby source file to write
  def self.generate(input_file, output_file)
    # Get valid casefoldings from unicode table
    case_foldings = read_unicode_case_folding(input_file)
    puts_unicode_case_folding(output_file, case_foldings)
  end

  # Get list of case-folding pairs from Unicode.org table
  # @param file [String] path of the table
  # @return [Hash] code point array => list of code point arrays; every pair
  #   is registered in both directions
  # @raise [RuntimeError] if a data line does not have the expected format
  def self.read_unicode_case_folding(file)
    case_foldings = {}
    read_unicode_line(file) do |line|
      md = line.match(/^(\h{4,6});\s*([CFST]); ([ \h]+);/)
      raise "not matched line: #{line}" unless md

      code_point = md[1].to_i(16)
      mapping = md[3].split(" ").map { |elem| elem.to_i(16) }
      code_point_string = [code_point].pack("U*")
      mapping_string = mapping.pack("U*")

      # Keep only the pairs that Ruby's own ASCII-mode ignore-case matching
      # accepts; the code point is escaped so it is always taken literally.
      next unless /(?ai:#{Regexp.escape(code_point_string)})/.match(mapping_string)

      (case_foldings[[code_point]] ||= []).push(mapping)
      (case_foldings[mapping] ||= []).push([code_point])
    end
    case_foldings
  end

  # common process for parsing tables of Unicode.org
  # Yields every line that is neither empty nor a '#' comment.
  # @param file [String] path of the table
  def self.read_unicode_line(file)
    content = File.open(file, 'r:BOM|UTF-8') { |f| f.read } # ignore BOM header
    content.split(/\r?\n/).each do |line|
      next if line.empty? || line.start_with?('#')
      yield(line)
    end
  end

  # puts source to the case-folding file
  # @param case_folding_file [String] path of the Ruby source file to write
  # @param case_folding [Hash] table returned by read_unicode_case_folding
  def self.puts_unicode_case_folding(case_folding_file, case_folding)
    # The template is indented by six columns here; strip them for the output.
    template = <<-END_OF_TEMPLATE.gsub(/^ {6}/, "")
      # encoding: utf-8
      # DO NOT Modify This File Since Automatically Generated

      # Range of Unicode
      class Regextest::Front::CaseFolding
        # return case foldings
        def self.ignore_case(letter_array)
          CASE_FOLDING_HASH[letter_array]
        end

        # case folding hash [codepoint] => [[mapping_1], ...]
        CASE_FOLDING_HASH =
        #{case_folding.inspect}
      end

      # Test suite (execute when this file is specified in command line)
      if __FILE__ == $0
      end
    END_OF_TEMPLATE

    File.open(case_folding_file, "w") do |fp|
      fp.puts template
    end
  end
end
85
+
86
# Regenerate the case-folding table, then load the generated file and check
# one well-known pair ('A' => 'a') as a smoke test.
input_file = "./contrib/unicode/CaseFolding.txt"
output_file = "./lib/regextest/front/case-folding.rb"

RegextestPreCaseFolding.generate(input_file, output_file)

# test code
require "regextest"
require output_file

if Regextest::Front::CaseFolding.ignore_case([65]) == [[97]]
  puts "OK"
else
  puts "NG"
  exit(1)
end
@@ -0,0 +1,150 @@
1
+ # encoding: utf-8
2
+
3
+ # Copyright (C) 2016 Mikio Ikoma
4
+
5
+ # CURRENTLY NOT USED
6
+
7
+ require "pp"
8
+
9
# Get the POSIX character class definitions from the Onigmo manual.
#
# Looks for the "Unicode Case:" table inside the "Character class" chapter and
# stores one entry per class name in +hash+ (name => definition with all
# whitespace removed). A definition that ends a line with "|" is continued on
# the following line.
#
# @param file [String] path of the Onigmo manual (RE.txt)
# @param hash [Hash] receives the entries; updated in place
# @raise [RuntimeError] if the table cannot be located or a name is duplicated
def get_onigmo_posix_char_class(file, hash)
  content = File.read(file)
  md = content.match(/\r?\n\d\.\s+Character\s+class.+? Unicode Case:(.+?)\r?\n\r?\n\r?\n/m)
  raise "#{file} format is unmatched" unless md

  # Collapse blank lines, then join definitions continued after a trailing "|"
  posix_def = md[1].gsub(/\r?\n+/m, "\n").gsub(/\|\r?\n/m, "|")

  posix_def.split(/\r?\n/).each do |line|
    elems = line.split(/\s+/)
    name = elems[1] # elems[0] is the empty string left by the indentation
    next unless name && name.match(/^\w+$/) && elems[2]
    raise "Duplicated symbol #{name}" if hash[name]
    hash[name] = elems[2..-1].join("")
  end
end
26
+
27
# Get the list of Unicode character classes from the Onigmo manual.
#
# A line starting with "*" opens a new class; its title (from the third
# character on) becomes the class name with non-word runs replaced by "_".
# Every following non-empty line is a property name of that class. Lines
# before the first "*" line are ignored.
#
# @param file [String] path of the property list (UnicodeProps.txt)
# @param hash [Hash] receives property name => class name (Symbol); updated in place
# @raise [RuntimeError] if a property name appears twice
def get_onigmo_unicode_propety_class(file, hash)
  class_name = nil
  File.read(file).split(/\r?\n/).each do |line|
    if line.start_with?("*")
      # to_s: a bare "*" line has no title at all
      class_name = line[2..-1].to_s.gsub(/\W+/, "_")
      class_name.chop! if class_name.end_with?("_")
      next
    end
    next if class_name.nil? || line.empty?

    prop_name = line.sub(/\A\s+/, "")
    raise "Duplicated symbol #{prop_name}" if hash[prop_name]
    hash[prop_name] = class_name.to_sym
  end
end
43
+
44
# Dump the POSIX character class table of the Onigmo manual and stop.
# The directory is named "Onigmo" in the package, so the path must use that
# spelling to work on case-sensitive file systems.
hash = {}
get_onigmo_posix_char_class("../contrib/Onigmo/RE.txt", hash)
# get_onigmo_unicode_propety_class("../contrib/Onigmo/UnicodeProps.txt", hash)
pp hash
exit
49
+
50
+
51
+
52
+
53
+
54
# Common processing for the grammar shared by the Unicode definition files.
# Yields every line of +file+ that is neither empty nor a '#' comment.
#
# @param file [String] path of the definition file
# @yieldparam line [String] one data line, without its line terminator
def read_unicode_line(file)
  File.read(file).split(/\r?\n/).each do |line|
    next if line.empty? || line.start_with?('#')
    yield(line)
  end
end
62
+
63
# Read the scripts file.
#
# Every data line registers its code point range under three keys: the script
# name, the two-letter category found after '#' ("L&" is stored as "LC") and
# the first letter of that category. A range that starts right after the last
# range stored under a key extends that range instead of adding a new one.
#
# @param scripts_file [String] path of Scripts.txt
# @param ranges [Hash] key => [[first, last], ...]; updated in place
# @raise [RuntimeError] if a data line does not have the expected format
def read_scripts(scripts_file, ranges)
  read_unicode_line(scripts_file) do |line|
    md = line.match(/^(\h{4,6})(?:\.\.(\h{4,6}))?\s+;\s+(\w+)\s+#\s+(\S+)\s+/)
    raise "syntax error: #{line}" unless md

    range_start = md[1].hex
    range_end = md[2] ? md[2].hex : range_start

    script = md[3]
    category = (md[4] == "L&") ? "LC" : md[4]
    major = category[0..0]

    # uniq: never register the same range twice when two keys coincide
    [script, category, major].uniq.each do |key|
      spans = (ranges[key] ||= [])
      if !spans.empty? && range_start == spans[-1][1] + 1
        spans[-1][1] = range_end
      else
        spans.push [range_start, range_end]
      end
    end
  end
end
93
+
94
# Read the blocks file.
#
# Every "FIRST..LAST; Block Name" line is stored under the key
# "In_" + block name (non-word characters replaced by "_").
# Lines in any other format are skipped without an error.
#
# @param blocks_file [String] path of Blocks.txt
# @param ranges [Hash] key => [[first, last]]; updated in place
# @raise [RuntimeError] if a block name is already present in +ranges+
def read_blocks(blocks_file, ranges)
  read_unicode_line(blocks_file) do |line|
    md = line.match(/^(\h{4,6})\.\.(\h{4,6})\s*;\s+(.+)$/)
    next unless md

    block_name = "In_" + md[3].gsub(/\W/, "_")
    raise "block name #{block_name} is already used" if ranges[block_name]
    ranges[block_name] = [[md[1].hex, md[2].hex]]
  end
end
109
+
110
# Write the Ruby source (built from TRange objects) that corresponds to the
# Unicode scripts and blocks.
#
# @param unicode_file [String] path of the Ruby source file to write
# @param ranges [Hash] class name => [[first, last], ...]
def puts_unicode_ranges(unicode_file, ranges)
  # One "hash[...] = CharClass.new([...])" statement per class. The twelve
  # leading blanks are reduced to six when the template indentation is removed.
  ranges_source = ranges.map { |class_name, class_ranges|
    members = class_ranges.map { |range| "TRange.new(#{range[0]}, #{range[1]})" }.join(", ")
    (" " * 12) + "hash[\"#{class_name}\"] = CharClass.new([" + members + "])"
  }.join("\n")

  # The template is indented by six columns here; strip them for the output.
  template = <<-END_OF_TEMPLATE.gsub(/^ {6}/, "")
      # encoding: utf-8
      # DO NOT Modify This File Since Automatically Generated

      # Unicodeのレンジ
      module Regextest::Front::ParseUnicode
        class Unicode
          # ハッシュの生成
          def self.ranges()
            hash = {}
#{ranges_source}
            hash
          end
        end
      end

      # Test suite (execute when this file is specified in command line)
      if __FILE__ == $0
      end
  END_OF_TEMPLATE

  File.open(unicode_file, "w") do |fp|
    fp.puts template
  end
end
145
+
146
# Build the script/block range table and write it as Ruby source.
# NOTE(review): this file calls `exit` right after dumping the POSIX table, so
# these statements appear to be unreachable at present - confirm before
# relying on them.
ranges = {}
read_scripts("./unicode/Scripts.txt", ranges)
read_blocks("./unicode/Blocks.txt", ranges)
puts_unicode_ranges('tst-reg-parse-unicode', ranges)
# pp ranges
@@ -0,0 +1,116 @@
1
+ # encoding: utf-8
2
+
3
+ # Copyright (C) 2016 Mikio Ikoma
4
+
5
+ require "pp"
6
+
7
+ # A script for generating character class of Unicode
8
+ # It does not use tables of Unicode.org,
9
+ # but uses the results of Ruby Regexp execution
10
+
11
# Generator for the Unicode property table used by Regextest.
#
# The code point ranges of every property are obtained by running Ruby's own
# Regexp (\p{...}) over a string of code points, and are written out as the
# Ruby source of Regextest::Front::Unicode.
class RegextestPreUnicode
  # Generate lib/regextest/front/unicode.rb from the Onigmo property list.
  def self.generate
    # Get valid properties of Ruby
    onig_properties = read_onig_properties("./contrib/Onigmo/UnicodeProps.txt")
    ranges = get_ranges_of_properties(onig_properties)
    puts_unicode_ranges('lib/regextest/front/unicode.rb', ranges)
  end

  # Get list of Unicode classes from Onigmo manual
  #
  # A line starting with "*" opens a new class; every following non-empty line
  # is a property name (stored downcased). Lines before the first "*" line are
  # ignored. Names Ruby rejects inside \p{...} are reported with a warning and
  # left out.
  #
  # @param file [String] path of UnicodeProps.txt
  # @return [Hash] property name => { class: String, reg: Regexp, ranges: [] }
  # @raise [RuntimeError] if a property name appears twice
  def self.read_onig_properties(file)
    class_name = nil
    properties = {}
    File.read(file).split(/\r?\n/).each do |line|
      # Type or property
      if line.start_with?("*")
        # to_s: a bare "*" line has no title at all
        class_name = line[2..-1].to_s.gsub(/\W+/, "_")
        class_name.chop! if class_name.end_with?("_")
        next
      end
      next if class_name.nil? || line.empty?

      prop_name = line.sub(/\A\s+/, "").downcase
      raise "Duplicated symbol #{prop_name}" if properties[prop_name]
      begin
        properties[prop_name] = { class: class_name, reg: /\p{#{prop_name}}+/, ranges: [] }
      rescue RegexpError
        # Somehow some property name fails. ignore as for now
        warn "Regexp error at /\\p{#{prop_name}}/"
      end
    end
    properties
  end

  # Compute the code point ranges matched by each property.
  #
  # @param properties [Hash] table returned by read_onig_properties; the
  #   :ranges array of every entry is filled in place
  # @param codepoints [Range<Integer>] code points to examine (the whole
  #   Unicode code space by default); surrogates are always left out
  # @return [Hash] property name => Array of Range (first..last code point)
  def self.get_ranges_of_properties(properties, codepoints = 0..0x10ffff)
    puts "\nGenerating Unicode table. It takes 1-2 minutes."
    ranges = {}

    # Build one string holding every examined letter with a single pack call
    # (concatenating strings one by one causes a performance problem).
    # skip surrogate part
    letters = codepoints.reject { |codepoint| codepoint >= 0xd800 && codepoint <= 0xdfff }
    whole_letters = letters.pack("U*")

    # scan string generated for each class
    properties.each do |prop_name, value|
      whole_letters.scan(value[:reg]) do |matched|
        # each match is one run of consecutive letters having the property
        value[:ranges].push(matched[0].ord..matched[-1].ord)
      end
      ranges[prop_name] = value[:ranges]
    end
    ranges
  end

  # puts source to unicode.rb
  # @param unicode_file [String] path of the Ruby source file to write
  # @param ranges [Hash] property name => Array of Range
  def self.puts_unicode_ranges(unicode_file, ranges)
    # One "when" clause per property. The leading blanks are reduced by eight
    # columns when the template indentation is removed.
    ranges_source = ranges.map { |prop_name, prop_ranges|
      pairs = prop_ranges.map { |range| "[#{range.begin}, #{range.end}]" }.join(", ")
      (" " * 14) + "when \"#{prop_name}\"\n" +
        (" " * 16) + "([" + pairs + "])"
    }.join("\n")

    # The template is indented by eight columns here; strip them for the output.
    template = <<-END_OF_TEMPLATE.gsub(/^ {8}/, "")
        # encoding: utf-8
        # DO NOT Modify This File Since Automatically Generated

        # Range of Unicode
        class Regextest::Front::Unicode
          # Generate hash of properties
          def self.property(class_name)
            case class_name.downcase
#{ranges_source}
            else
              raise "Internal error. Class name (\#{class_name}) not found"
            end
          end

          # enumerate char-set
          def self.enumerate(class_name)
            self.property(class_name).inject([]){|result,elem| result += (elem[0]..elem[1]).to_a}
          end
        end

        # Test suite (execute when this file is specified in command line)
        if __FILE__ == $0
        end
    END_OF_TEMPLATE

    File.open(unicode_file, "w") do |fp|
      fp.puts template
    end
  end
end
115
+
116
# Run the generator when this script is executed.
RegextestPreUnicode.generate
@@ -0,0 +1,268 @@
1
+ # encoding: utf-8
2
+
3
+ # Copyright (C) 2016 Mikio Ikoma
4
+
5
+ # This routine defines Regextest class
6
+ class Regextest; end
7
+
8
+ # Required classes
9
+ require 'regextest/version'
10
+ require 'regextest/common'
11
+ require 'regextest/front'
12
+ require 'regextest/regex-option'
13
+ require 'regextest/back'
14
+ require 'regextest/regexp'
15
+ require 'timeout'
16
+
17
# Generates sample strings that match a given regular expression.
class Regextest
  include Regextest::Common

  # exceptions
  class RegextestError < RuntimeError; end
  class RegextestFailedToGenerate < RuntimeError; end
  class RegextestTimeout < RuntimeError; end

  # Constructor of Regextest class
  # @param [String|Regexp] regex regular expression object (or string)
  # @param [Hash] options parameters for generating. The hash is copied, so the
  #   caller's hash is not modified and nothing is carried over to other instances.
  # @option options [Regextest::RegexOption] :reg_options Regex option parameter
  # @option options [Fixnum] :seed seed for randomization
  # @option options [TrueClass] :verification specify true (or do not specify) to verify generated string using ruby Regexp.
  # @option options [FalseClass] :verification specify false to skip verifying generated string.
  # @return [Regextest] constructed object
  def initialize(regex, options = {})
    @parse_options = (options || {}).dup
    @parse_options[:reg_options] ||= Regextest::RegexOption.new
    @verification = (@parse_options[:verification] != false)
    @reg_string = nil
    @reg_exp = nil

    # Set seed for randomizing
    @seed = set_seed_for_randomizing(@parse_options[:seed])

    # Convert to source string if necessary
    set_regex(regex)

    # Parse string
    @front_end = Regextest::Front.new(@reg_string, @parse_options)

    # To json (use json format for backend)
    @json_obj = @front_end.get_json_obj

    # Prepare back-end process. (use generate method for generating string)
    @back_end = Regextest::Back.new(@json_obj)

    @result = nil
    @reason = nil
  end

  # @!attribute [r] reason
  #   Reason of the most recent failure
  #   @return [Symbol, Hash] reason recorded when generation or verification failed
  #   @return [nil] return nil unless error
  attr_reader :reason

  # @!attribute [r] seed
  #   Seed for randomization
  #   @return [Integer] the seed passed as the :seed option, or the value
  #     returned by Kernel#srand when no seed was given
  attr_reader :seed

  # Generate string matched with specified regular expression
  # @return [MatchData] if matched and verified.
  # @return [String] if matched without verification (i.e. return unverified matched string).
  # @raise [Regextest::RegextestError] if generation fails and no random choice was involved.
  # @raise [Regextest::RegextestFailedToGenerate] if every retry failed.
  # @raise [RuntimeError] if the generated string does not match the regular expression.
  # @raise [Regextest::RegextestTimeout] if detected timeout while verification. Option 'verification: false' may be workaround.
  def generate
    TstConstRetryMax.times do
      # generate string
      reset_random_called
      @result = @back_end.generate
      unless @result
        TstLog "NG: Failed to generate"
        @reason = :failed_to_generate
        unless is_random?
          # no randomness was involved, so retrying cannot change the outcome
          raise(RegextestError, "It is impossible to generate sample string of #{@reg_string}.")
        end
        next
      end

      result_string = @result.pre_match + @result.match + @result.post_match

      # verify generated string
      if @verification
        @result = verify(result_string) # returns a match-object
        unless @result
          TstLog "NG: Failed to verify"
          @reason = :failed_to_verify
          next
        end
        # break if @result is verified
      else
        @result = result_string # returns a string
      end
      break
    end

    unless @result
      raise(RegextestFailedToGenerate, "Regextest failed to generate sample string of #{@reg_string}.")
    end
    @result
  end

  # Get parsed result as JSON string
  # @return [String] parsed result as JSON string
  def to_json
    @front_end.get_json_string
  end

  #---------------#
  private

  # Set seed for randomizing
  # @param [Integer, nil] seed seed given by the caller
  # @return [Integer] the given seed, or the result of Kernel#srand without argument
  def set_seed_for_randomizing(seed)
    if seed
      raise "Invalid seed (#{seed}: #{seed.class}) specified" if !(Integer === seed)
      srand seed
      seed
    else
      srand # return preset seed
    end
  end

  # Convert to source string if necessary
  # @param [String|Regexp] param regular expression given to the constructor
  def set_regex(param)
    case param
    when String
      if param.match(/^\/(.*)\/([imx]*)$/)
        # NOTE(review): eval executes arbitrary Ruby code (e.g. through #{...}
        # inside the literal). Callers must never pass untrusted text here.
        @reg_exp = eval(param)
        @reg_string = @reg_exp.source
      else
        new_param = check_builtin(param)
        @reg_string = new_param
        @reg_exp = /#{@reg_string}/
      end
      @parse_options[:reg_source] = @reg_string
    when Regexp
      @reg_exp = param
      @parse_options[:reg_options].set(@reg_exp.options) # inner regex options have priority
      @reg_string = @reg_exp.source
    else
      raise "Error: string or regular expression required"
    end
  end

  # add built-in functions if any
  # @param [String] param source string of the regular expression
  # @return [String] param followed by the definition of every built-in
  #   function (name of the form _xxx_) it calls with \g<...> or \g'...'
  def check_builtin(param)
    builtin_functions = {}
    param.scan(/\\g[\<\'](_\w+_)[\>\']/) do |func_name|
      builtin_functions[func_name[0]] = true
    end
    unless builtin_functions.empty?
      # loaded only when a built-in function is actually used
      require 'regextest/front/builtin-functions'
      functions = Regextest::Front::BuiltinFunctions.new
      builtin_functions.keys.each do |func_name|
        if func_string = functions.find_func(func_name)
          param = param + func_string
        else
          raise "invalid built-in function name (#{func_name})"
        end
      end
    end
    param
  end

  # Verifies the result
  # @param [String] result_string generated string (pre-match + match + post-match)
  # @return [MatchData] match object of the regular expression
  # @raise [Regextest::RegextestTimeout] if matching takes longer than TstConstTimeout
  # @raise [RuntimeError] if the string does not match
  def verify(result_string)
    md = nil
    begin
      # Timeout.timeout is the supported API (the bare Kernel#timeout is obsolete)
      Timeout.timeout(TstConstTimeout) {
        md = @reg_exp.match(result_string)
      }
    rescue Timeout::Error
      raise(RegextestTimeout,
            "Timeout(#{TstConstTimeout} sec) detected while verifying string(#{result_string}) matched with regex(#{@reg_exp}).")
    end

    if md
      # matched string sometime differs from expected one...
      if (md.pre_match != @result.pre_match ||
          md.to_a[0] != @result.match ||
          md.post_match != @result.post_match)
        @reason = :invalid_match_string
        # @result holds the expected parts, md the parts Ruby actually matched
        TstLog "WARN: Invalid matched string, expected <--> actual"
        TstLog " proc: #{@result.pre_match.inspect} <--> #{md.pre_match.inspect}"
        TstLog " body: #{@result.match.inspect} <--> #{md.to_a[0].inspect}"
        TstLog " succ: #{@result.post_match.inspect} <--> #{md.post_match.inspect}"
      end
    else
      @reason = { rc: :not_matched, string: result_string }
      raise("failed to generate. Not matched regex(#{@reg_string}) string(#{result_string.inspect})")
    end
    md
  end
end
206
+
207
# Test program
if __FILE__ == $0
  # ruby regextest.rb 'regular-expression' => regular-expression
  # ruby regextest.rb '[ab]' => a
  # Without an argument the regular expression is read from standard input.
  include Regextest::Common

  begin
    # $< is the ARGF stream object, so its text has to be read explicitly
    regex = ARGV[0] || $<.read.to_s.chomp
    if regex == "reg"
      regex = /ab # comment
               [a-z]{5,10}
               cd /ix
    end
    if regex == "reg2"
      regex = %r(
        (?<name> [a-zA-Z_:]+ ){0}
        (?<stag> < \g<name> > ){0}
        (?<content> ||\w+|\w+|\w+ (\g<element> | \w+)* ){0}
        (?<etag> </ \k<name+1> >){0}
        (?<element> \g<stag> \g<content>* \g<etag> ){0}
        \g<element>
      )x
    end

    begin
      if ARGV[1]
        # NOTE(review): eval runs whatever Ruby code the command-line
        # arguments contain; acceptable only for this local test driver.
        reg = eval "/#{regex}/#{ARGV[1]}"
      else
        reg = regex
      end
    rescue SyntaxError => ex
      warn "Ruby Regexp: Syntax error: " + ex.message
      reg = regex
    end

    prog = Regextest.new(reg)

    10.times do
      if (md = prog.generate)
        puts " " + TstMdPrint(md) # md.string.inspect
      else
        puts "Failed to generate regex(#{reg})"
      end
    end

  rescue RegexpError => ex
    $stderr.puts "Parse error. #{ex.message}"
    exit(1)

  rescue Regextest::RegextestTimeout, RuntimeError => ex
    # Error process. put error message and exit
    $stderr.puts ex.message
    exit(1)
  end

end
268
+