regexbuilder 0.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/regexbuilder.rb +1 -0
- data/lib/regexbuilder/regex_builder.rb +114 -0
- data/lib/regexbuilder/regex_util.rb +47 -0
- data/tests/regex_builder_test.rb +156 -0
- data/tests/regex_util_test.rb +22 -0
- metadata +51 -0
data/lib/regexbuilder.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
Dir[File.join(File.dirname(__FILE__), 'regexbuilder/**/*.rb')].sort.each { |lib| require lib }
|
@@ -0,0 +1,114 @@
|
|
1
|
+
# A small DSL for assembling regular-expression source out of readable pieces.
#
# Every helper except +pattern+ returns a String fragment. Fragments are
# combined by plain string concatenation, so normal regex precedence applies:
# a quantifier appended to "an" binds to the "n" only, and an alternation built
# with +one_of+ is not wrapped in a group. Use +group+ to bind explicitly, and
# call +pattern+ last to compile the finished source.
module RegexpBuilder

  # Compiles regex source into a Regexp.
  #
  # @param pattern [String] regex source, usually built with the helpers below
  # @return [Regexp]
  def pattern(pattern)
    /#{pattern}/
  end

  # Anchors

  # Anchors +pattern+ to the start of the string (\A).
  def start_with(pattern)
    "\\A#{pattern}"
  end

  # Anchors +pattern+ to the very end of the string (\z).
  def end_with(pattern)
    "#{pattern}\\z"
  end

  # Anchors +pattern+ to the end of the string, allowing one trailing
  # newline after it (\Z).
  def end_with_if_ignore_last_new_line(pattern)
    "#{pattern}\\Z"
  end

  # Anchors +pattern+ to the start of a line (^).
  def line_start_with(pattern)
    "^#{pattern}"
  end

  # Anchors +pattern+ to the end of a line ($).
  def line_end_with(pattern)
    "#{pattern}$"
  end

  # Requires a word boundary (\b) immediately before +pattern+.
  # Pass "" to get a bare boundary assertion.
  def word_boundary(pattern)
    "\\b#{pattern}"
  end

  # Requires a non-boundary position (\B) immediately before +pattern+.
  def nonword_boundary(pattern)
    "\\B#{pattern}"
  end

  # Character classes

  # Character class matching any one character listed in +pattern+.
  # +pattern+ is inserted verbatim, so ranges such as "a-z" work.
  def either(pattern)
    "[#{pattern}]"
  end

  # Negated character class: any one character NOT listed in +pattern+.
  def neither(pattern)
    "[^#{pattern}]"
  end

  # A single digit (\d).
  def digit
    "\\d"
  end

  # A single non-digit (\D).
  def non_digit
    "\\D"
  end

  # A single whitespace character (\s).
  def space
    "\\s"
  end

  # A single non-whitespace character (\S).
  def non_space
    "\\S"
  end

  # A single word character (\w).
  def word_char
    "\\w"
  end

  # A single non-word character (\W).
  def non_word_char
    "\\W"
  end

  # Any single character as matched by "." .
  def any
    "."
  end

  # Escapes +pattern+ so that it matches itself literally.
  def literal(pattern)
    Regexp.escape(pattern)
  end

  # repeat

  # Zero or more repetitions ("*"); pass :non_greedy for the lazy form ("*?").
  def repeat_any_times(pattern, mode = :greedy)
    "#{pattern}#{mode == :non_greedy ? '*?' : '*'}"
  end

  # Bounded repetition.
  #
  # @param pattern [String] fragment to repeat
  # @param least [Integer] minimum count, or the exact count when +most+ is omitted
  # @param most [Integer] maximum count; -1 (the default) means "exactly +least+"
  # @param mode [Symbol] :greedy (default); any other value yields the lazy
  #   form. Only consulted when +most+ is given.
  # @return [String]
  def repeat(pattern, least, most = -1, mode = :greedy)
    return "#{pattern}{#{least}}" if most == -1

    bounded = "#{pattern}{#{least},#{most}}"
    mode == :greedy ? bounded : "#{bounded}?"
  end

  # One or more repetitions ("+"); pass :non_greedy for the lazy form ("+?").
  def at_least_once(pattern, mode = :greedy)
    "#{pattern}#{mode == :non_greedy ? '+?' : '+'}"
  end

  # Zero or one occurrence ("?"); pass :non_greedy for the lazy form ("??").
  # +mode+ mirrors the parameter accepted by the other quantifier helpers.
  def at_most_once(pattern, mode = :greedy)
    "#{pattern}#{mode == :non_greedy ? '??' : '?'}"
  end

  # At least +times+ repetitions ("{n,}"); pass :non_greedy for "{n,}?".
  # Note the argument order: the count comes first, then the fragment.
  def at_least(times, pattern, mode = :greedy)
    "#{pattern}{#{times},}#{mode == :non_greedy ? '?' : ''}"
  end

  # Alternation

  # Joins the given fragments with "|". The result is not grouped; wrap it in
  # +group+ before concatenating it with other fragments.
  def one_of(*patterns)
    patterns.join("|")
  end

  # group

  # Wraps +pattern+ in a capturing group.
  def group(pattern)
    "(#{pattern})"
  end

end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/regex_builder'
|
2
|
+
|
3
|
+
# Ready-made patterns assembled with the RegexpBuilder DSL.
#
# RegexUtil::IP_Pattern    - a dotted-quad IPv4 address
# RegexUtil::Email_Pattern - an e-mail-like token, matched case-insensitively
#
# Neither pattern is anchored to the whole string, so both can match a
# substring of a longer input.
class RegexUtil

  # Builds the regex source for a dotted-quad IPv4 address.
  class IP
    include RegexpBuilder

    # @return [String] regex source for "octet.octet.octet.octet"
    attr_reader :ip_pattern

    def initialize
      octet = group(less_than_255)
      @ip_pattern = repeat(group(octet + literal(".")), 3) + octet
    end

    # Regex source (ungrouped alternation) for one address octet, 0..255.
    #
    # The method name is historical: 255 itself is accepted, because it is a
    # legal octet value.
    #
    # Alternatives are tried left to right and the surrounding pattern is not
    # anchored, so the three-digit forms must come before the one/two-digit
    # form. Otherwise "192.168.0.123" would match only as "192.168.0.12".
    #
    # @return [String]
    def less_than_255
      one_hundreds = "1" + repeat(digit, 2)                       # 100..199
      two_hundreds = "2" + either("0-4") + either("0-9")          # 200..249
      top_range    = "25" + either("0-5")                         # 250..255
      one_or_two   = repeat(digit, 1, 2)                          # 0..99
      one_of(one_hundreds, two_hundreds, top_range, one_or_two)
    end

  end


  # Builds the regex source for an e-mail-like token: local-part "@" domain.
  class Email
    include RegexpBuilder

    # @return [String] regex source; written in lower case, so compile it
    #   case-insensitively to accept upper-case input
    attr_reader :email_pattern

    def initialize
      @email_pattern = before_at + "@" + after_at
    end

    # Local part: a word boundary followed by one or more of
    # letters, digits, ".", "_", "%", "+", "-".
    # "_" must be in the class: it is a word character, so without it an
    # address such as "first_last@example.com" cannot match at any position.
    def before_at
      at_least_once(word_boundary(either("a-z0-9._%+-")))
    end

    # Domain part: labels, a literal dot, then a 2-4 letter suffix ending on
    # a word boundary.
    # NOTE(review): the 2-4 letter limit rejects longer top-level domains;
    # widen it if such addresses must be accepted.
    def after_at
      at_least_once(either("a-z0-9.-")) + literal(".") + repeat(either("a-z"), 2, 4) + word_boundary("")
    end

  end

  Email_Pattern = Regexp.new(Email.new.email_pattern, Regexp::IGNORECASE)
  IP_Pattern = Regexp.new(IP.new.ip_pattern)

end
|
@@ -0,0 +1,156 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require File.dirname(__FILE__) + '/../lib/regexbuilder'
|
3
|
+
|
4
|
+
# Exercises every RegexpBuilder helper by compiling the fragment it returns
# with +pattern+ and matching the result against sample strings.
class RegexpBuilderTest < Test::Unit::TestCase
  include RegexpBuilder

  # Shared subject for the anchor tests. Uses the framework's per-test hook
  # rather than overriding TestCase#initialize.
  def setup
    @ruby = "ruby"
  end

  def test_ruby_start_with_r
    ["r", "ru", "rub", @ruby].each do |prefix|
      assert_match(pattern(start_with(prefix)), @ruby)
    end
    assert_no_match(pattern(start_with("u")), @ruby)
  end

  def test_ruby_end_with_y
    ["y", "by", "uby", @ruby].each do |suffix|
      assert_match(pattern(end_with(suffix)), @ruby)
    end
    assert_no_match(pattern(end_with("b")), @ruby)

    # \z needs the newline spelled out; \Z tolerates one trailing newline.
    with_newline = @ruby + "\n"
    assert_match(pattern(end_with("\n")), with_newline)
    assert_match(pattern(end_with_if_ignore_last_new_line("y")), with_newline)
  end

  def test_line_begin
    adidas = "impossible is\nnothing"
    assert_match(pattern(line_start_with("impossible")), adidas)
    assert_no_match(pattern(line_start_with("is")), adidas)
    assert_match(pattern(line_start_with("nothing")), adidas)
  end

  def test_line_end
    adidas = "impossible is\nnothing"
    assert_no_match(pattern(line_end_with("impossible")), adidas)
    assert_match(pattern(line_end_with("is")), adidas)
    assert_match(pattern(line_end_with("nothing")), adidas)
  end

  def test_word_boundary
    truth = "ruby is not python"
    %w[is not].each do |word|
      assert_match(pattern(word_boundary(word)), truth)
    end
    # "by" only occurs inside "ruby", never at the start of a word.
    assert_no_match(pattern(word_boundary("by")), truth)
  end

  def test_nonword_boundary
    truth = "ruby step by step"
    # The standalone word "by" satisfies \b; the "by" inside "ruby" satisfies \B.
    assert_match(pattern(word_boundary("by")), truth)
    assert_match(pattern(nonword_boundary("by")), truth)
  end

  def test_either
    abc = pattern(either("abc"))
    %w[a b c].each { |char| assert_match(abc, char) }
    assert_no_match(abc, "A")
  end

  def test_neither
    not_abc = pattern(neither("abc"))
    %w[1 d A].each { |char| assert_match(not_abc, char) }
    assert_no_match(not_abc, "abc")
  end

  def test_digit
    %w[2b F4].each { |text| assert_match(pattern(digit), text) }
    assert_match(pattern(non_digit), "PK")
  end

  def test_space
    [" ", "\n", "\r", "\t"].each do |blank|
      assert_match(pattern(space), blank)
    end
    assert_match(pattern(non_space), "sentence_without_spaces")
  end

  def test_word_char
    %w[_ 123 haha].each { |text| assert_match(pattern(word_char), text) }
    assert_match(pattern(non_word_char), "<>:;")
  end

  def test_any
    ["!@#%^&*", "1234567890", "abcABC", " \r\t\b"].each do |text|
      assert_match(pattern(any), text)
    end
    # "." does not match a newline unless multiline mode is on.
    assert_no_match(pattern(any), "\n")
  end

  def test_literal_should_escape_special_characters
    specials = ".|()[]{}+\\^$*?"
    assert_match(pattern(literal(specials)), specials)
  end

  def test_repeat_any_times
    # Zero repetitions are allowed, so even a digit-free string matches.
    %w[abc 1 12].each do |text|
      assert_match(pattern(repeat_any_times(digit)), text)
    end
  end

  def test_at_least_once
    assert_no_match(pattern(at_least_once(digit)), "abc")
    %w[1 12].each do |text|
      assert_match(pattern(at_least_once(digit)), text)
    end
  end

  def test_at_most_once
    %w[abc 1].each do |text|
      assert_match(pattern(at_most_once(digit)), text)
    end
  end

  def test_repeat
    exactly_three = pattern(repeat(digit, 3))
    assert_match(exactly_three, "123")
    assert_no_match(exactly_three, "12abc")

    assert_match(pattern(repeat(digit, 3, 5)), "1234")
    assert_no_match(pattern(repeat(digit, 4, 5)), "12abc")
  end

  def test_at_least_m
    three_or_more = pattern(at_least(3, digit))
    %w[123 1234].each { |text| assert_match(three_or_more, text) }
    assert_no_match(three_or_more, "12abc")
  end

  def test_non_greedy
    greedy = pattern(at_least_once(digit)).match("123")
    assert(greedy)
    assert_equal("123", greedy[0])

    lazy = pattern(at_least_once(digit, :non_greedy)).match("123")
    assert(lazy)
    assert_equal("1", lazy[0])
  end

  def test_one_of
    languages = pattern(one_of("ruby", "python"))
    assert_match(languages, "the ruby language")
    assert_match(languages, "the python language")
  end

  def test_group
    # Without a group the "+" binds to the final "n" only ("an+").
    ungrouped = pattern(at_least_once("an")).match("banana")
    assert(ungrouped)
    assert_equal("an", ungrouped[0])

    # Grouped, the whole "an" repeats ("(an)+").
    grouped = pattern(at_least_once(group("an"))).match("banana")
    assert(grouped)
    assert_equal("anan", grouped[0])
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require File.dirname(__FILE__) + '/../lib/regexbuilder'
|
3
|
+
|
4
|
+
# Smoke tests for the prebuilt RegexUtil::IP_Pattern and
# RegexUtil::Email_Pattern constants.
class RegexUtilTest < Test::Unit::TestCase

  def test_192_168_0_3_is_a_valid_ip_address
    assert_match(RegexUtil::IP_Pattern, "192.168.0.3")
  end

  def test_500_312_0_3_is_not_a_valid_ip_address
    assert_no_match(RegexUtil::IP_Pattern, "500.312.0.3")
  end

  # Mixed-case input checks that the e-mail pattern ignores case.
  def test_chelsea_at_gmail_dot_com_is_a_valid_email_address
    assert_match(RegexUtil::Email_Pattern, "Chelsea@gmail.com")
  end

  def test_www_dot_google_dot_com_is_not_a_valid_email_address
    assert_no_match(RegexUtil::Email_Pattern, "www.google.com")
  end

end
|
metadata
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
rubygems_version: 0.9.2
|
3
|
+
specification_version: 1
|
4
|
+
name: regexbuilder
|
5
|
+
version: !ruby/object:Gem::Version
|
6
|
+
version: 0.0.0
|
7
|
+
date: 2007-12-12 00:00:00 +08:00
|
8
|
+
summary: An online Diary for families
|
9
|
+
require_paths:
|
10
|
+
- lib
|
11
|
+
email: chelsea@mail.csdn.net
|
12
|
+
homepage: http://regexbuilder.rubyforge.org/
|
13
|
+
rubyforge_project:
|
14
|
+
description:
|
15
|
+
autorequire: regexbuilder
|
16
|
+
default_executable:
|
17
|
+
bindir: bin
|
18
|
+
has_rdoc: false
|
19
|
+
required_ruby_version: !ruby/object:Gem::Version::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">"
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 0.0.0
|
24
|
+
version:
|
25
|
+
platform: ruby
|
26
|
+
signing_key:
|
27
|
+
cert_chain:
|
28
|
+
post_install_message:
|
29
|
+
authors:
|
30
|
+
- Guanglei Li
|
31
|
+
files:
|
32
|
+
- tests/regex_builder_test.rb
|
33
|
+
- tests/regex_util_test.rb
|
34
|
+
- lib/regexbuilder
|
35
|
+
- lib/regexbuilder/regex_builder.rb
|
36
|
+
- lib/regexbuilder/regex_util.rb
|
37
|
+
- lib/regexbuilder.rb
|
38
|
+
test_files:
|
39
|
+
- tests/regex_builder_test.rb
|
40
|
+
rdoc_options: []
|
41
|
+
|
42
|
+
extra_rdoc_files: []
|
43
|
+
|
44
|
+
executables: []
|
45
|
+
|
46
|
+
extensions: []
|
47
|
+
|
48
|
+
requirements: []
|
49
|
+
|
50
|
+
dependencies: []
|
51
|
+
|