regex 1.0.0 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ name : regex
2
+ date : 2010-10-10
3
+ version : 1.1.0
4
+
5
+ requires:
6
+ - syckle (build)
7
+ - qed (test)
@@ -0,0 +1,221 @@
1
+ require 'stringio'
2
+
3
+ module Regex
4
+
5
+ #
6
+ class Replacer
7
+
8
+ # Array of [search, replace] rules.
9
+ attr_reader :rules
10
+
11
+ # Make all patterns exact string matchers.
12
+ attr_accessor :escape
13
+
14
+ # Make all patterns global matchers.
15
+ attr_accessor :global
16
+
17
+ # Make all patterns case-insensitive matchers.
18
+ attr_accessor :insensitive
19
+
20
+ # Make all patterns multi-line matchers.
21
+ attr_accessor :multiline
22
+
23
+ # Make backups of files when they change.
24
+ attr_accessor :backup
25
+
26
+ #
27
+ def initialize(options={})
28
+ @rules = []
29
+ options.each do |k,v|
30
+ __send__("#{k}=", v)
31
+ end
32
+ end
33
+
34
+ #
35
+ def rule(pattern, replacement)
36
+ @rules << [re(pattern), replacement]
37
+ end
38
+
39
+ #
40
+ def apply(*ios)
41
+ ios.each do |io|
42
+ original = (IO === io || StringIO === io ? io.read : io.to_s)
43
+ generate = original
44
+ rules.each do |(pattern, replacement)|
45
+ if pattern.global
46
+ generate = generate.gsub(pattern.to_re, replacement)
47
+ else
48
+ generate = generate.sub(pattern.to_re, replacement)
49
+ end
50
+ end
51
+ if original != generate
52
+ write(io, generate)
53
+ end
54
+ end
55
+ end
56
+
57
+ private
58
+
59
+ # Parse pattern matcher.
60
+ def re(pattern)
61
+ Matcher.new(
62
+ pattern,
63
+ :global=>global,
64
+ :escape=>escape,
65
+ :multiline=>multiline,
66
+ :insensitive=>insensitive
67
+ )
68
+ end
69
+
70
+ #
71
+ def write(io, text)
72
+ case io
73
+ when File
74
+ if backup
75
+ backup_file = io.path + '.bak'
76
+ File.open(backup_file, 'w'){ |f| f << File.read(io.path) }
77
+ end
78
+ File.open(io.path, 'w'){ |w| w << text }
79
+ when StringIO
80
+ io.string = text
81
+ when IO
82
+ # TODO: How to handle general IO object?
83
+ io.write(text)
84
+ else
85
+ io.replace(text)
86
+ end
87
+ end
88
+
89
+ #
90
+ def self.cli(argv=ARGV)
91
+ searches = []
92
+ replaces = []
93
+ options = {}
94
+ parser = OptionParser.new do |opt|
95
+ opt.on('--subtitute', '-s PATTERN', 'search portion of substitution') do |search|
96
+ searches << search
97
+ end
98
+ opt.on('--template', '-t NAME', 'search for built-in regular expression') do |name|
99
+ searches << "$#{name}"
100
+ end
101
+ opt.on('--replace', '-r STRING', 'replacement string of substitution') do |replace|
102
+ replaces << replace
103
+ end
104
+ opt.on('--escape', '-e', 'make all patterns exact string matchers') do
105
+ options[:escape] = true
106
+ end
107
+ opt.on('--insensitive', '-i', 'make all patterns case-insensitive matchers') do
108
+ options[:insensitive] = true
109
+ end
110
+ #opt.on('--unxml', '-x', 'ignore XML/HTML tags') do
111
+ # options[:unxml] = true
112
+ #end
113
+ opt.on('--global', '-g', 'make all patterns global matchers') do
114
+ options[:global] = true
115
+ end
116
+ opt.on('--multiline', '-m', 'make all patterns multi-line matchers') do
117
+ options[:multiline] = true
118
+ end
119
+ opt.on('-b', '--backup', 'backup any files that are changed') do
120
+ options[:backup] = true
121
+ end
122
+ opt.on_tail('--debug', 'run in debug mode') do
123
+ $DEBUG = true
124
+ end
125
+ opt.on_tail('--help', '-h', 'display this lovely help message') do
126
+ puts opt
127
+ exit 0
128
+ end
129
+ end
130
+ parser.parse!(argv)
131
+
132
+ files = argv
133
+ files.each do |file|
134
+ raise "file does not exist -- #{file}" unless File.exist?(file)
135
+ end
136
+ targets = files.empty? ? [ARGF] : files.map{ |f| File.new(f) }
137
+
138
+ unless searches.size == replaces.size
139
+ raise "search replace mismatch -- #{searches.size} to #{replaces.size}"
140
+ end
141
+ rules = searches.zip(replaces)
142
+
143
+ replacer = new(options)
144
+ rules.each do |search, replace|
145
+ replacer.rule(search, replace)
146
+ end
147
+ replacer.apply(*targets)
148
+ end
149
+
150
+ # Basically a Regex but handles a couple extra options.
151
+ class Matcher
152
+
153
+ #
154
+ attr_accessor :global
155
+
156
+ #
157
+ attr_accessor :escape
158
+
159
+ #
160
+ attr_accessor :multiline
161
+
162
+ #
163
+ attr_accessor :insensitive
164
+
165
+ #
166
+ def initialize(pattern, options={})
167
+ options.each do |k,v|
168
+ __send__("#{k}=", v) if respond_to?("#{k}=")
169
+ end
170
+ @regexp = parse(pattern)
171
+ end
172
+
173
+ #
174
+ def =~(string)
175
+ @regexp =~ string
176
+ end
177
+
178
+ #
179
+ def match(string)
180
+ @regexp.match(string)
181
+ end
182
+
183
+ #
184
+ def to_re
185
+ @regexp
186
+ end
187
+
188
+ # Parse pattern matcher.
189
+ def parse(pattern)
190
+ case pattern
191
+ when Regexp
192
+ pattern
193
+ when /^\$/
194
+ Templates.const_get($'.upcase)
195
+ when /^\/(.*?)\/(\w+)$/
196
+ flags = []
197
+ @global = true if $2.index('g')
198
+ flags << Regexp::MULTILINE if $2.index('m') or multiline
199
+ flags << Regexp::IGNORECASE if $2.index('i') or insensitive
200
+ if $2.index('e') or escape
201
+ Regexp.new(Regexp.escape($1), *flags)
202
+ else
203
+ Regexp.new($1, *flags)
204
+ end
205
+ else
206
+ flags = []
207
+ flags << Regexp::MULTILINE if multiline
208
+ flags << Regexp::IGNORECASE if insensitive
209
+ if escape
210
+ Regexp.new(Regexp.escape(pattern), *flags)
211
+ else
212
+ Regexp.new(pattern, *flags)
213
+ end
214
+ end
215
+ end
216
+
217
+ end
218
+
219
+ end
220
+
221
+ end
@@ -1,4 +1,4 @@
1
- class Regex
1
+ module Regex
2
2
 
3
3
  # Extensions for String class.
4
4
  # These methods are taken directly from Ruby Facets.
@@ -0,0 +1,85 @@
1
+ module Regex
2
+
3
+ # = Templates
4
+ #
5
+ # TODO: What about regular expressions with variable content?
6
+ # Should these be methods rather than constants? But then how
7
+ # would we handle named substitutions?
8
+ module Templates
9
+
10
+ # Empty line.
11
+ EMPTY = /^$/
12
+
13
+ # Blank line.
14
+ BLANK = /^\s*$/
15
+
16
+ NUMBER = /[-+]?[0-9]*\.?[0-9]+/
17
+
18
+ # Markup language tag, e.g \<a>stuff</a>.
19
+ MLTAG = /<([A-Z][A-Z0-9]*)\b[^>]*>(.*?)<\/\1>/i
20
+
21
+ # IPv4 Address
22
+ IPV4 = /\b(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b/
23
+
24
+ # Username
25
+ USERNAME = /^[a-zA-Z0-9_]{3,16}$/
26
+
27
+ # Email Address
28
+ EMAIL = /([a-zA-Z0-9_\-\.]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?)/i
29
+
30
+ # United States phone number.
31
+ USPHONE = /(\d\d\d[-]|\(\d\d\d\))?(\d\d\d)[-](\d\d\d\d)/
32
+
33
+ # United States zip code.
34
+ USZIP = /^[0-9]{5}(-[0-9]{4})?$/
35
+
36
+ # United States social security number.
37
+ SSN = /[0-9]\{3\}-[0-9]\{2\}-[0-9]\{4\}/
38
+
39
+ # United States dollar amount.
40
+ DOLLARS = /\$[0-9]*.[0-9][0-9]/
41
+
42
+ # Bank Identification Code
43
+ BIC = /([a-zA-Z]{4}[a-zA-Z]{2}[a-zA-Z0-9]{2}([a-zA-Z0-9]{3})?)/
44
+
45
+ #
46
+ IBAN = /[a-zA-Z]{2}[0-9]{2}[a-zA-Z0-9]{4}[0-9]{7}([a-zA-Z0-9]?){0,16}/
47
+
48
+ # Hexadecimal value.
49
+ HEX = /(#([0-9A-Fa-f]{3}|[0-9A-Fa-f]{6})\b)/
50
+
51
+ # HTTP URL Address
52
+ HTTP = /^(https?:\/\/)?([\da-z\.-]+)\.([a-z\.]{2,6})([\/\w \?=.-]*)*\/?$/
53
+
54
+ # Ruby comment block.
55
+ RUBYBLOCK = /^=begin\s*(.*?)\n(.*?)\n=end/m
56
+
57
+ # Ruby method definition.
58
+ # TODO: Not quite right.
59
+ RUBYMETHOD_WITH_COMMENT = /(^\ *\#.*?)^\s*def\s*(.*?)$/m
60
+
61
+ #
62
+ RUBYMETHOD = /^\ *def\s*(.*?)$/
63
+
64
+ # By the legendary abigail. Fails to match if and only if it is matched against
65
+ # a prime number of 1's. That is, '11' fails, but '1111' does not.
66
+ # I once heard him talk why this works, but I forgot most of it.
67
+ PRIMEONES = /^1?$|^(11+?)\1+$/
68
+
69
+ # Name of all constants.
70
+ def self.list
71
+ constants.map{ |c| c.downcase }
72
+ end
73
+
74
+ # Lookup a template by name.
75
+ def self.[](name)
76
+ Templates.const_get(name.upcase)
77
+ end
78
+
79
+ end
80
+
81
+ # Add templates to Regex module.
82
+ include Templates
83
+
84
+ end
85
+
@@ -19,26 +19,26 @@ We will also supply a matching pattern, as none of
19
19
  the matching functions will work without providing
20
20
  a pattern or the name of built-in pattern template.
21
21
 
22
- regex = Regex.new(text, :pattern=>'\w+')
22
+ rx = Regex.new(text, :pattern=>'\w+')
23
23
 
24
24
  We can see that the Regex object has converted the pattern
25
25
  into the expected regular expression via the #regex method.
26
26
 
27
- regex.regex.assert == /\w+/m
27
+ rx.regex.assert == /\w+/
28
28
 
29
29
  Under the hood, Regex has split the process of matching,
30
30
  organizing and formatting the results into separate methods.
31
31
  We can use the #structure method to see the match results
32
32
  organized into uniform arrays.
33
33
 
34
- regex.structure.assert == %w{We}
34
+ rx.structure.assert == %w{We}
35
35
 
36
36
  Whereas the last use only returns a single match, if we turn
37
37
  on repeat mode we can see every word.
38
38
 
39
- regex.repeat = true
39
+ rx.repeat = true
40
40
 
41
- regex.structure.assert == %w{We will match against this string}.map{ |e| [e] }
41
+ rx.structure.assert == %w{We will match against this string}.map{ |e| [e] }
42
42
 
43
43
  Notice that repeat mode creates an array in an array.
44
44
 
@@ -0,0 +1,57 @@
1
+ = Search and Replace
2
+
3
+ Regex can also be used to do search and replace across multiple
4
+ strings or IO objects, including files.
5
+
6
+ require 'regex'
7
+
8
+ To perform search and replace procedure we create a Regex::Replacer object.
9
+ The constructor method takes a Hash of options which set universal parameters
10
+ to apply to all search and replace rules. Usually, each individual rule
11
+ will specify its own options, so for this example we provide none.
12
+
13
+ replacer = Regex::Replacer.new
14
+
15
+ Rules are added via the #rule method.
16
+
17
+ replacer.rule('World', 'Planet Earth')
18
+ replacer.rule('!', '!!!')
19
+
20
+ Rules are applied in the order they were defined. If the rules overlap
21
+ in their effects this can be significant.
22
+
23
+ Now, let's say we have that famous String,
24
+
25
+ string = "Hello, World!"
26
+
27
+ We use the #apply method to actually perform the substitutions.
28
+
29
+ replacer.apply(string)
30
+
31
+ The replacements occur in place. Since in this case we are performing
32
+ the search and replace on a String object, we can see the change
33
+ has taken place.
34
+
35
+ string.assert == "Hello, Planet Earth!!!"
36
+
37
+ As we mentioned at the beginning, substitutions can be applied to IO
38
+ objects in general, so long as they can be reopened for writing.
39
+
40
+ require 'stringio'
41
+
42
+ io = StringIO.new("Hello, World!")
43
+
44
+ replacer.apply(io)
45
+
46
+ io.read.assert == "Hello, Planet Earth!!!"
47
+
48
+ If +io+ were a File object, rather than a StringIO, the file would
49
+ be changed on disk. As a precaution a backup file can be written
50
+ with the name of the file plus a '.bak' extension in the same directory as
51
+ the file. To turn on the backup option, either supply it as an option
52
+ to the constructor, or set it via the writer method.
53
+
54
+ replacer.backup = true
55
+
56
+ (TODO: Example of a file search and replace.)
57
+
metadata CHANGED
@@ -1,12 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: regex
3
3
  version: !ruby/object:Gem::Version
4
+ hash: 19
4
5
  prerelease: false
5
6
  segments:
6
7
  - 1
8
+ - 1
7
9
  - 0
8
- - 0
9
- version: 1.0.0
10
+ version: 1.1.0
10
11
  platform: ruby
11
12
  authors:
12
13
  - Thomas Sawyer
@@ -15,12 +16,39 @@ autorequire:
15
16
  bindir: bin
16
17
  cert_chain: []
17
18
 
18
- date: 2010-02-12 00:00:00 -05:00
19
+ date: 2010-10-12 00:00:00 -04:00
19
20
  default_executable:
20
- dependencies: []
21
-
22
- description: Regex is simple commmandline Regular Expression tool.
23
- email:
21
+ dependencies:
22
+ - !ruby/object:Gem::Dependency
23
+ name: syckle
24
+ prerelease: false
25
+ requirement: &id001 !ruby/object:Gem::Requirement
26
+ none: false
27
+ requirements:
28
+ - - ">="
29
+ - !ruby/object:Gem::Version
30
+ hash: 3
31
+ segments:
32
+ - 0
33
+ version: "0"
34
+ type: :development
35
+ version_requirements: *id001
36
+ - !ruby/object:Gem::Dependency
37
+ name: qed
38
+ prerelease: false
39
+ requirement: &id002 !ruby/object:Gem::Requirement
40
+ none: false
41
+ requirements:
42
+ - - ">="
43
+ - !ruby/object:Gem::Version
44
+ hash: 3
45
+ segments:
46
+ - 0
47
+ version: "0"
48
+ type: :development
49
+ version_requirements: *id002
50
+ description: Regex is a simple commmandline Regular Expression tool, that makes easy to search documents for content matches.
51
+ email: ""
24
52
  executables:
25
53
  - regex
26
54
  extensions: []
@@ -28,32 +56,25 @@ extensions: []
28
56
  extra_rdoc_files:
29
57
  - README
30
58
  files:
31
- - HISTORY
32
- - LICENSE
33
- - MANIFEST
34
- - README
35
59
  - bin/regex
36
- - lib/regex.rb
37
60
  - lib/regex/command.rb
38
61
  - lib/regex/extractor.rb
62
+ - lib/regex/package.yml
63
+ - lib/regex/replacer.rb
39
64
  - lib/regex/string.rb
40
- - lib/regex/templates/common.rb
41
- - meta/authors
42
- - meta/created
43
- - meta/description
44
- - meta/download
45
- - meta/homepage
46
- - meta/mailinglist
47
- - meta/name
48
- - meta/repository
49
- - meta/summary
50
- - meta/title
51
- - meta/version
52
- - test/demos/regex.rdoc
65
+ - lib/regex/templates.rb
66
+ - lib/regex.rb
67
+ - qed/regex.rdoc
68
+ - qed/replacer.rdoc
69
+ - PROFILE
70
+ - PACKAGE
71
+ - LICENSE
72
+ - README
73
+ - HISTORY
53
74
  has_rdoc: true
54
75
  homepage: http://proutils.github.com/regex
55
- licenses: []
56
-
76
+ licenses:
77
+ - ""
57
78
  post_install_message:
58
79
  rdoc_options:
59
80
  - --title
@@ -63,25 +84,29 @@ rdoc_options:
63
84
  require_paths:
64
85
  - lib
65
86
  required_ruby_version: !ruby/object:Gem::Requirement
87
+ none: false
66
88
  requirements:
67
89
  - - ">="
68
90
  - !ruby/object:Gem::Version
91
+ hash: 3
69
92
  segments:
70
93
  - 0
71
94
  version: "0"
72
95
  required_rubygems_version: !ruby/object:Gem::Requirement
96
+ none: false
73
97
  requirements:
74
98
  - - ">="
75
99
  - !ruby/object:Gem::Version
100
+ hash: 3
76
101
  segments:
77
102
  - 0
78
103
  version: "0"
79
104
  requirements: []
80
105
 
81
106
  rubyforge_project: regex
82
- rubygems_version: 1.3.6.pre.3
107
+ rubygems_version: 1.3.7
83
108
  signing_key:
84
109
  specification_version: 3
85
- summary: Regex is simple commmandline Regular Expression tool.
110
+ summary: Regex is a simple commmandline Regular Expression tool.
86
111
  test_files: []
87
112