regex 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ name : regex
2
+ date : 2010-10-10
3
+ version : 1.1.0
4
+
5
+ requires:
6
+ - syckle (build)
7
+ - qed (test)
@@ -0,0 +1,221 @@
1
+ require 'stringio'
2
+
3
+ module Regex
4
+
5
+ #
6
+ class Replacer
7
+
8
+ # Array of [search, replace] rules.
9
+ attr_reader :rules
10
+
11
+ # Make all patterns exact string matchers.
12
+ attr_accessor :escape
13
+
14
+ # Make all patterns global matchers.
15
+ attr_accessor :global
16
+
17
+ # Make all patterns case-insensitive matchers.
18
+ attr_accessor :insensitive
19
+
20
+ # Make all patterns multi-line matchers.
21
+ attr_accessor :multiline
22
+
23
+ # Make backups of files when they change.
24
+ attr_accessor :backup
25
+
26
+ #
27
+ def initialize(options={})
28
+ @rules = []
29
+ options.each do |k,v|
30
+ __send__("#{k}=", v)
31
+ end
32
+ end
33
+
34
+ #
35
+ def rule(pattern, replacement)
36
+ @rules << [re(pattern), replacement]
37
+ end
38
+
39
+ #
40
+ def apply(*ios)
41
+ ios.each do |io|
42
+ original = (IO === io || StringIO === io ? io.read : io.to_s)
43
+ generate = original
44
+ rules.each do |(pattern, replacement)|
45
+ if pattern.global
46
+ generate = generate.gsub(pattern.to_re, replacement)
47
+ else
48
+ generate = generate.sub(pattern.to_re, replacement)
49
+ end
50
+ end
51
+ if original != generate
52
+ write(io, generate)
53
+ end
54
+ end
55
+ end
56
+
57
+ private
58
+
59
+ # Parse pattern matcher.
60
+ def re(pattern)
61
+ Matcher.new(
62
+ pattern,
63
+ :global=>global,
64
+ :escape=>escape,
65
+ :multiline=>multiline,
66
+ :insensitive=>insensitive
67
+ )
68
+ end
69
+
70
+ #
71
+ def write(io, text)
72
+ case io
73
+ when File
74
+ if backup
75
+ backup_file = io.path + '.bak'
76
+ File.open(backup_file, 'w'){ |f| f << File.read(io.path) }
77
+ end
78
+ File.open(io.path, 'w'){ |w| w << text }
79
+ when StringIO
80
+ io.string = text
81
+ when IO
82
+ # TODO: How to handle general IO object?
83
+ io.write(text)
84
+ else
85
+ io.replace(text)
86
+ end
87
+ end
88
+
89
+ #
90
+ def self.cli(argv=ARGV)
91
+ searches = []
92
+ replaces = []
93
+ options = {}
94
+ parser = OptionParser.new do |opt|
95
+ opt.on('--subtitute', '-s PATTERN', 'search portion of substitution') do |search|
96
+ searches << search
97
+ end
98
+ opt.on('--template', '-t NAME', 'search for built-in regular expression') do |name|
99
+ searches << "$#{name}"
100
+ end
101
+ opt.on('--replace', '-r STRING', 'replacement string of substitution') do |replace|
102
+ replaces << replace
103
+ end
104
+ opt.on('--escape', '-e', 'make all patterns exact string matchers') do
105
+ options[:escape] = true
106
+ end
107
+ opt.on('--insensitive', '-i', 'make all patterns case-insensitive matchers') do
108
+ options[:insensitive] = true
109
+ end
110
+ #opt.on('--unxml', '-x', 'ignore XML/HTML tags') do
111
+ # options[:unxml] = true
112
+ #end
113
+ opt.on('--global', '-g', 'make all patterns global matchers') do
114
+ options[:global] = true
115
+ end
116
+ opt.on('--multiline', '-m', 'make all patterns multi-line matchers') do
117
+ options[:multiline] = true
118
+ end
119
+ opt.on('-b', '--backup', 'backup any files that are changed') do
120
+ options[:backup] = true
121
+ end
122
+ opt.on_tail('--debug', 'run in debug mode') do
123
+ $DEBUG = true
124
+ end
125
+ opt.on_tail('--help', '-h', 'display this lovely help message') do
126
+ puts opt
127
+ exit 0
128
+ end
129
+ end
130
+ parser.parse!(argv)
131
+
132
+ files = argv
133
+ files.each do |file|
134
+ raise "file does not exist -- #{file}" unless File.exist?(file)
135
+ end
136
+ targets = files.empty? ? [ARGF] : files.map{ |f| File.new(f) }
137
+
138
+ unless searches.size == replaces.size
139
+ raise "search replace mismatch -- #{searches.size} to #{replaces.size}"
140
+ end
141
+ rules = searches.zip(replaces)
142
+
143
+ replacer = new(options)
144
+ rules.each do |search, replace|
145
+ replacer.rule(search, replace)
146
+ end
147
+ replacer.apply(*targets)
148
+ end
149
+
150
+ # Basically a Regex but handles a couple extra options.
151
+ class Matcher
152
+
153
+ #
154
+ attr_accessor :global
155
+
156
+ #
157
+ attr_accessor :escape
158
+
159
+ #
160
+ attr_accessor :multiline
161
+
162
+ #
163
+ attr_accessor :insensitive
164
+
165
+ #
166
+ def initialize(pattern, options={})
167
+ options.each do |k,v|
168
+ __send__("#{k}=", v) if respond_to?("#{k}=")
169
+ end
170
+ @regexp = parse(pattern)
171
+ end
172
+
173
+ #
174
+ def =~(string)
175
+ @regexp =~ string
176
+ end
177
+
178
+ #
179
+ def match(string)
180
+ @regexp.match(string)
181
+ end
182
+
183
+ #
184
+ def to_re
185
+ @regexp
186
+ end
187
+
188
+ # Parse pattern matcher.
189
+ def parse(pattern)
190
+ case pattern
191
+ when Regexp
192
+ pattern
193
+ when /^\$/
194
+ Templates.const_get($'.upcase)
195
+ when /^\/(.*?)\/(\w+)$/
196
+ flags = []
197
+ @global = true if $2.index('g')
198
+ flags << Regexp::MULTILINE if $2.index('m') or multiline
199
+ flags << Regexp::IGNORECASE if $2.index('i') or insensitive
200
+ if $2.index('e') or escape
201
+ Regexp.new(Regexp.escape($1), *flags)
202
+ else
203
+ Regexp.new($1, *flags)
204
+ end
205
+ else
206
+ flags = []
207
+ flags << Regexp::MULTILINE if multiline
208
+ flags << Regexp::IGNORECASE if insensitive
209
+ if escape
210
+ Regexp.new(Regexp.escape(pattern), *flags)
211
+ else
212
+ Regexp.new(pattern, *flags)
213
+ end
214
+ end
215
+ end
216
+
217
+ end
218
+
219
+ end
220
+
221
+ end
@@ -1,4 +1,4 @@
1
- class Regex
1
+ module Regex
2
2
 
3
3
  # Extensions for String class.
4
4
  # These methods are taken directly from Ruby Facets.
@@ -0,0 +1,85 @@
1
+ module Regex
2
+
3
+ # = Templates
4
+ #
5
+ # TODO: What about regular expressions with variable content?
6
+ # Should these be methods rather than constants? But then how
7
+ # would we handle named substitutions?
8
+ module Templates
9
+
10
+ # Empty line.
11
+ EMPTY = /^$/
12
+
13
+ # Blank line.
14
+ BLANK = /^\s*$/
15
+
16
+ NUMBER = /[-+]?[0-9]*\.?[0-9]+/
17
+
18
+ # Markup language tag, e.g \<a>stuff</a>.
19
+ MLTAG = /<([A-Z][A-Z0-9]*)\b[^>]*>(.*?)<\/\1>/i
20
+
21
+ # IPv4 Address
22
+ IPV4 = /\b(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b/
23
+
24
+ # Username
25
+ USERNAME = /^[a-zA-Z0-9_]{3,16}$/
26
+
27
+ # Email Address
28
+ EMAIL = /([a-zA-Z0-9_\-\.]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?)/i
29
+
30
+ # United States phone number.
31
+ USPHONE = /(\d\d\d[-]|\(\d\d\d\))?(\d\d\d)[-](\d\d\d\d)/
32
+
33
+ # United States zip code.
34
+ USZIP = /^[0-9]{5}(-[0-9]{4})?$/
35
+
36
+ # United States social security number.
37
+ SSN = /[0-9]\{3\}-[0-9]\{2\}-[0-9]\{4\}/
38
+
39
+ # United States dollar amount.
40
+ DOLLARS = /\$[0-9]*.[0-9][0-9]/
41
+
42
+ # Bank Identification Code
43
+ BIC = /([a-zA-Z]{4}[a-zA-Z]{2}[a-zA-Z0-9]{2}([a-zA-Z0-9]{3})?)/
44
+
45
+ #
46
+ IBAN = /[a-zA-Z]{2}[0-9]{2}[a-zA-Z0-9]{4}[0-9]{7}([a-zA-Z0-9]?){0,16}/
47
+
48
+ # Hexadecimal value.
49
+ HEX = /(#([0-9A-Fa-f]{3}|[0-9A-Fa-f]{6})\b)/
50
+
51
+ # HTTP URL Address
52
+ HTTP = /^(https?:\/\/)?([\da-z\.-]+)\.([a-z\.]{2,6})([\/\w \?=.-]*)*\/?$/
53
+
54
+ # Ruby comment block.
55
+ RUBYBLOCK = /^=begin\s*(.*?)\n(.*?)\n=end/m
56
+
57
+ # Ruby method definition.
58
+ # TODO: Not quite right.
59
+ RUBYMETHOD_WITH_COMMENT = /(^\ *\#.*?)^\s*def\s*(.*?)$/m
60
+
61
+ #
62
+ RUBYMETHOD = /^\ *def\s*(.*?)$/
63
+
64
+ # By the legendary abigail. Fails to match if and only if it is matched against
65
+ # a prime number of 1's. That is, '11' fails, but '1111' does not.
66
+ # I once heard him talk about why this works, but I forgot most of it.
67
+ PRIMEONES = /^1?$|^(11+?)\1+$/
68
+
69
+ # Name of all constants.
70
+ def self.list
71
+ constants.map{ |c| c.downcase }
72
+ end
73
+
74
+ # Lookup a template by name.
75
+ def self.[](name)
76
+ Templates.const_get(name.upcase)
77
+ end
78
+
79
+ end
80
+
81
+ # Add templates to Regex module.
82
+ include Templates
83
+
84
+ end
85
+
@@ -19,26 +19,26 @@ We will also suppoly a matching pattern, as none of
19
19
  the matching functions will work without providing
20
20
  a pattern or the name of built-in pattern template.
21
21
 
22
- regex = Regex.new(text, :pattern=>'\w+')
22
+ rx = Regex.new(text, :pattern=>'\w+')
23
23
 
24
24
  We can see that the Regex object has converted the pattern
25
25
  into the expected regular expression via the #regex method.
26
26
 
27
- regex.regex.assert == /\w+/m
27
+ rx.regex.assert == /\w+/
28
28
 
29
29
  Under the hood, Regex has split the process of matching,
30
30
  organizing and formatting the results into separate methods.
31
31
  We can use the #structure method to see the match results
32
32
  organized into uniform arrays.
33
33
 
34
- regex.structure.assert == %w{We}
34
+ rx.structure.assert == %w{We}
35
35
 
36
36
  Whereas the last use only returns a single match, if we turn
37
37
  on repeat mode we can see every word.
38
38
 
39
- regex.repeat = true
39
+ rx.repeat = true
40
40
 
41
- regex.structure.assert == %w{We will match against this string}.map{ |e| [e] }
41
+ rx.structure.assert == %w{We will match against this string}.map{ |e| [e] }
42
42
 
43
43
  Notice that repeat mode creates an array in an array.
44
44
 
@@ -0,0 +1,57 @@
1
+ = Search and Replace
2
+
3
+ Regex can also be used to do search and replace across multiple
4
+ strings or IO objects, including files.
5
+
6
+ require 'regex'
7
+
8
+ To perform a search and replace procedure we create a Regex::Replacer object.
9
+ The constructor method takes a Hash of options which set universal parameters
10
+ to apply to all search and replace rules. Usually, each individual rule
11
+ will specify its own options, so for this example we provide none.
12
+
13
+ replacer = Regex::Replacer.new
14
+
15
+ Rules are added via the #rule method.
16
+
17
+ replacer.rule('World', 'Planet Earth')
18
+ replacer.rule('!', '!!!')
19
+
20
+ Rules are applied in the order they were defined. If the rules overlap
21
+ in their effects this can be significant.
22
+
23
+ Now, let's say we have that famous String,
24
+
25
+ string = "Hello, World!"
26
+
27
+ We use the #apply method to actually perform the substitutions.
28
+
29
+ replacer.apply(string)
30
+
31
+ The replacements occur in place. Since in this case we are performing
32
+ the search and replace on a String object, we can see the change
33
+ has taken place.
34
+
35
+ string.assert == "Hello, Planet Earth!!!"
36
+
37
+ As we mentioned at the beginning, substitutions can be applied to IO
38
+ objects in general, so long as they can be reopened for writing.
39
+
40
+ require 'stringio'
41
+
42
+ io = StringIO.new("Hello, World!")
43
+
44
+ replacer.apply(io)
45
+
46
+ io.read.assert == "Hello, Planet Earth!!!"
47
+
48
+ If +io+ were a File object, rather than a StringIO, the file would
49
+ be changed on disk. As a precaution a backup file can be written
50
+ with the name of the file plus a '.bak' extension in the same directory as
51
+ the file. To turn on the backup option, either supply it as an option
52
+ to the constructor, or set it via the writer method.
53
+
54
+ replacer.backup = true
55
+
56
+ (TODO: Example of a file search and replace.)
57
+
metadata CHANGED
@@ -1,12 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: regex
3
3
  version: !ruby/object:Gem::Version
4
+ hash: 19
4
5
  prerelease: false
5
6
  segments:
6
7
  - 1
8
+ - 1
7
9
  - 0
8
- - 0
9
- version: 1.0.0
10
+ version: 1.1.0
10
11
  platform: ruby
11
12
  authors:
12
13
  - Thomas Sawyer
@@ -15,12 +16,39 @@ autorequire:
15
16
  bindir: bin
16
17
  cert_chain: []
17
18
 
18
- date: 2010-02-12 00:00:00 -05:00
19
+ date: 2010-10-12 00:00:00 -04:00
19
20
  default_executable:
20
- dependencies: []
21
-
22
- description: Regex is simple commmandline Regular Expression tool.
23
- email:
21
+ dependencies:
22
+ - !ruby/object:Gem::Dependency
23
+ name: syckle
24
+ prerelease: false
25
+ requirement: &id001 !ruby/object:Gem::Requirement
26
+ none: false
27
+ requirements:
28
+ - - ">="
29
+ - !ruby/object:Gem::Version
30
+ hash: 3
31
+ segments:
32
+ - 0
33
+ version: "0"
34
+ type: :development
35
+ version_requirements: *id001
36
+ - !ruby/object:Gem::Dependency
37
+ name: qed
38
+ prerelease: false
39
+ requirement: &id002 !ruby/object:Gem::Requirement
40
+ none: false
41
+ requirements:
42
+ - - ">="
43
+ - !ruby/object:Gem::Version
44
+ hash: 3
45
+ segments:
46
+ - 0
47
+ version: "0"
48
+ type: :development
49
+ version_requirements: *id002
50
+ description: Regex is a simple commmandline Regular Expression tool, that makes easy to search documents for content matches.
51
+ email: ""
24
52
  executables:
25
53
  - regex
26
54
  extensions: []
@@ -28,32 +56,25 @@ extensions: []
28
56
  extra_rdoc_files:
29
57
  - README
30
58
  files:
31
- - HISTORY
32
- - LICENSE
33
- - MANIFEST
34
- - README
35
59
  - bin/regex
36
- - lib/regex.rb
37
60
  - lib/regex/command.rb
38
61
  - lib/regex/extractor.rb
62
+ - lib/regex/package.yml
63
+ - lib/regex/replacer.rb
39
64
  - lib/regex/string.rb
40
- - lib/regex/templates/common.rb
41
- - meta/authors
42
- - meta/created
43
- - meta/description
44
- - meta/download
45
- - meta/homepage
46
- - meta/mailinglist
47
- - meta/name
48
- - meta/repository
49
- - meta/summary
50
- - meta/title
51
- - meta/version
52
- - test/demos/regex.rdoc
65
+ - lib/regex/templates.rb
66
+ - lib/regex.rb
67
+ - qed/regex.rdoc
68
+ - qed/replacer.rdoc
69
+ - PROFILE
70
+ - PACKAGE
71
+ - LICENSE
72
+ - README
73
+ - HISTORY
53
74
  has_rdoc: true
54
75
  homepage: http://proutils.github.com/regex
55
- licenses: []
56
-
76
+ licenses:
77
+ - ""
57
78
  post_install_message:
58
79
  rdoc_options:
59
80
  - --title
@@ -63,25 +84,29 @@ rdoc_options:
63
84
  require_paths:
64
85
  - lib
65
86
  required_ruby_version: !ruby/object:Gem::Requirement
87
+ none: false
66
88
  requirements:
67
89
  - - ">="
68
90
  - !ruby/object:Gem::Version
91
+ hash: 3
69
92
  segments:
70
93
  - 0
71
94
  version: "0"
72
95
  required_rubygems_version: !ruby/object:Gem::Requirement
96
+ none: false
73
97
  requirements:
74
98
  - - ">="
75
99
  - !ruby/object:Gem::Version
100
+ hash: 3
76
101
  segments:
77
102
  - 0
78
103
  version: "0"
79
104
  requirements: []
80
105
 
81
106
  rubyforge_project: regex
82
- rubygems_version: 1.3.6.pre.3
107
+ rubygems_version: 1.3.7
83
108
  signing_key:
84
109
  specification_version: 3
85
- summary: Regex is simple commmandline Regular Expression tool.
110
+ summary: Regex is a simple commmandline Regular Expression tool.
86
111
  test_files: []
87
112