glark 1.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,248 @@
1
+ #!/usr/bin/ruby -w
2
+ #!ruby -w
3
+ # vim: set filetype=ruby : set sw=2
4
+
5
+ # Expression factory.
6
+
7
+ require 'English'
8
+
9
+ require 'rubygems'
10
+ require 'riel'
11
+
12
+ require 'glark/options'
13
+ require 'glark/expression'
14
+
15
+ class ExpressionFactory
16
+ include Loggable
17
+
18
+ # signifies no limit to the distance between matches, i.e., anywhere within
19
+ # the entire file is valid.
20
+ INFINITE_DISTANCE = -1
21
+
22
+ attr_reader :expr
23
+
24
# Snapshots the pattern-related settings from the GlarkOptions singleton and
# resets the counter used to number the regular expressions this factory
# creates.
def initialize
  options = GlarkOptions.instance

  @regexps         = 0
  @ignorecase      = options.nocase
  @wholewords      = options.whole_words
  @wholelines      = options.whole_lines
  @extended        = options.extended
  @multiline       = options.multiline
  @highlight       = options.highlight
  @text_highlights = options.text_highlights
  @extract_matches = options.extract_matches
end
36
+
37
# Reads +fname+, which holds one regular expression per line, and combines
# every non-blank line into a single InclusiveOrExpression.
#
# Returns the combined expression, or nil when the file has no patterns.
def read_file(fname)
  log { "reading file: #{fname}" }
  combined = nil

  File.open(fname) do |file|
    file.each_line do |line|
      log { "line: #{line}" }
      line.strip!
      next if line.empty?

      # Append to one flat "or" expression rather than nesting a new one per
      # line, so that very large pattern files cannot overrun the stack.
      re = make_regular_expression(line.chomp)
      if combined
        combined.ops << re
      else
        combined = InclusiveOrExpression.new(re)
      end
    end
  end

  log { "returning expression #{combined}" }

  combined
end
62
+
63
# Builds the match object for a single pattern.
#
# The pattern is compiled with Regexp.create using the factory's matching
# flags (case, whole words/lines, extended, multiline) and wrapped in a
# RegexpFuncObj that is numbered with the running @regexps counter; the
# counter is incremented on every call.
#
# pattern - the pattern text (a copy is compiled, the argument is untouched).
# negated - passed through to Regexp.create as :negated.
#
# Returns the new RegexpFuncObj.
def make_regular_expression(pattern, negated = false)
  # this check is because they may have omitted the pattern, e.g.:
  #   % glark *.cpp
  # File.exist? is used because File.exists? was removed in Ruby 3.2.
  if File.exist?(pattern)
    warn "pattern '#{pattern}' exists as a file.\n Pattern may have been omitted."
  end

  regex = Regexp.create(pattern.dup,
                        :negated    => negated,
                        :ignorecase => @ignorecase,
                        :wholewords => @wholewords,
                        :wholelines => @wholelines,
                        :extended   => @extended,
                        :multiline  => @multiline)

  regex_args = {
    :highlight       => @highlight,
    :text_highlights => @text_highlights,
    :extract_matches => @extract_matches
  }

  re = RegexpFuncObj.new(regex, @regexps, regex_args)
  @regexps += 1
  re
end
88
+
89
# Consumes two consecutive expressions from +args+ (left operand first) and
# returns them as a two-element array. Either element is nil when
# make_expression finds nothing left to parse.
def make_expressions(args)
  Array.new(2) { make_expression(args) }
end
96
+
97
# Drops the optional explicit terminator ("--end-of-<name>") from the front
# of +args+ when it is present. Returns the removed tag, or nil if the next
# argument is something else.
def shift_end_tag(name, args)
  end_tag = "--end-of-" + name
  args.shift if args.first == end_tag
end
102
+
103
# Builds a negated regular expression from the next argument.
#
# The pattern is shifted off the front of +args+; previously the whole
# argument array was handed to make_regular_expression as if it were the
# pattern, and the "missing argument" check could never fire.
#
# Reports an error and exits with status 2 when no pattern is available.
# Returns the negated expression.
def make_not_expression(args)
  pattern = args.shift
  unless pattern
    error "'not' expression takes one argument"
    exit 2
  end

  expr = make_regular_expression(pattern, true)

  # explicit end tag is optional:
  shift_end_tag("not", args)
  expr
end
114
+
115
# Parses the two operands of a binary expression of the given +type+
# ("or", "xor", "and") and consumes the optional "--end-of-<type>" tag.
#
# Reports an error and exits with status 2 when either operand is missing.
# Returns the operands as [ left, right ].
def make_two_expressions(args, type)
  lhs, rhs = make_expressions(args)

  if !lhs || !rhs
    error "'" + type + "' expression takes two arguments"
    exit 2
  end

  shift_end_tag(type, args)
  [ lhs, rhs ]
end
125
+
126
# Parses two operands from +args+ and joins them with an inclusive "or".
def make_or_expression(args)
  InclusiveOrExpression.new(*make_two_expressions(args, "or"))
end
130
+
131
# Parses two operands from +args+ and joins them with an exclusive "or".
def make_xor_expression(args)
  ExclusiveOrExpression.new(*make_two_expressions(args, "xor"))
end
135
+
136
# Tells whether +x+ can be used as an "and" distance: an Integer, text whose
# to_i equals INFINITE_DISTANCE, or text for which #num is truthy.
#
# Integer replaces Fixnum, which was deprecated in Ruby 2.4 and removed in
# Ruby 3.2 (where the old check raised NameError).
#
# Returns a truthy value for acceptable input, nil/false otherwise.
def numeric?(x)
  # NOTE(review): #num is not core Ruby; presumably the String extension
  # supplied by the 'riel' gem -- confirm.
  x && (x.kind_of?(Integer) || (x.to_i == INFINITE_DISTANCE || x.num))
end
139
+
140
# Works out the maximum distance allowed between the two operands of an
# "and" expression.
#
# arg  - the option as written: "-a", "--and" or "--and=N".
# args - the remaining arguments; "-a" always consumes the next one as the
#        distance, "--and" consumes it only when it is numeric (otherwise
#        the distance defaults to "0").
#
# Reports an error and exits with status 2 when the distance is not numeric.
# Returns the distance as an Integer, or infinity when it equals
# INFINITE_DISTANCE.
def make_and_distance(arg, args)
  dist =
    if arg == "-a"
      args.shift
    elsif arg == "--and"
      (args.size > 0 && numeric?(args[0])) ? args.shift : "0"
    elsif arg.index(/^--and=(\-?\d+)?$/)
      $1
    end

  # check to ensure that this is numeric
  unless numeric?(dist)
    error "invalid distance for 'and' expression: '#{dist}'\n" +
      " expecting an integer, or #{INFINITE_DISTANCE} for 'infinite'"
    exit 2
  end

  dist.to_i == INFINITE_DISTANCE ? Float::INFINITY : dist.to_i
end
169
+
170
# Builds an AndExpression: the distance is read first (from +arg+ and, if
# needed, the front of +args+), then the two operands are parsed.
def make_and_expression(arg, args)
  distance = make_and_distance(arg, args)
  AndExpression.new(distance, *make_two_expressions(args, "and"))
end
176
+
177
# Parses an infix expression such as "( a --or b )".
#
# arg  - the token to start with.
# args - the remaining tokens; consumed as parsing proceeds.
#
# Operators combine the expression built so far with the operand that
# follows them; parsing stops at ")" or as soon as a plain pattern has been
# turned into a regular expression. An error is reported (without exiting)
# when no expression could be built.
#
# Returns the expression, or nil if none was provided.
def make_infix_expression(arg, args = [])
  expr = nil

  while arg
    case arg
    when '('
      expr = make_infix_expression(args.shift, args)
    when '--or', '-o'
      operand = make_infix_expression(args.shift, args)
      expr = InclusiveOrExpression.new(expr, operand)
    when '--xor'
      operand = make_infix_expression(args.shift, args)
      expr = ExclusiveOrExpression.new(expr, operand)
    when /^--and/, '-a'
      # the distance may consume the next token, so read it before the operand
      dist = make_and_distance(arg, args)
      operand = make_infix_expression(args.shift, args)
      expr = AndExpression.new(dist, expr, operand)
    when ')'
      break
    else
      # blather "assuming the last argument #{arg} is a pattern"
      expr = make_regular_expression(arg)
      break
    end

    arg = args.shift
  end

  unless expr
    puts "arg: #{arg}; args: #{args.inspect}"
    error "No expression provided."
  end

  expr
end
215
+
216
# Parses the next expression from the front of +args+, consuming the tokens
# it uses.
#
# args        - the remaining command-line tokens.
# warn_option - when true, a token that looks like an option ("-x",
#               "--xyz") but is not a known operator is reported and the
#               program exits with status 2 instead of being used as a
#               pattern.
#
# Returns the expression, or nil when +args+ is empty.
def make_expression(args, warn_option = false)
  arg = args[0]
  return nil unless arg

  case arg
  when "--or", "-o"
    args.shift
    make_or_expression(args)
  when "--xor"
    args.shift
    make_xor_expression(args)
  when %r{^\-\-and}, %r{^\-a}
    args.shift
    make_and_expression(arg, args)
  when '('
    args.shift
    make_infix_expression(arg, args)
  else
    if warn_option && arg.index(/^\-{1,2}\w/)
      warn "option not understood: #{arg}"
      exit 2
    end

    # blather "assuming the last argument #{arg} is a pattern"
    args.shift
    make_regular_expression(arg)
  end
end
247
+
248
+ end
@@ -0,0 +1,297 @@
1
+ #!/usr/bin/ruby -w
2
+ #!ruby -w
3
+ # vim: set filetype=ruby : set sw=2
4
+
5
+ # An extended grep, with extended functionality including full regular
6
+ # expressions, contextual output, highlighting, detection and exclusion of
7
+ # nontext files, and complex matching criteria.
8
+
9
+ require 'English'
10
+ require 'pathname'
11
+
12
+ require 'rubygems'
13
+ require 'riel'
14
+
15
+ require 'glark/options'
16
+ require 'glark/input'
17
+ require 'glark/output'
18
+ require 'glark/expression'
19
+ require 'glark/exprfactory'
20
+
21
# Unbuffer both standard streams.
[ $stdout, $stderr ].each { |stream| stream.sync = true }

# Package identification.
$PACKAGE = "glark"
$VERSION = "1.9.0"
26
+
27
+ # The main processor.
28
+ class Glark
29
+ include Loggable
30
+
31
+ attr_reader :exit_status
32
+
33
# Sets up a search run.
#
# func  - the expression object whose #process is applied to each input.
# files - the names to be searched ("-" denotes standard input).
#
# Reads the remaining settings from the GlarkOptions singleton: whether file
# names are shown, the output formatter class, count/invert flags, context
# sizes, and the list of predicates used to skip files by basename, full
# name or size.
def initialize(func, files)
  @opts  = GlarkOptions.instance
  @func  = func
  @searched_files = [] # files searched, so we don't cycle through links

  @files = files

  # Show file names when asked to; when the option is unset (nil), show them
  # if a label was given, several files are searched, or the single argument
  # is a directory.
  @show_file_names = (@opts.show_file_names ||
                      (@opts.show_file_names.nil? &&
                       (@opts.label ||
                        @files.size > 1 ||
                        (@files[0] != "-" && FileType.type(@files[0]) == FileType::DIRECTORY))))

  @out_class = case @opts.output
               when "grep"
                 GrepOutputFormat
               when "ansi", "xterm", nil
                 GlarkOutputFormat
               when "match"
                 error "output to match list is not yet supported"
                 GlarkMatchList
                 # exit 2
               end

  @count        = @opts.count
  @invert_match = @opts.invert_match

  @after  = @opts.after
  @before = @opts.before
  @output = @opts.output

  # 0 == matches, 1 == no matches, 2 == error
  @exit_status = @invert_match ? 0 : 1

  @skip_methods = []

  if @opts.with_basename || @opts.without_basename
    @skip_methods << Proc.new { |fn| skip?(File.basename(fn), @opts.with_basename, @opts.without_basename) }
  end

  if @opts.with_fullname || @opts.without_fullname
    @skip_methods << Proc.new { |fn| skip?(fn, @opts.with_fullname, @opts.without_fullname) }
  end

  if @opts.size_limit
    # The block parameter is fn; the former reference to an undefined fname
    # raised NameError as soon as a size limit was in effect. Standard input
    # ("-") has no size on disk, so it is never skipped by size.
    @skip_methods << Proc.new { |fn| fn != "-" && File.size(fn) > @opts.size_limit }
  end
end
81
+
82
# Runs the expression over one prepared input.
#
# The input is given a fresh formatter of the configured output class, is
# told about count/invert mode when those are enabled, and is then handed to
# @func.process. A match updates the exit status (0 normally, 1 when
# matches are inverted).
def search_file(input)
  input.output = @out_class.new(input, @show_file_names)

  input.count        = 0    if @count
  input.invert_match = true if @invert_match

  @func.process(input)

  return unless input.matched?

  @exit_status = @invert_match ? 1 : 0
end
95
+
96
# Decides whether +name+ should be skipped: it is skipped when an inclusion
# pattern is given and does not match, or when an exclusion pattern is given
# and does match. Either pattern may be nil.
#
# Returns a truthy value when the name is to be skipped.
def skip?(name, opts_with, opts_without)
  (opts_with && !opts_with.match(name)) ||
    (opts_without && opts_without.match(name))
end
101
+
102
# Returns the first skip predicate that accepts +fname+ (truthy, meaning
# the file is to be skipped), or nil when none does.
def skipped?(fname)
  @skip_methods.find { |check| check.call(fname) }
end
105
+
106
# Searches +fname+ as text, unless one of the skip predicates rejects it.
# "-" denotes standard input.
#
# The file is wrapped in an InputFile carrying the before/after context
# sizes and the output style, and handed to search_file. The file opened
# here is closed again once the search has finished (standard input is left
# open); the former permanently disabled "if false" block has been removed.
def search_text(fname)
  if skipped?(fname)
    log { "skipping file: #{fname}" }
  else
    log { "searching text" }
    log { "searching #{fname} for #{@func}" }

    ifile_args = {
      :after  => @after,
      :before => @before,
      :output => @output
    }

    io = fname == "-" ? $stdin : File.new(fname)

    begin
      search_file(InputFile.new(fname, io, ifile_args))
    ensure
      # avoid leaking one descriptor per searched file
      io.close unless io.equal?($stdin) || io.closed?
    end
  end
end
136
+
137
# Handles a binary file according to the binary_files option, unless one of
# the skip predicates rejects it:
#
#   "without-match" - the file is ignored;
#   "binary"        - the file is opened in binary mode and searched as a
#                     BinaryFile;
#   "text"          - the file is searched as if it were text.
#
# Any other option value results in no action.
def search_binary(fname)
  if skipped?(fname)
    log { "skipping file: #{fname}" }
  else
    log { "handling binary" }

    case @opts.binary_files
    when "without-match"
      log { "skipping binary file #{fname}" }

    when "binary"
      log { "searching binary file #{fname} for #{@func}" }
      f = File.new(fname)
      begin
        f.binmode # for MSDOS/WinWhatever
        search_file(BinaryFile.new(fname, f))
      ensure
        # avoid leaking one descriptor per searched file
        f.close unless f.closed?
      end

    when "text"
      # fname, not the undefined name, which raised NameError whenever this
      # message was actually evaluated
      log { "processing binary file #{fname} as text" }
      search_text(fname)
    end
  end
end
160
+
161
# Handles a directory according to the directory option:
#
#   "read"    - reports that the name is a directory;
#   "recurse" - searches every entry except "." and "..", remembering the
#               inode of each entry so that one reached again (e.g. through
#               a link) is not searched twice;
#   "skip"    - the directory is ignored.
#
# An unreadable directory (Errno::EACCES) is reported rather than raised.
def search_directory(fname)
  log { "processing directory" }
  case @opts.directory
  when "read"
    write "#{fname}: Is a directory"
  when "recurse"
    log { "recursing into directory #{fname}" }
    begin
      entries = Dir.entries(fname).reject { |x| x == "." || x == ".." }
      entries.each do |e|
        entname = fname + "/" + e
        # File.exist? is used because File.exists? was removed in Ruby 3.2.
        inode = File.exist?(entname) && File.stat(entname).ino
        if inode && @searched_files.include?(inode)
          Log.verbose && log("file already processed: #{entname}")
        else
          # record real inodes only; an entry that cannot be stat'ed (inode
          # is false) is still passed on to search, which reports it
          @searched_files << inode if inode
          search(entname)
        end
      end
    rescue Errno::EACCES
      write "directory not readable: #{fname}"
    end
  when "skip"
    log { "skipping directory #{fname}" }
  else
    log { "directory: #{@opts.directory}" }
  end
end
189
+
190
# Warns that +fname+ is of a type that cannot be classified; it is not
# searched.
def search_unknown(fname)
  warn("unknown file type: #{fname}")
end
193
+
194
# Reports that +fname+ does not exist; nothing is searched.
def search_none(fname)
  write("no such file: #{fname}")
end
197
+
198
# Logs that the unreadable file +fname+ is being passed over.
def search_unreadable(fname)
  log do
    "skipping unreadable: #{fname}"
  end
end
201
+
202
# Searches +name+, dispatching on what it is.
#
# When the exclude_matching option is set and the expression exposes a
# regexp (#re) that matches the name itself, the file is passed over.
# "-" is read as text from standard input; anything else is classified by
# FileType.type and routed to the matching search_* method. An unexpected
# type is reported as an error and the program exits.
def search(name)
  if @opts.exclude_matching
    expr = @opts.expr
    if expr.respond_to?(:re) && expr.re.match(name)
      log { "skipping file #{name} with matching name" }
      return
    end
    log { "not skipping file #{name}" }
  end

  if name == "-"
    write "reading standard input..."
    return search_text("-")
  end

  type = FileType.type(name)

  case type
  when FileType::BINARY     then search_binary(name)
  when FileType::DIRECTORY  then search_directory(name)
  when FileType::NONE       then search_none(name)
  when FileType::TEXT       then search_text(name)
  when FileType::UNKNOWN    then search_unknown(name)
  when FileType::UNREADABLE then search_unreadable(name)
  else
    error "type unknown: file: #{name}; type: #{type}"
    exit(-2)
  end
end
238
+
239
# Hook called by Glark.main after all files have been searched; it currently
# does nothing.
def end_processing
  nil
end
241
+
242
# Returns the options object used for the run: the GlarkOptions singleton.
def self.create_options
  GlarkOptions.instance
end
245
+
246
# Command-line entry point: loads the options from ARGV, searches every
# named file (standard input when none is given) and exits with the
# resulting status -- 0 == matches, 1 == no matches, 2 == error.
#
# Any StandardError is reported on standard error and ends the program with
# status 2; the stack trace is shown only when the verbose option is set.
# The handler used to dereference opts even when creating the options had
# failed (masking the real error with a NoMethodError), and "|| true" made
# it always re-raise, so errors ended with Ruby's status 1 -- the same
# status as "no matches".
def self.main
  opts = nil

  begin
    Log.set_widths(-15, -40, -40)

    Log.log { "loading options" }
    opts = self.create_options

    opts.run(ARGV)
    Log.log { "done loading options" }

    # To get rid of the annoying stack trace on ctrl-C:
    trap("INT") { abort }

    if opts.explain
      puts opts.expr.explain
    end

    files = if ARGV.size > 0 then
              if opts.split_as_path
                ARGV.collect { |f| f.split(File::PATH_SEPARATOR) }.flatten
              else
                ARGV
              end
            else
              [ '-' ]
            end

    glark = self.new(opts.expr, files)

    files.each do |f|
      glark.search(f)
    end

    glark.end_processing

    # exit raises SystemExit, which the rescue below does not catch
    exit glark.exit_status
  rescue => e
    # show the message, and the stack trace only if verbose:
    $stderr.puts "error: #{e}"
    $stderr.puts e.backtrace if opts && opts.verbose
    exit 2
  end
end
293
+ end
294
+
295
# Run the command-line interface only when this file is executed directly,
# not when it is loaded as a library.
Glark.main if __FILE__ == $0