glark 1.9.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README +0 -0
- data/bin/glark +37 -0
- data/bin/jlark +63 -0
- data/lib/glark.rb +4 -0
- data/lib/glark/expression.rb +440 -0
- data/lib/glark/exprfactory.rb +248 -0
- data/lib/glark/glark.rb +297 -0
- data/lib/glark/help.rb +85 -0
- data/lib/glark/input.rb +183 -0
- data/lib/glark/options.rb +757 -0
- data/lib/glark/output.rb +266 -0
- data/test/lib/glark/glark_test.rb +317 -0
- data/test/lib/glark/options_test.rb +891 -0
- metadata +95 -0
@@ -0,0 +1,248 @@
|
|
1
|
+
#!/usr/bin/ruby -w
|
2
|
+
#!ruby -w
|
3
|
+
# vim: set filetype=ruby : set sw=2
|
4
|
+
|
5
|
+
# Expression factory.
|
6
|
+
|
7
|
+
require 'English'
|
8
|
+
|
9
|
+
require 'rubygems'
|
10
|
+
require 'riel'
|
11
|
+
|
12
|
+
require 'glark/options'
|
13
|
+
require 'glark/expression'
|
14
|
+
|
15
|
+
# Builds the expression objects that glark matches against its input, either
# from a file of patterns (read_file) or from command-line arguments
# (make_expression, make_infix_expression).
#
# NOTE(review): log and error are not defined in this class; they presumably
# come from the Loggable mixin -- confirm.
class ExpressionFactory
  include Loggable

  # signifies no limit to the distance between matches, i.e., anywhere within
  # the entire file is valid.
  INFINITE_DISTANCE = -1

  # NOTE(review): @expr is never assigned anywhere in this class, so this
  # reader returns nil unless something outside sets it -- confirm it is used.
  attr_reader :expr

  # Copies the pattern and highlighting settings from GlarkOptions.instance,
  # and starts the running count of regular expressions created at zero.
  def initialize
    @regexps = 0
    opts = GlarkOptions.instance
    @ignorecase      = opts.nocase
    @wholewords      = opts.whole_words
    @wholelines      = opts.whole_lines
    @extended        = opts.extended
    @multiline       = opts.multiline
    @highlight       = opts.highlight
    @text_highlights = opts.text_highlights
    @extract_matches = opts.extract_matches
  end

  # reads a file containing one regular expression per line.
  #
  # Blank lines are ignored. Returns a single InclusiveOrExpression holding
  # every pattern, or nil when the file contains no patterns.
  def read_file(fname)
    log { "reading file: #{fname}" }
    expr = nil
    File.open(fname) do |file|
      file.each_line do |line|
        log { "line: #{line}" }
        # strip! also removes the trailing newline, so no chomp is needed.
        line.strip!
        next if line.empty?

        # flatten the or expression instead of nesting it, to avoid
        # stack overruns for very large files.
        re = make_regular_expression(line)
        if expr
          expr.ops << re
        else
          expr = InclusiveOrExpression.new(re)
        end
      end
    end

    log { "returning expression #{expr}" }

    expr
  end

  # Wraps +pattern+ in a RegexpFuncObj, using the options captured in
  # initialize. +negated+ is forwarded to Regexp.create as :negated.
  # Each object is given the count of regexps created before it, and the
  # count is then incremented.
  def make_regular_expression(pattern, negated = false)
    # this check is because they may have omitted the pattern, e.g.:
    #   % glark *.cpp
    # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
    if File.exist?(pattern)
      warn "pattern '#{pattern}' exists as a file.\n Pattern may have been omitted."
    end

    # the pattern is dup'ed before being handed over, so the caller's string
    # is not the object passed to Regexp.create.
    regex = Regexp.create(pattern.dup,
                          :negated    => negated,
                          :ignorecase => @ignorecase,
                          :wholewords => @wholewords,
                          :wholelines => @wholelines,
                          :extended   => @extended,
                          :multiline  => @multiline)

    regex_args = {
      :highlight       => @highlight,
      :text_highlights => @text_highlights,
      :extract_matches => @extract_matches
    }

    re = RegexpFuncObj.new(regex, @regexps, regex_args)
    @regexps += 1
    re
  end

  # creates two expressions and returns them.
  # Each one is consumed from the front of +args+; either may be nil if the
  # arguments run out.
  def make_expressions(args)
    a1 = make_expression(args)
    a2 = make_expression(args)

    [ a1, a2 ]
  end

  # removes optional end tag ("--end-of-<name>") from the front of +args+.
  def shift_end_tag(name, args)
    # explicit end tag is optional:
    args.shift if args[0] == ("--end-of-" + name)
  end

  # Builds a negated regular expression from the next argument in +args+.
  # Reports an error and exits with status 2 when there is no argument.
  def make_not_expression(args)
    # take the pattern off the argument list; passing the whole array on
    # would fail in make_regular_expression, which expects a single pattern.
    pattern = args.shift
    unless pattern
      error "'not' expression takes one argument"
      exit 2
    end

    expr = make_regular_expression(pattern, true)

    # explicit end tag is optional:
    shift_end_tag("not", args)
    expr
  end

  # Consumes two expressions plus the optional end tag for +type+ from
  # +args+. Reports an error and exits with status 2 if either is missing.
  def make_two_expressions(args, type)
    a1, a2 = make_expressions(args)
    unless a1 && a2
      error "'" + type + "' expression takes two arguments"
      exit 2
    end

    shift_end_tag(type, args)
    [ a1, a2 ]
  end

  # Builds an InclusiveOrExpression from the next two expressions in +args+.
  def make_or_expression(args)
    a1, a2 = make_two_expressions(args, "or")
    InclusiveOrExpression.new(a1, a2)
  end

  # Builds an ExclusiveOrExpression from the next two expressions in +args+.
  def make_xor_expression(args)
    a1, a2 = make_two_expressions(args, "xor")
    ExclusiveOrExpression.new(a1, a2)
  end

  # Returns a true value if +x+ can serve as a distance: an Integer, or
  # something whose to_i is INFINITE_DISTANCE, or for which x.num is true.
  # Integer is tested rather than Fixnum, which was removed in Ruby 3.2.
  # NOTE(review): num is not a core method; presumably a riel extension that
  # returns nil for non-numeric strings -- confirm.
  def numeric?(x)
    x && (x.kind_of?(Integer) || (x.to_i == INFINITE_DISTANCE || x.num))
  end

  # Determines the distance for an 'and' expression from the option +arg+
  # ("-a", "--and" or "--and=N") and, where needed, the next entry of +args+.
  # Returns an integer, or infinity when the distance is INFINITE_DISTANCE.
  # Reports an error and exits with status 2 for a non-numeric distance.
  def make_and_distance(arg, args)
    dist = nil
    if arg == "-a"
      # the short form always takes the distance as the next argument.
      dist = args.shift
    elsif arg == "--and"
      # the distance is optional for the long form, defaulting to zero.
      if args.size > 0 && numeric?(args[0])
        dist = args.shift
      else
        dist = "0"
      end
    elsif (md = /^--and=(\-?\d+)?$/.match(arg))
      dist = md[1]
    end

    # check to ensure that this is numeric
    if !numeric?(dist)
      error "invalid distance for 'and' expression: '#{dist}'\n" +
        " expecting an integer, or #{INFINITE_DISTANCE} for 'infinite'"
      exit 2
    end

    if dist.to_i == INFINITE_DISTANCE
      dist = 1.0 / 0.0            # infinity
    else
      dist = dist.to_i
    end

    dist
  end

  # Builds an AndExpression from the distance implied by +arg+ and the next
  # two expressions in +args+.
  def make_and_expression(arg, args)
    dist = make_and_distance(arg, args)

    a1, a2 = make_two_expressions(args, "and")
    AndExpression.new(dist, a1, a2)
  end

  # Parses an infix expression, where +arg+ is the current token and +args+
  # the remaining ones (consumed as parsing proceeds). Operators combine the
  # expression built so far with the one parsed from the tokens after them.
  # Parsing stops at ')' or after a plain pattern. Reports an error and
  # returns nil if no expression was built.
  def make_infix_expression(arg, args = [])
    expr = nil

    while arg
      case arg
      when '('
        arg  = args.shift
        expr = make_infix_expression(arg, args)
      when '--or', '-o'
        arg  = args.shift
        rhs  = make_infix_expression(arg, args)
        expr = InclusiveOrExpression.new(expr, rhs)
      when '--xor'
        arg  = args.shift
        rhs  = make_infix_expression(arg, args)
        expr = ExclusiveOrExpression.new(expr, rhs)
      when /^--and/, '-a'
        dist = make_and_distance(arg, args)
        arg  = args.shift
        rhs  = make_infix_expression(arg, args)
        expr = AndExpression.new(dist, expr, rhs)
      when ')'
        break
      else
        # blather "assuming the last argument #{arg} is a pattern"
        expr = make_regular_expression(arg)
        break
      end
      arg = args.shift
    end

    if !expr
      # diagnostic detail goes to the log, not to standard output, where it
      # would be mixed in with the search results.
      log { "arg: #{arg}; args: #{args.inspect}" }
      error "No expression provided."
    end

    expr
  end

  # Builds the expression starting at the front of +args+, consuming the
  # arguments it uses. Returns nil when +args+ is empty.
  #
  # When +warn_option+ is true, an argument that looks like an option but is
  # not an expression operator is reported, and the program exits with
  # status 2, instead of the argument being treated as a pattern.
  def make_expression(args, warn_option = false)
    arg = args[0]
    return nil unless arg

    case arg
    when "--or", "-o"
      args.shift
      make_or_expression(args)
    when "--xor"
      args.shift
      make_xor_expression(args)
    when %r{^\-\-and}, %r{^\-a}
      args.shift
      make_and_expression(arg, args)
    when '('
      args.shift
      make_infix_expression(arg, args)
    else
      if warn_option && arg.index(/^\-{1,2}\w/)
        warn "option not understood: #{arg}"
        exit 2
      end

      # blather "assuming the last argument #{arg} is a pattern"
      args.shift
      make_regular_expression(arg)
    end
  end

end
|
data/lib/glark/glark.rb
ADDED
@@ -0,0 +1,297 @@
|
|
1
|
+
#!/usr/bin/ruby -w
|
2
|
+
#!ruby -w
|
3
|
+
# vim: set filetype=ruby : set sw=2
|
4
|
+
|
5
|
+
# An extended grep, with extended functionality including full regular
|
6
|
+
# expressions, contextual output, highlighting, detection and exclusion of
|
7
|
+
# nontext files, and complex matching criteria.
|
8
|
+
|
9
|
+
require 'English'
|
10
|
+
require 'pathname'
|
11
|
+
|
12
|
+
require 'rubygems'
|
13
|
+
require 'riel'
|
14
|
+
|
15
|
+
require 'glark/options'
|
16
|
+
require 'glark/input'
|
17
|
+
require 'glark/output'
|
18
|
+
require 'glark/expression'
|
19
|
+
require 'glark/exprfactory'
|
20
|
+
|
21
|
+
# unbuffer both standard streams
[ $stdout, $stderr ].each { |stream| stream.sync = true }

# package name and version, kept in globals
$PACKAGE, $VERSION = "glark", "1.9.0"
|
26
|
+
|
27
|
+
# The main processor.
|
28
|
+
# The main processor: applies an expression to each named file, to standard
# input ("-"), or to directories, and tracks the resulting exit status.
#
# NOTE(review): log, write and error are not defined in this class; they
# presumably come from the Loggable mixin -- confirm.
class Glark
  include Loggable

  # 0 == matches, 1 == no matches, 2 == error
  attr_reader :exit_status

  # +func+ is the expression to run against each input (it must respond to
  # process); +files+ is the list of names to be searched, which is used
  # here only to decide whether file names are shown.
  def initialize(func, files)
    @opts  = GlarkOptions.instance
    @func  = func
    @searched_files = Array.new # files searched, so we don't cycle through links

    @files = files

    # an explicit setting wins; when the option is unset (nil), names are
    # shown for a label, for multiple files, or when searching a directory.
    @show_file_names = (@opts.show_file_names ||
                        (@opts.show_file_names.nil? &&
                         (@opts.label ||
                          @files.size > 1 ||
                          (@files[0] != "-" && FileType.type(@files[0]) == FileType::DIRECTORY))))

    @out_class = case @opts.output
                 when "grep"
                   GrepOutputFormat
                 when "ansi", "xterm", nil
                   GlarkOutputFormat
                 when "match"
                   error "output to match list is not yet supported"
                   GlarkMatchList
                   # exit 2
                 end

    @count        = @opts.count
    @invert_match = @opts.invert_match

    @after  = @opts.after
    @before = @opts.before
    @output = @opts.output

    # 0 == matches, 1 == no matches, 2 == error
    @exit_status = @invert_match ? 0 : 1

    # each entry takes a file name and returns true if the file is skipped.
    @skip_methods = Array.new

    if @opts.with_basename || @opts.without_basename
      @skip_methods << Proc.new { |fn| skip?(File.basename(fn), @opts.with_basename, @opts.without_basename) }
    end

    if @opts.with_fullname || @opts.without_fullname
      @skip_methods << Proc.new { |fn| skip?(fn, @opts.with_fullname, @opts.without_fullname) }
    end

    if @opts.size_limit
      # standard input ("-") has no size on disk, so it is never skipped here.
      @skip_methods << Proc.new { |fn| fn != "-" && File.size(fn) > @opts.size_limit }
    end
  end

  # Runs the expression against +input+, writing through a new instance of
  # the configured output class, and updates the exit status on a match.
  def search_file(input)
    output       = @out_class.new(input, @show_file_names)
    input.output = output

    input.count        = 0    if @count
    input.invert_match = true if @invert_match

    @func.process(input)

    if input.matched?
      @exit_status = @invert_match ? 1 : 0
    end
  end

  # Returns a true value if +name+ should be skipped: it fails to match
  # +opts_with+ (when given), or it matches +opts_without+ (when given).
  def skip?(name, opts_with, opts_without)
    inc = opts_with    && !opts_with.match(name)
    exc = opts_without && opts_without.match(name)
    inc || exc
  end

  # Returns a true value if any of the skip criteria set up in initialize
  # applies to +fname+.
  def skipped?(fname)
    @skip_methods.detect { |meth| meth.call(fname) }
  end

  # Searches +fname+ as text, unless it is skipped. The name "-" denotes
  # standard input.
  def search_text(fname)
    if skipped?(fname)
      log { "skipping file: #{fname}" }
    else
      log { "searching text" }
      log { "searching #{fname} for #{@func}" }

      ifile_args = {
        :after  => @after,
        :before => @before,
        :output => @output
      }

      io = fname == "-" ? $stdin : File.new(fname)

      begin
        input = InputFile.new(fname, io, ifile_args)
        search_file(input)
      ensure
        # release the handle, so that large recursive searches do not run out
        # of file descriptors; standard input is left open.
        # NOTE(review): assumes InputFile does not read from the stream after
        # search_file returns -- confirm.
        io.close unless io.equal?($stdin) || io.closed?
      end
    end
  end

  # Handles the binary file +fname+ according to the binary_files option:
  # skipped ("without-match"), searched as binary ("binary"), or searched as
  # though it were text ("text").
  def search_binary(fname)
    if skipped?(fname)
      log { "skipping file: #{fname}" }
    else
      log { "handling binary" }

      case @opts.binary_files
      when "without-match"
        log { "skipping binary file #{fname}" }

      when "binary"
        log { "searching binary file #{fname} for #{@func}" }
        f = File.new(fname)
        begin
          f.binmode                # for MSDOS/WinWhatever
          bf = BinaryFile.new(fname, f)
          search_file(bf)
        ensure
          # NOTE(review): assumes BinaryFile does not read from the stream
          # after search_file returns -- confirm.
          f.close unless f.closed?
        end

      when "text"
        log { "processing binary file #{fname} as text" }
        search_text(fname)
      end
    end
  end

  # Handles the directory +fname+ according to the directory option: reported
  # ("read"), searched entry by entry ("recurse"), or ignored ("skip").
  def search_directory(fname)
    log { "processing directory" }
    case @opts.directory
    when "read"
      write "#{fname}: Is a directory"
    when "recurse"
      log { "recursing into directory #{fname}" }
      begin
        entries = Dir.entries(fname).reject { |x| x == "." || x == ".." }
        entries.each do |entry|
          entname = fname + "/" + entry
          # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
          inode = File.exist?(entname) && File.stat(entname).ino
          if inode && @searched_files.include?(inode)
            Log.verbose && log("file already processed: #{entname}")
          else
            # only real inode numbers are recorded; inode is false when the
            # entry does not exist, and it is still passed on to search.
            @searched_files << inode if inode
            search(entname)
          end
        end
      rescue Errno::EACCES
        write "directory not readable: #{fname}"
      end
    when "skip"
      log { "skipping directory #{fname}" }
    else
      log { "directory: #{@opts.directory}" }
    end
  end

  # Warns that the type of +fname+ is not known.
  def search_unknown(fname)
    warn "unknown file type: #{fname}"
  end

  # Reports that +fname+ does not exist.
  def search_none(fname)
    write "no such file: #{fname}"
  end

  # Logs that +fname+ is skipped because it is not readable.
  def search_unreadable(fname)
    log { "skipping unreadable: #{fname}" }
  end

  # Searches +name+, dispatching on the type reported by FileType.type.
  # The name "-" is read as text from standard input. When exclude_matching
  # is set, a file whose name matches the expression's own regular
  # expression is not searched.
  def search(name)
    if @opts.exclude_matching
      expr = @opts.expr
      if expr.respond_to?(:re) && expr.re.match(name)
        log { "skipping file #{name} with matching name" }
        return
      else
        log { "not skipping file #{name}" }
      end
    end

    if name == "-"
      write "reading standard input..."
      search_text("-")
    else
      type = FileType.type(name)

      case type
      when FileType::BINARY
        search_binary(name)
      when FileType::DIRECTORY
        search_directory(name)
      when FileType::NONE
        search_none(name)
      when FileType::TEXT
        search_text(name)
      when FileType::UNKNOWN
        search_unknown(name)
      when FileType::UNREADABLE
        search_unreadable(name)
      else
        error "type unknown: file: #{name}; type: #{type}"
        # 2 == error, as for every other failure.
        exit 2
      end
    end
  end

  # Called once after all files have been searched; does nothing here.
  def end_processing
  end

  # Returns the options object that main runs against the command line.
  def self.create_options
    GlarkOptions.instance
  end

  # Entry point: processes the options in ARGV, searches the files that
  # remain (or standard input when there are none), and exits with the
  # resulting status. An error is reported on standard error and the program
  # exits with status 2.
  def self.main
    # assigned up front, so that the rescue clause can tell whether the
    # options were created before the failure.
    opts = nil

    begin
      Log.set_widths(-15, -40, -40)

      Log.log { "loading options" }
      opts = self.create_options

      opts.run(ARGV)
      Log.log { "done loading options" }

      # To get rid of the annoying stack trace on ctrl-C:
      trap("INT") { abort }

      if opts.explain
        puts opts.expr.explain
      end

      files = if ARGV.size > 0 then
                if opts.split_as_path
                  ARGV.collect { |f| f.split(File::PATH_SEPARATOR) }.flatten
                else
                  ARGV
                end
              else
                [ '-' ]
              end

      glark = self.new(opts.expr, files)

      files.each do |f|
        glark.search(f)
      end

      glark.end_processing

      exit glark.exit_status
    rescue => e
      # show the message, and the stack trace only if verbose:
      $stderr.puts "error: #{e}"
      $stderr.puts e.backtrace if opts && opts.verbose

      # exit explicitly instead of re-raising: an uncaught exception ends the
      # process with status 1, which means "no matches" here.
      exit 2
    end
  end
end
|
294
|
+
|
295
|
+
# run the processor only when this file is executed as the program itself,
# not when it is loaded by another file.
Glark.main if __FILE__ == $PROGRAM_NAME
|