glark 1.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +0 -0
- data/bin/glark +37 -0
- data/bin/jlark +63 -0
- data/lib/glark.rb +4 -0
- data/lib/glark/expression.rb +440 -0
- data/lib/glark/exprfactory.rb +248 -0
- data/lib/glark/glark.rb +297 -0
- data/lib/glark/help.rb +85 -0
- data/lib/glark/input.rb +183 -0
- data/lib/glark/options.rb +757 -0
- data/lib/glark/output.rb +266 -0
- data/test/lib/glark/glark_test.rb +317 -0
- data/test/lib/glark/options_test.rb +891 -0
- metadata +95 -0
@@ -0,0 +1,248 @@
|
|
1
|
+
#!/usr/bin/ruby -w
|
2
|
+
#!ruby -w
|
3
|
+
# vim: set filetype=ruby : set sw=2
|
4
|
+
|
5
|
+
# Expression factory.
|
6
|
+
|
7
|
+
require 'English'
|
8
|
+
|
9
|
+
require 'rubygems'
|
10
|
+
require 'riel'
|
11
|
+
|
12
|
+
require 'glark/options'
|
13
|
+
require 'glark/expression'
|
14
|
+
|
15
|
+
class ExpressionFactory
|
16
|
+
include Loggable
|
17
|
+
|
18
|
+
# signifies no limit to the distance between matches, i.e., anywhere within
|
19
|
+
# the entire file is valid.
|
20
|
+
INFINITE_DISTANCE = -1
|
21
|
+
|
22
|
+
attr_reader :expr
|
23
|
+
|
24
|
+
# Prepares the factory: copies the matching and highlighting settings from
# the shared GlarkOptions instance and resets the count of regular
# expressions created so far.
def initialize
  options = GlarkOptions.instance

  # number of regular expressions this factory has created
  @regexps = 0

  # settings handed to Regexp.create for every pattern
  @ignorecase = options.nocase
  @wholewords = options.whole_words
  @wholelines = options.whole_lines
  @extended   = options.extended
  @multiline  = options.multiline

  # settings handed to every RegexpFuncObj
  @highlight       = options.highlight
  @text_highlights = options.text_highlights
  @extract_matches = options.extract_matches
end
|
36
|
+
|
37
|
+
# Builds a single expression from a file containing one regular expression
# per line. Each line is stripped of surrounding whitespace and empty lines
# are ignored. Returns an InclusiveOrExpression over every pattern read, or
# nil when the file contributes no patterns.
def read_file(fname)
  log { "reading file: #{fname}" }
  combined = nil

  File.foreach(fname) do |raw|
    log { "line: #{raw}" }
    raw.strip!
    next if raw.empty?

    # each pattern is appended to one flat "or" expression instead of
    # nesting a new expression per line, to avoid stack overruns for very
    # large files.
    regexp = make_regular_expression(raw.chomp)
    if combined.nil?
      combined = InclusiveOrExpression.new(regexp)
    else
      combined.ops << regexp
    end
  end

  log { "returning expression #{combined}" }

  combined
end
|
62
|
+
|
63
|
+
# Creates the function object that matches a single pattern.
#
# pattern:: the regular expression source, as supplied by the user.
# negated:: forwarded to Regexp.create as the :negated option.
#
# Returns a new RegexpFuncObj, numbered with the count of expressions this
# factory had created before it; the count is then incremented.
def make_regular_expression(pattern, negated = false)
  # this check is because they may have omitted the pattern, e.g.:
  #   % glark *.cpp
  # File.exist? is used because the File.exists? alias was deprecated and
  # later removed from Ruby.
  if File.exist?(pattern)
    warn "pattern '#{pattern}' exists as a file.\n Pattern may have been omitted."
  end

  # Regexp.create is not defined in this file (presumably the riel extension
  # -- confirm); it receives a copy so the caller's string is not modified.
  regex = Regexp.create(pattern.dup,
                        :negated    => negated,
                        :ignorecase => @ignorecase,
                        :wholewords => @wholewords,
                        :wholelines => @wholelines,
                        :extended   => @extended,
                        :multiline  => @multiline)

  regex_args = {
    :highlight       => @highlight,
    :text_highlights => @text_highlights,
    :extract_matches => @extract_matches
  }

  re = RegexpFuncObj.new(regex, @regexps, regex_args)
  @regexps += 1
  re
end
|
88
|
+
|
89
|
+
# Creates two expressions, in order, from the front of +args+ and returns
# them as a two-element array. An element is nil when make_expression
# returns nil for it.
def make_expressions(args)
  Array.new(2) { make_expression(args) }
end
|
96
|
+
|
97
|
+
# Removes the optional explicit end tag ("--end-of-" followed by +name+)
# from the front of +args+. Returns the removed tag, or nil when the first
# argument is not that tag.
def shift_end_tag(name, args)
  end_tag = "--end-of-" + name
  return unless args[0] == end_tag

  args.shift
end
|
102
|
+
|
103
|
+
# Creates a negated regular expression from the first element of +args+,
# which is removed, along with the optional "--end-of-not" tag after it.
#
# Reports an error and exits with status 2 when +args+ holds no pattern.
# (Previously the whole +args+ array was passed on as the pattern and the
# missing-argument check could never trigger.)
def make_not_expression(args)
  pattern = args.shift
  unless pattern
    error "'not' expression takes one argument"
    exit 2
  end

  expr = make_regular_expression(pattern, true)

  # explicit end tag is optional:
  shift_end_tag("not", args)
  expr
end
|
114
|
+
|
115
|
+
# Creates the two operands of a binary expression from +args+ and removes
# the optional "--end-of-<type>" tag that may follow them.
#
# type:: the operator name, used in the error message and the end tag.
#
# Returns the operands as a two-element array. Reports an error and exits
# with status 2 when either operand is missing.
def make_two_expressions(args, type)
  lhs, rhs = make_expressions(args)

  if !lhs || !rhs
    error "'#{type}' expression takes two arguments"
    exit 2
  end

  shift_end_tag(type, args)
  [ lhs, rhs ]
end
|
125
|
+
|
126
|
+
# Creates an inclusive-or expression from the next two expressions in +args+.
def make_or_expression(args)
  InclusiveOrExpression.new(*make_two_expressions(args, "or"))
end
|
130
|
+
|
131
|
+
# Creates an exclusive-or expression from the next two expressions in +args+.
def make_xor_expression(args)
  ExclusiveOrExpression.new(*make_two_expressions(args, "xor"))
end
|
135
|
+
|
136
|
+
# Returns a true value (not necessarily +true+) when +x+ can be used as a
# distance: an Integer, a value whose to_i equals INFINITE_DISTANCE, or a
# value for which x.num is not nil/false. Returns nil or false otherwise.
#
# Integer replaces the Fixnum constant, which was deprecated and later
# removed from Ruby.
# NOTE(review): +num+ is not defined in this file; presumably it is the
# String extension provided by riel -- confirm.
def numeric?(x)
  x && (x.kind_of?(Integer) || x.to_i == INFINITE_DISTANCE || x.num)
end
|
139
|
+
|
140
|
+
# Determines the distance for an "and" expression.
#
# arg::  the option as given: "-a", "--and" or "--and=N".
# args:: the remaining arguments. "-a" always consumes the next one as the
#        distance; "--and" consumes it only when it is numeric, and
#        otherwise uses a distance of 0.
#
# Returns the distance as an integer, or floating-point infinity when the
# distance equals INFINITE_DISTANCE. Reports an error and exits with status
# 2 when the distance is not numeric.
def make_and_distance(arg, args)
  distance =
    if arg == "-a"
      args.shift
    elsif arg == "--and"
      if args.size > 0 && numeric?(args[0])
        args.shift
      else
        "0"
      end
    elsif arg.index(/^--and=(\-?\d+)?$/)
      Regexp.last_match(1)
    end

  # check to ensure that this is numeric
  unless numeric?(distance)
    error "invalid distance for 'and' expression: '#{distance}'\n" +
      " expecting an integer, or #{INFINITE_DISTANCE} for 'infinite'"
    exit 2
  end

  return Float::INFINITY if distance.to_i == INFINITE_DISTANCE

  distance.to_i
end
|
169
|
+
|
170
|
+
# Creates an "and" expression: reads the distance selected by +arg+ (which
# may consume an element of +args+), then the two operands from +args+.
def make_and_expression(arg, args)
  distance = make_and_distance(arg, args)
  AndExpression.new(distance, *make_two_expressions(args, "and"))
end
|
176
|
+
|
177
|
+
# Builds an expression from infix notation, e.g. ( a --or b ).
#
# arg::  the token to start with.
# args:: the remaining tokens; they are consumed as they are processed.
#
# An operator combines the expression built so far with the expression
# parsed from the tokens that follow it. A closing parenthesis, or any
# token taken as a pattern, ends the loop. Returns the expression; when
# none was built it prints the current state, reports an error and
# returns nil.
def make_infix_expression(arg, args = [])
  result = nil
  token = arg

  while token
    case token
    when '('
      token = args.shift
      result = make_infix_expression(token, args)
    when '--or', '-o'
      token = args.shift
      result = InclusiveOrExpression.new(result, make_infix_expression(token, args))
    when '--xor'
      token = args.shift
      result = ExclusiveOrExpression.new(result, make_infix_expression(token, args))
    when /^--and/, '-a'
      # the distance may consume the next token, so it is read first
      distance = make_and_distance(token, args)
      token = args.shift
      result = AndExpression.new(distance, result, make_infix_expression(token, args))
    when ')'
      break
    else
      # blather "assuming the last argument #{token} is a pattern"
      result = make_regular_expression(token)
      break
    end
    token = args.shift
  end

  unless result
    puts "arg: #{token}; args: #{args.inspect}"
    error "No expression provided."
  end

  result
end
|
215
|
+
|
216
|
+
# Creates the expression that starts at the front of +args+, consuming the
# arguments it uses.
#
# args::        the remaining command-line arguments.
# warn_option:: when true, a leading argument that looks like an option
#               (one or two dashes followed by a word character) but is not
#               an expression operator is reported and the program exits
#               with status 2, instead of being used as a pattern.
#
# Returns the expression, or nil when +args+ has no first element.
def make_expression(args, warn_option = false)
  head = args[0]
  return nil unless head

  case head
  when "--or", "-o"
    args.shift
    make_or_expression(args)
  when "--xor"
    args.shift
    make_xor_expression(args)
  when %r{^\-\-and}, %r{^\-a}
    args.shift
    make_and_expression(head, args)
  when '('
    args.shift
    make_infix_expression(head, args)
  else
    if warn_option && head.index(/^\-{1,2}\w/)
      warn "option not understood: #{head}"
      exit 2
    end

    # blather "assuming the last argument #{head} is a pattern"
    args.shift
    make_regular_expression(head)
  end
end
|
247
|
+
|
248
|
+
end
|
data/lib/glark/glark.rb
ADDED
@@ -0,0 +1,297 @@
|
|
1
|
+
#!/usr/bin/ruby -w
|
2
|
+
#!ruby -w
|
3
|
+
# vim: set filetype=ruby : set sw=2
|
4
|
+
|
5
|
+
# An extended grep, with extended functionality including full regular
|
6
|
+
# expressions, contextual output, highlighting, detection and exclusion of
|
7
|
+
# nontext files, and complex matching criteria.
|
8
|
+
|
9
|
+
require 'English'
|
10
|
+
require 'pathname'
|
11
|
+
|
12
|
+
require 'rubygems'
|
13
|
+
require 'riel'
|
14
|
+
|
15
|
+
require 'glark/options'
|
16
|
+
require 'glark/input'
|
17
|
+
require 'glark/output'
|
18
|
+
require 'glark/expression'
|
19
|
+
require 'glark/exprfactory'
|
20
|
+
|
21
|
+
# Write output immediately instead of buffering it.
[ $stdout, $stderr ].each { |stream| stream.sync = true }

# Program name and version.
$PACKAGE = "glark"
$VERSION = "1.9.0"
|
26
|
+
|
27
|
+
# The main processor.
|
28
|
+
class Glark
|
29
|
+
include Loggable
|
30
|
+
|
31
|
+
attr_reader :exit_status
|
32
|
+
|
33
|
+
def initialize(func, files)
|
34
|
+
@opts = GlarkOptions.instance
|
35
|
+
@func = func
|
36
|
+
@searched_files = Array.new # files searched, so we don't cycle through links
|
37
|
+
|
38
|
+
@files = files
|
39
|
+
|
40
|
+
@show_file_names = (@opts.show_file_names ||
|
41
|
+
(@opts.show_file_names.nil? &&
|
42
|
+
(@opts.label ||
|
43
|
+
@files.size > 1 ||
|
44
|
+
(@files[0] != "-" && FileType.type(@files[0]) == FileType::DIRECTORY))))
|
45
|
+
|
46
|
+
@out_class = case @opts.output
|
47
|
+
when "grep"
|
48
|
+
GrepOutputFormat
|
49
|
+
when "ansi", "xterm", nil
|
50
|
+
GlarkOutputFormat
|
51
|
+
when "match"
|
52
|
+
error "output to match list is not yet supported"
|
53
|
+
GlarkMatchList
|
54
|
+
# exit 2
|
55
|
+
end
|
56
|
+
|
57
|
+
@count = @opts.count
|
58
|
+
@invert_match = @opts.invert_match
|
59
|
+
|
60
|
+
@after = @opts.after
|
61
|
+
@before = @opts.before
|
62
|
+
@output = @opts.output
|
63
|
+
|
64
|
+
# 0 == matches, 1 == no matches, 2 == error
|
65
|
+
@exit_status = @invert_match ? 0 : 1
|
66
|
+
|
67
|
+
@skip_methods = Array.new
|
68
|
+
|
69
|
+
if @opts.with_basename || @opts.without_basename
|
70
|
+
@skip_methods << Proc.new { |fn| skip?(File.basename(fn), @opts.with_basename, @opts.without_basename) }
|
71
|
+
end
|
72
|
+
|
73
|
+
if @opts.with_fullname || @opts.without_fullname
|
74
|
+
@skip_methods << Proc.new { |fn| skip?(fn, @opts.with_fullname, @opts.without_fullname) }
|
75
|
+
end
|
76
|
+
|
77
|
+
if @opts.size_limit
|
78
|
+
@skip_methods << Proc.new { |fn| File.size(fname) > @opts.size_limit }
|
79
|
+
end
|
80
|
+
end
|
81
|
+
|
82
|
+
# Runs the search function over one input.
#
# The input is given a new output object of the configured class, and the
# count and invert-match settings are applied to it when they are in
# effect. After processing, a matched input updates the exit status:
# 0 normally, 1 when the match is inverted.
def search_file(input)
  input.output = @out_class.new(input, @show_file_names)

  input.count = 0 if @count
  input.invert_match = true if @invert_match

  @func.process(input)

  return unless input.matched?

  @exit_status = @invert_match ? 1 : 0
end
|
95
|
+
|
96
|
+
# Decides whether +name+ is to be skipped.
#
# opts_with::    when given, names that do not match it are skipped.
# opts_without:: when given, names that match it are skipped.
#
# Returns a true value when the name is to be skipped, nil otherwise.
def skip?(name, opts_with, opts_without)
  return true if opts_with && !opts_with.match(name)

  opts_without && opts_without.match(name)
end
|
101
|
+
|
102
|
+
# Returns the first skip method that gives a true value for +fname+, or nil
# when none of them does, i.e., when the file should be searched.
def skipped?(fname)
  @skip_methods.find { |check| check.call(fname) }
end
|
105
|
+
|
106
|
+
# Searches the named file as text, unless one of the skip methods rejects
# it. A name of "-" reads from standard input.
#
# A file opened here is closed once the search is done, so that handles do
# not pile up when many files are searched; standard input is left open.
def search_text(fname)
  if skipped?(fname)
    log { "skipping file: #{fname}" }
  else
    log { "searching text" }
    log { "searching #{fname} for #{@func}" }

    ifile_args = {
      :after  => @after,
      :before => @before,
      :output => @output
    }

    io = fname == "-" ? $stdin : File.new(fname)

    begin
      search_file(InputFile.new(fname, io, ifile_args))
    ensure
      io.close unless io.equal?($stdin) || io.closed?
    end
  end
end
|
136
|
+
|
137
|
+
# Handles a binary file according to the binary_files option, unless one
# of the skip methods rejects it:
#
# "without-match":: the file is not searched.
# "binary"::        the file is opened in binary mode and searched as a
#                   BinaryFile; it is closed once the search is done.
# "text"::          the file is searched as if it were text.
def search_binary(fname)
  if skipped?(fname)
    log { "skipping file: #{fname}" }
  else
    log { "handling binary" }

    case @opts.binary_files
    when "without-match"
      log { "skipping binary file #{fname}" }

    when "binary"
      log { "searching binary file #{fname} for #{@func}" }
      f = File.new(fname)
      begin
        f.binmode # for MSDOS/WinWhatever
        search_file(BinaryFile.new(fname, f))
      ensure
        f.close unless f.closed?
      end

    when "text"
      # this message previously referred to an undefined +name+, which
      # raised a NameError when the message was evaluated.
      log { "processing binary file #{fname} as text" }
      search_text(fname)
    end
  end
end
|
160
|
+
|
161
|
+
# Handles a directory according to the directory option:
#
# "read"::    reports that the name is a directory.
# "recurse":: searches every entry other than "." and "..". The inode of
#             each entry is recorded so that a file reached a second time
#             (e.g., through a link) is not searched again.
# "skip"::    the directory is ignored.
def search_directory(fname)
  log { "processing directory" }
  case @opts.directory
  when "read"
    write "#{fname}: Is a directory"
  when "recurse"
    log { "recursing into directory #{fname}" }
    begin
      entries = Dir.entries(fname).reject { |x| x == "." || x == ".." }
      entries.each do |e|
        entname = fname + "/" + e
        # File.exist? is used because the File.exists? alias was deprecated
        # and later removed from Ruby. The inode is false for an entry that
        # cannot be found, e.g., a dangling link.
        inode = File.exist?(entname) && File.stat(entname).ino
        if inode && @searched_files.include?(inode)
          Log.verbose && log("file already processed: #{entname}")
        else
          @searched_files << inode if inode
          search(entname)
        end
      end
    rescue Errno::EACCES
      write "directory not readable: #{fname}"
    end
  when "skip"
    log { "skipping directory #{fname}" }
  else
    log { "directory: #{@opts.directory}" }
  end
end
|
189
|
+
|
190
|
+
# Called for a file whose type is FileType::UNKNOWN: warns, and does not
# search the file.
def search_unknown(fname)
  warn("unknown file type: #{fname}")
end
|
193
|
+
|
194
|
+
# Called for a name whose type is FileType::NONE: reports that there is no
# such file.
def search_none(fname)
  write("no such file: #{fname}")
end
|
197
|
+
|
198
|
+
# Called for a file whose type is FileType::UNREADABLE: the file is not
# searched, which is only logged.
def search_unreadable(fname)
  log do
    "skipping unreadable: #{fname}"
  end
end
|
201
|
+
|
202
|
+
# Searches +name+, dispatching on what it denotes.
#
# When the exclude_matching option is set and the name itself matches the
# expression's regular expression, the name is not searched. "-" is read as
# text from standard input. Any other name is dispatched on the type that
# FileType reports for it.
#
# Exits with status 2 -- the error status used throughout this program --
# when FileType reports a type that is not handled here. (This previously
# called exit(-2).)
def search(name)
  if @opts.exclude_matching
    expr = @opts.expr
    if expr.respond_to?(:re) && expr.re.match(name)
      log { "skipping file #{name} with matching name" }
      return
    else
      log { "not skipping file #{name}" }
    end
  end

  if name == "-"
    write "reading standard input..."
    search_text("-")
  else
    type = FileType.type(name)

    case type
    when FileType::BINARY
      search_binary(name)
    when FileType::DIRECTORY
      search_directory(name)
    when FileType::NONE
      search_none(name)
    when FileType::TEXT
      search_text(name)
    when FileType::UNKNOWN
      search_unknown(name)
    when FileType::UNREADABLE
      search_unreadable(name)
    else
      error "type unknown: file: #{name}; type: #{type}"
      exit 2
    end
  end
end
|
238
|
+
|
239
|
+
# Called by main after all of the files have been searched. Does nothing.
def end_processing; end
|
241
|
+
|
242
|
+
# Returns the options object used by main: the shared GlarkOptions instance.
def self.create_options
  return GlarkOptions.instance
end
|
245
|
+
|
246
|
+
# Runs the program: loads the options from ARGV, searches each remaining
# argument (or standard input when there is none), and exits with the
# resulting status.
#
# On an error, the message is written to standard error. The stack trace is
# shown, and the error re-raised, only when the options were loaded and are
# verbose; otherwise the program exits with status 2. (A leftover
# "|| true" used to force the verbose behavior, and a failure before the
# options existed raised a second error from within the handler.)
def self.main
  opts = nil

  begin
    Log.set_widths(-15, -40, -40)

    Log.log { "loading options" }
    opts = self.create_options

    opts.run(ARGV)
    Log.log { "done loading options" }

    # To get rid of the annoying stack trace on ctrl-C:
    trap("INT") { abort }

    if opts.explain
      puts opts.expr.explain
    end

    files = if ARGV.size > 0 then
              if opts.split_as_path
                ARGV.collect { |f| f.split(File::PATH_SEPARATOR) }.flatten
              else
                ARGV
              end
            else
              [ '-' ]
            end

    glark = self.new(opts.expr, files)

    files.each do |f|
      glark.search(f)
    end

    glark.end_processing

    exit glark.exit_status
  rescue => e
    # show the message, and the stack trace only if verbose:
    $stderr.puts "error: #{e}"
    if opts && opts.verbose
      $stderr.puts e.backtrace
      raise
    else
      exit 2
    end
  end
end
|
293
|
+
end
|
294
|
+
|
295
|
+
# Run the program when this file is executed directly, not when it is
# loaded by another file.
Glark.main if __FILE__ == $0
|