hadoop-find 0.0.1-java → 0.1.0-java
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +5 -0
- data/README +5 -3
- data/bin/hfind.rb +55 -16
- metadata +3 -3
data/CHANGELOG
CHANGED
data/README
CHANGED
@@ -1,12 +1,14 @@
|
|
1
1
|
hfind
|
2
2
|
|
3
|
-
A file listing
|
3
|
+
A file-listing utility for HDFS filesystems, similar to Unix find(1).
|
4
4
|
|
5
5
|
Requires JRuby 1.6+.
|
6
6
|
|
7
7
|
# installation
|
8
8
|
|
9
|
-
|
9
|
+
jgem install --no-wrapper hadoop-find
|
10
|
+
|
11
|
+
Or simply copy hfind.rb and hfind into a directory on your PATH.
|
10
12
|
|
11
13
|
# usage
|
12
14
|
|
@@ -17,7 +19,7 @@ usage: hfind [options] path
|
|
17
19
|
-M, --mtime # files modified before (-x) or after (+x) days ago
|
18
20
|
-s, --size # file size > (+x), < (-x), or == (x)
|
19
21
|
-r, --repl # replication factor > (+x), < (-x), or == (x)
|
20
|
-
-U, --under # under-replicated files
|
22
|
+
-U, --under # show under-replicated files
|
21
23
|
-t, --type # show type (f)ile or (d)irectory
|
22
24
|
-l, --ls # show full listing detail
|
23
25
|
-h, --human # show human readable file sizes
|
data/bin/hfind.rb
CHANGED
@@ -55,7 +55,7 @@ class HadoopFSFinder
|
|
55
55
|
end
|
56
56
|
filter_size = s.to_i.abs * multi
|
57
57
|
|
58
|
-
|
58
|
+
size.send(cmp, filter_size)
|
59
59
|
end
|
60
60
|
|
61
61
|
# filter by replication count using unix find -size numbering scheme
|
@@ -73,7 +73,7 @@ class HadoopFSFinder
|
|
73
73
|
|
74
74
|
filter_repl = r.to_i.abs
|
75
75
|
|
76
|
-
|
76
|
+
repl.send(cmp, filter_repl)
|
77
77
|
end
|
78
78
|
|
79
79
|
def filter_mtime mtime
|
@@ -127,7 +127,7 @@ class HadoopFSFinder
|
|
127
127
|
filter_mtime = Time.now.to_i - m.abs.to_i
|
128
128
|
|
129
129
|
#puts "#{mtime} vs #{filter_mtime} #{m}"
|
130
|
-
|
130
|
+
mtime.send(cmp, filter_mtime)
|
131
131
|
end
|
132
132
|
|
133
133
|
# print out one line of info for a filestatus object
|
@@ -151,6 +151,8 @@ class HadoopFSFinder
|
|
151
151
|
end
|
152
152
|
path = "#{path}/" if f.dir?
|
153
153
|
|
154
|
+
return if not filter_path path
|
155
|
+
|
154
156
|
if not @opts[:ls]
|
155
157
|
puts path
|
156
158
|
return
|
@@ -183,11 +185,40 @@ class HadoopFSFinder
|
|
183
185
|
[type, perm, repl, f.owner, f.group, size, mtime, path]
|
184
186
|
end
|
185
187
|
|
188
|
+
# given a path string, return true if it matches the --name regexp (or if no regexp was given), false otherwise
|
189
|
+
def filter_path path
|
190
|
+
return true if not @opts[:name_re]
|
191
|
+
|
192
|
+
return false if path !~ /#{@opts[:name_re]}/
|
193
|
+
|
194
|
+
true
|
195
|
+
end
|
196
|
+
|
197
|
+
# prune_path
|
198
|
+
# - given a FileStatus, return true if a file is to be pruned (this
|
199
|
+
# is the opposite behavior of filter_*)
|
200
|
+
# - prune_path serves a different purpose than filter_path in that
|
201
|
+
# it runs during the walk stage rather than the display stage
|
202
|
+
# - that means directories that fail the test will NOT be followed
|
203
|
+
# and no files underneath will be processed
|
204
|
+
# - for now, it can only prune out hidden path names
|
205
|
+
def prune_path f
|
206
|
+
return false if not @opts[:no_hidden]
|
207
|
+
|
208
|
+
path = f.path.to_s.sub %r|\A.*/|, ''
|
209
|
+
hide = f.path.to_uri.scheme == 'hdfs' ? '_' : '\.'
|
210
|
+
return true if path =~ /\A#{hide}/
|
211
|
+
|
212
|
+
false
|
213
|
+
end
|
214
|
+
|
186
215
|
def find
|
187
216
|
@fs.glob_status(@path).each {|s| walk(s) {|f| display f}}
|
188
217
|
end
|
189
218
|
|
190
219
|
def walk fstat
|
220
|
+
return if prune_path fstat
|
221
|
+
|
191
222
|
yield fstat
|
192
223
|
|
193
224
|
return if not fstat.dir?
|
@@ -199,18 +230,20 @@ end
|
|
199
230
|
def usage
|
200
231
|
puts <<-EOF
|
201
232
|
usage: hfind [options] path
|
202
|
-
-H, --help
|
203
233
|
-a, --after # files modified after ISO date
|
204
234
|
-b, --before # files modified before ISO date
|
205
235
|
-m, --mmin # files modified before (-x) or after (+x) minutes ago
|
206
236
|
-M, --mtime # files modified before (-x) or after (+x) days ago
|
207
237
|
-s, --size # file size > (+x), < (-x), or == (x)
|
238
|
+
-n, --name # display paths matching a regular expression
|
208
239
|
-r, --repl # replication factor > (+x), < (-x), or == (x)
|
209
240
|
-U, --under # show under-replicated files
|
210
241
|
-t, --type # show type (f)ile or (d)irectory
|
211
242
|
-l, --ls # show full listing detail
|
212
243
|
-h, --human # show human readable file sizes
|
244
|
+
-D, --no-hidden # do not show hidden files
|
213
245
|
-u, --uri # show full uri for path
|
246
|
+
-H, --help
|
214
247
|
EOF
|
215
248
|
end
|
216
249
|
|
@@ -219,18 +252,20 @@ end
|
|
219
252
|
opts = {}
|
220
253
|
|
221
254
|
gopts = GetoptLong.new(
|
222
|
-
[ '--size',
|
223
|
-
[ '--repl',
|
224
|
-
[ '--after',
|
225
|
-
[ '--before',
|
226
|
-
[ '--mmin',
|
227
|
-
[ '--mtime',
|
228
|
-
[ '--type',
|
229
|
-
[ '--
|
230
|
-
[ '--
|
231
|
-
[ '--
|
232
|
-
[ '--
|
233
|
-
[ '--
|
255
|
+
[ '--size', '-s', GetoptLong::REQUIRED_ARGUMENT ],
|
256
|
+
[ '--repl', '-r', GetoptLong::REQUIRED_ARGUMENT ],
|
257
|
+
[ '--after', '-a', GetoptLong::REQUIRED_ARGUMENT ],
|
258
|
+
[ '--before', '-b', GetoptLong::REQUIRED_ARGUMENT ],
|
259
|
+
[ '--mmin', '-m', GetoptLong::REQUIRED_ARGUMENT ],
|
260
|
+
[ '--mtime', '-M', GetoptLong::REQUIRED_ARGUMENT ],
|
261
|
+
[ '--type', '-t', GetoptLong::REQUIRED_ARGUMENT ],
|
262
|
+
[ '--name', '-n', GetoptLong::REQUIRED_ARGUMENT ],
|
263
|
+
[ '--ls', '-l', GetoptLong::NO_ARGUMENT ],
|
264
|
+
[ '--uri', '-u', GetoptLong::NO_ARGUMENT ],
|
265
|
+
[ '--under', '-U', GetoptLong::NO_ARGUMENT ],
|
266
|
+
[ '--human', '-h', GetoptLong::NO_ARGUMENT ],
|
267
|
+
[ '--no-hidden', '-D', GetoptLong::NO_ARGUMENT ],
|
268
|
+
[ '--help', '-H', GetoptLong::NO_ARGUMENT ],
|
234
269
|
)
|
235
270
|
|
236
271
|
gopts.each do |opt, arg|
|
@@ -249,6 +284,8 @@ gopts.each do |opt, arg|
|
|
249
284
|
opts[:repl] = arg
|
250
285
|
when '--type'
|
251
286
|
opts[:type] = arg
|
287
|
+
when '--name'
|
288
|
+
opts[:name_re] = arg
|
252
289
|
when '--human'
|
253
290
|
opts[:human] = true
|
254
291
|
when '--ls'
|
@@ -257,6 +294,8 @@ gopts.each do |opt, arg|
|
|
257
294
|
opts[:under] = true
|
258
295
|
when '--uri'
|
259
296
|
opts[:uri] = true
|
297
|
+
when '--no-hidden'
|
298
|
+
opts[:no_hidden] = true
|
260
299
|
else
|
261
300
|
usage
|
262
301
|
exit 1
|
metadata
CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
|
|
4
4
|
prerelease: false
|
5
5
|
segments:
|
6
6
|
- 0
|
7
|
-
- 0
|
8
7
|
- 1
|
9
|
-
|
8
|
+
- 0
|
9
|
+
version: 0.1.0
|
10
10
|
platform: java
|
11
11
|
authors:
|
12
12
|
- Frank Fejes
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2011-07-
|
17
|
+
date: 2011-07-13 00:00:00 -05:00
|
18
18
|
default_executable:
|
19
19
|
dependencies: []
|
20
20
|
|