hadoop-find 0.0.1-java → 0.1.0-java
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +5 -0
- data/README +5 -3
- data/bin/hfind.rb +55 -16
- metadata +3 -3
data/CHANGELOG
CHANGED
data/README
CHANGED
@@ -1,12 +1,14 @@
|
|
1
1
|
hfind
|
2
2
|
|
3
|
-
A file listing
|
3
|
+
A file listing utility for HDFS filesystems similar to unix find(1).
|
4
4
|
|
5
5
|
Requires jruby 1.6+.
|
6
6
|
|
7
7
|
# installation
|
8
8
|
|
9
|
-
|
9
|
+
jgem install --no-wrapper hadoop-find
|
10
|
+
|
11
|
+
Or simply copy hfind.rb and hfind into your path.
|
10
12
|
|
11
13
|
# usage
|
12
14
|
|
@@ -17,7 +19,7 @@ usage: hfind [options] path
|
|
17
19
|
-M, --mtime # files modified before (-x) or after (+x) days ago
|
18
20
|
-s, --size # file size > (+x), < (-x), or == (x)
|
19
21
|
-r, --repl # replication factor > (+x), < (-x), or == (x)
|
20
|
-
-U, --under # under-replicated files
|
22
|
+
-U, --under # show under-replicated files
|
21
23
|
-t, --type # show type (f)ile or (d)irectory
|
22
24
|
-l, --ls # show full listing detail
|
23
25
|
-h, --human # show human readable file sizes
|
data/bin/hfind.rb
CHANGED
@@ -55,7 +55,7 @@ class HadoopFSFinder
|
|
55
55
|
end
|
56
56
|
filter_size = s.to_i.abs * multi
|
57
57
|
|
58
|
-
|
58
|
+
size.send(cmp, filter_size)
|
59
59
|
end
|
60
60
|
|
61
61
|
# filter by replication count using unix find -size numbering scheme
|
@@ -73,7 +73,7 @@ class HadoopFSFinder
|
|
73
73
|
|
74
74
|
filter_repl = r.to_i.abs
|
75
75
|
|
76
|
-
|
76
|
+
repl.send(cmp, filter_repl)
|
77
77
|
end
|
78
78
|
|
79
79
|
def filter_mtime mtime
|
@@ -127,7 +127,7 @@ class HadoopFSFinder
|
|
127
127
|
filter_mtime = Time.now.to_i - m.abs.to_i
|
128
128
|
|
129
129
|
#puts "#{mtime} vs #{filter_mtime} #{m}"
|
130
|
-
|
130
|
+
mtime.send(cmp, filter_mtime)
|
131
131
|
end
|
132
132
|
|
133
133
|
# print out one line of info for a filestatus object
|
@@ -151,6 +151,8 @@ class HadoopFSFinder
|
|
151
151
|
end
|
152
152
|
path = "#{path}/" if f.dir?
|
153
153
|
|
154
|
+
return if not filter_path path
|
155
|
+
|
154
156
|
if not @opts[:ls]
|
155
157
|
puts path
|
156
158
|
return
|
@@ -183,11 +185,40 @@ class HadoopFSFinder
|
|
183
185
|
[type, perm, repl, f.owner, f.group, size, mtime, path]
|
184
186
|
end
|
185
187
|
|
188
|
+
# given a path string, return false if it doesn't match the provided regexp
|
189
|
+
def filter_path path
|
190
|
+
return true if not @opts[:name_re]
|
191
|
+
|
192
|
+
return false if path !~ /#{@opts[:name_re]}/
|
193
|
+
|
194
|
+
true
|
195
|
+
end
|
196
|
+
|
197
|
+
# prune_path
|
198
|
+
# - given a FileStatus, return true if a file is to be pruned (this
|
199
|
+
# is the opposite behavior of filter_*)
|
200
|
+
# - prune_path serves a different purpose than filter_path in that
|
201
|
+
# it runs during the walk stage rather than the display stage
|
202
|
+
# - that means directories that fail the test will NOT be followed
|
203
|
+
# and no files underneath will be processed
|
204
|
+
# - for now, it can only prune out hidden path names
|
205
|
+
def prune_path f
|
206
|
+
return false if not @opts[:no_hidden]
|
207
|
+
|
208
|
+
path = f.path.to_s.sub %r|\A.*/|, ''
|
209
|
+
hide = f.path.to_uri.scheme == 'hdfs' ? '_' : '\.'
|
210
|
+
return true if path =~ /\A#{hide}/
|
211
|
+
|
212
|
+
false
|
213
|
+
end
|
214
|
+
|
186
215
|
def find
|
187
216
|
@fs.glob_status(@path).each {|s| walk(s) {|f| display f}}
|
188
217
|
end
|
189
218
|
|
190
219
|
def walk fstat
|
220
|
+
return if prune_path fstat
|
221
|
+
|
191
222
|
yield fstat
|
192
223
|
|
193
224
|
return if not fstat.dir?
|
@@ -199,18 +230,20 @@ end
|
|
199
230
|
def usage
|
200
231
|
puts <<-EOF
|
201
232
|
usage: hfind [options] path
|
202
|
-
-H, --help
|
203
233
|
-a, --after # files modified after ISO date
|
204
234
|
-b, --before # files modified before ISO date
|
205
235
|
-m, --mmin # files modified before (-x) or after (+x) minutes ago
|
206
236
|
-M, --mtime # files modified before (-x) or after (+x) days ago
|
207
237
|
-s, --size # file size > (+x), < (-x), or == (x)
|
238
|
+
-n, --name # display paths matching a regular expression
|
208
239
|
-r, --repl # replication factor > (+x), < (-x), or == (x)
|
209
240
|
-U, --under # show under-replicated files
|
210
241
|
-t, --type # show type (f)ile or (d)irectory
|
211
242
|
-l, --ls # show full listing detail
|
212
243
|
-h, --human # show human readable file sizes
|
244
|
+
-D, --no-hidden # do not show hidden files
|
213
245
|
-u, --uri # show full uri for path
|
246
|
+
-H, --help
|
214
247
|
EOF
|
215
248
|
end
|
216
249
|
|
@@ -219,18 +252,20 @@ end
|
|
219
252
|
opts = {}
|
220
253
|
|
221
254
|
gopts = GetoptLong.new(
|
222
|
-
[ '--size',
|
223
|
-
[ '--repl',
|
224
|
-
[ '--after',
|
225
|
-
[ '--before',
|
226
|
-
[ '--mmin',
|
227
|
-
[ '--mtime',
|
228
|
-
[ '--type',
|
229
|
-
[ '--
|
230
|
-
[ '--
|
231
|
-
[ '--
|
232
|
-
[ '--
|
233
|
-
[ '--
|
255
|
+
[ '--size', '-s', GetoptLong::REQUIRED_ARGUMENT ],
|
256
|
+
[ '--repl', '-r', GetoptLong::REQUIRED_ARGUMENT ],
|
257
|
+
[ '--after', '-a', GetoptLong::REQUIRED_ARGUMENT ],
|
258
|
+
[ '--before', '-b', GetoptLong::REQUIRED_ARGUMENT ],
|
259
|
+
[ '--mmin', '-m', GetoptLong::REQUIRED_ARGUMENT ],
|
260
|
+
[ '--mtime', '-M', GetoptLong::REQUIRED_ARGUMENT ],
|
261
|
+
[ '--type', '-t', GetoptLong::REQUIRED_ARGUMENT ],
|
262
|
+
[ '--name', '-n', GetoptLong::REQUIRED_ARGUMENT ],
|
263
|
+
[ '--ls', '-l', GetoptLong::NO_ARGUMENT ],
|
264
|
+
[ '--uri', '-u', GetoptLong::NO_ARGUMENT ],
|
265
|
+
[ '--under', '-U', GetoptLong::NO_ARGUMENT ],
|
266
|
+
[ '--human', '-h', GetoptLong::NO_ARGUMENT ],
|
267
|
+
[ '--no-hidden', '-D', GetoptLong::NO_ARGUMENT ],
|
268
|
+
[ '--help', '-H', GetoptLong::NO_ARGUMENT ],
|
234
269
|
)
|
235
270
|
|
236
271
|
gopts.each do |opt, arg|
|
@@ -249,6 +284,8 @@ gopts.each do |opt, arg|
|
|
249
284
|
opts[:repl] = arg
|
250
285
|
when '--type'
|
251
286
|
opts[:type] = arg
|
287
|
+
when '--name'
|
288
|
+
opts[:name_re] = arg
|
252
289
|
when '--human'
|
253
290
|
opts[:human] = true
|
254
291
|
when '--ls'
|
@@ -257,6 +294,8 @@ gopts.each do |opt, arg|
|
|
257
294
|
opts[:under] = true
|
258
295
|
when '--uri'
|
259
296
|
opts[:uri] = true
|
297
|
+
when '--no-hidden'
|
298
|
+
opts[:no_hidden] = true
|
260
299
|
else
|
261
300
|
usage
|
262
301
|
exit 1
|
metadata
CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
|
|
4
4
|
prerelease: false
|
5
5
|
segments:
|
6
6
|
- 0
|
7
|
-
- 0
|
8
7
|
- 1
|
9
|
-
|
8
|
+
- 0
|
9
|
+
version: 0.1.0
|
10
10
|
platform: java
|
11
11
|
authors:
|
12
12
|
- Frank Fejes
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2011-07-
|
17
|
+
date: 2011-07-13 00:00:00 -05:00
|
18
18
|
default_executable:
|
19
19
|
dependencies: []
|
20
20
|
|