hadoop-find 0.0.1-java → 0.1.0-java

Sign up to get free protection for your applications and to get access to all the features.
Files changed (4) hide show
  1. data/CHANGELOG +5 -0
  2. data/README +5 -3
  3. data/bin/hfind.rb +55 -16
  4. metadata +3 -3
data/CHANGELOG CHANGED
@@ -1,2 +1,7 @@
1
+ * 2011-07-13 - fsf
2
+ - new: added --no-hidden (-D) option to not display or follow hidden
3
+ files and directories (names beginning with _ on hdfs, . on unix)
4
+ - new: added --name (-n) regular expression filter for path matching
5
+
1
6
  * 2011-07-02 - fsf
2
7
  - initial import
data/README CHANGED
@@ -1,12 +1,14 @@
1
1
  hfind
2
2
 
3
- A file listing command for HDFS filesystems similar to unix find(1).
3
+ A file listing utility for HDFS filesystems similar to unix find(1).
4
4
 
5
5
  Requires jruby 1.6+.
6
6
 
7
7
  # installation
8
8
 
9
- Simply copy hfind.rb and hfind into your path.
9
+ jgem install --no-wrapper hadoop-find
10
+
11
+ Or simply copy hfind.rb and hfind into your path.
10
12
 
11
13
  # usage
12
14
 
@@ -17,7 +19,7 @@ usage: hfind [options] path
17
19
  -M, --mtime # files modified before (-x) or after (+x) days ago
18
20
  -s, --size # file size > (+x), < (-x), or == (x)
19
21
  -r, --repl # replication factor > (+x), < (-x), or == (x)
20
- -U, --under # under-replicated files
22
+ -U, --under # show under-replicated files
21
23
  -t, --type # show type (f)ile or (d)irectory
22
24
  -l, --ls # show full listing detail
23
25
  -h, --human # show human readable file sizes
@@ -55,7 +55,7 @@ class HadoopFSFinder
55
55
  end
56
56
  filter_size = s.to_i.abs * multi
57
57
 
58
- return size.send(cmp, filter_size)
58
+ size.send(cmp, filter_size)
59
59
  end
60
60
 
61
61
  # filter by replication count using unix find -size numbering scheme
@@ -73,7 +73,7 @@ class HadoopFSFinder
73
73
 
74
74
  filter_repl = r.to_i.abs
75
75
 
76
- return repl.send(cmp, filter_repl)
76
+ repl.send(cmp, filter_repl)
77
77
  end
78
78
 
79
79
  def filter_mtime mtime
@@ -127,7 +127,7 @@ class HadoopFSFinder
127
127
  filter_mtime = Time.now.to_i - m.abs.to_i
128
128
 
129
129
  #puts "#{mtime} vs #{filter_mtime} #{m}"
130
- return mtime.send(cmp, filter_mtime)
130
+ mtime.send(cmp, filter_mtime)
131
131
  end
132
132
 
133
133
  # print out one line of info for a filestatus object
@@ -151,6 +151,8 @@ class HadoopFSFinder
151
151
  end
152
152
  path = "#{path}/" if f.dir?
153
153
 
154
+ return if not filter_path path
155
+
154
156
  if not @opts[:ls]
155
157
  puts path
156
158
  return
@@ -183,11 +185,40 @@ class HadoopFSFinder
183
185
  [type, perm, repl, f.owner, f.group, size, mtime, path]
184
186
  end
185
187
 
188
+ # given a path string, return false if it doesn't match the provided regexp
189
+ def filter_path path
190
+ return true if not @opts[:name_re]
191
+
192
+ return false if path !~ /#{@opts[:name_re]}/
193
+
194
+ true
195
+ end
196
+
197
+ # prune_path
198
+ # - given a FileStatus, return true if the path is to be pruned (note
199
+ # this is the inverse of filter_path, which returns true to keep a path)
200
+ # - prune_path serves a different purpose than filter_path in that
201
+ # it runs during the walk stage rather than the display stage
202
+ # - that means directories that fail the test will NOT be followed
203
+ # and no files underneath will be processed
204
+ # - for now, it can only prune out hidden path names
205
+ def prune_path f
206
+ return false if not @opts[:no_hidden]
207
+
208
+ path = f.path.to_s.sub %r|\A.*/|, ''
209
+ hide = f.path.to_uri.scheme == 'hdfs' ? '_' : '\.'
210
+ return true if path =~ /\A#{hide}/
211
+
212
+ false
213
+ end
214
+
186
215
  def find
187
216
  @fs.glob_status(@path).each {|s| walk(s) {|f| display f}}
188
217
  end
189
218
 
190
219
  def walk fstat
220
+ return if prune_path fstat
221
+
191
222
  yield fstat
192
223
 
193
224
  return if not fstat.dir?
@@ -199,18 +230,20 @@ end
199
230
  def usage
200
231
  puts <<-EOF
201
232
  usage: hfind [options] path
202
- -H, --help
203
233
  -a, --after # files modified after ISO date
204
234
  -b, --before # files modified before ISO date
205
235
  -m, --mmin # files modified before (-x) or after (+x) minutes ago
206
236
  -M, --mtime # files modified before (-x) or after (+x) days ago
207
237
  -s, --size # file size > (+x), < (-x), or == (x)
238
+ -n, --name # display paths matching a regular expression
208
239
  -r, --repl # replication factor > (+x), < (-x), or == (x)
209
240
  -U, --under # show under-replicated files
210
241
  -t, --type # show type (f)ile or (d)irectory
211
242
  -l, --ls # show full listing detail
212
243
  -h, --human # show human readable file sizes
244
+ -D, --no-hidden # do not show hidden files
213
245
  -u, --uri # show full uri for path
246
+ -H, --help
214
247
  EOF
215
248
  end
216
249
 
@@ -219,18 +252,20 @@ end
219
252
  opts = {}
220
253
 
221
254
  gopts = GetoptLong.new(
222
- [ '--size', '-s', GetoptLong::REQUIRED_ARGUMENT ],
223
- [ '--repl', '-r', GetoptLong::REQUIRED_ARGUMENT ],
224
- [ '--after', '-a', GetoptLong::REQUIRED_ARGUMENT ],
225
- [ '--before', '-b', GetoptLong::REQUIRED_ARGUMENT ],
226
- [ '--mmin', '-m', GetoptLong::REQUIRED_ARGUMENT ],
227
- [ '--mtime', '-M', GetoptLong::REQUIRED_ARGUMENT ],
228
- [ '--type', '-t', GetoptLong::REQUIRED_ARGUMENT ],
229
- [ '--ls', '-l', GetoptLong::NO_ARGUMENT ],
230
- [ '--uri', '-u', GetoptLong::NO_ARGUMENT ],
231
- [ '--under', '-U', GetoptLong::NO_ARGUMENT ],
232
- [ '--human', '-h', GetoptLong::NO_ARGUMENT ],
233
- [ '--help', '-H', GetoptLong::NO_ARGUMENT ],
255
+ [ '--size', '-s', GetoptLong::REQUIRED_ARGUMENT ],
256
+ [ '--repl', '-r', GetoptLong::REQUIRED_ARGUMENT ],
257
+ [ '--after', '-a', GetoptLong::REQUIRED_ARGUMENT ],
258
+ [ '--before', '-b', GetoptLong::REQUIRED_ARGUMENT ],
259
+ [ '--mmin', '-m', GetoptLong::REQUIRED_ARGUMENT ],
260
+ [ '--mtime', '-M', GetoptLong::REQUIRED_ARGUMENT ],
261
+ [ '--type', '-t', GetoptLong::REQUIRED_ARGUMENT ],
262
+ [ '--name', '-n', GetoptLong::REQUIRED_ARGUMENT ],
263
+ [ '--ls', '-l', GetoptLong::NO_ARGUMENT ],
264
+ [ '--uri', '-u', GetoptLong::NO_ARGUMENT ],
265
+ [ '--under', '-U', GetoptLong::NO_ARGUMENT ],
266
+ [ '--human', '-h', GetoptLong::NO_ARGUMENT ],
267
+ [ '--no-hidden', '-D', GetoptLong::NO_ARGUMENT ],
268
+ [ '--help', '-H', GetoptLong::NO_ARGUMENT ],
234
269
  )
235
270
 
236
271
  gopts.each do |opt, arg|
@@ -249,6 +284,8 @@ gopts.each do |opt, arg|
249
284
  opts[:repl] = arg
250
285
  when '--type'
251
286
  opts[:type] = arg
287
+ when '--name'
288
+ opts[:name_re] = arg
252
289
  when '--human'
253
290
  opts[:human] = true
254
291
  when '--ls'
@@ -257,6 +294,8 @@ gopts.each do |opt, arg|
257
294
  opts[:under] = true
258
295
  when '--uri'
259
296
  opts[:uri] = true
297
+ when '--no-hidden'
298
+ opts[:no_hidden] = true
260
299
  else
261
300
  usage
262
301
  exit 1
metadata CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
4
4
  prerelease: false
5
5
  segments:
6
6
  - 0
7
- - 0
8
7
  - 1
9
- version: 0.0.1
8
+ - 0
9
+ version: 0.1.0
10
10
  platform: java
11
11
  authors:
12
12
  - Frank Fejes
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2011-07-02 00:00:00 -05:00
17
+ date: 2011-07-13 00:00:00 -05:00
18
18
  default_executable:
19
19
  dependencies: []
20
20