hadoop-find 0.0.1-java → 0.1.0-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4)
  1. data/CHANGELOG +5 -0
  2. data/README +5 -3
  3. data/bin/hfind.rb +55 -16
  4. metadata +3 -3
data/CHANGELOG CHANGED
@@ -1,2 +1,7 @@
1
+ * 2011-07-13 - fsf
2
+ - new: added --no-hidden (-D) option to not display or follow hidden
3
+ files and directories (_ for hdfs, . for unix)
4
+ - new: added --name (-n) regular expression filter for path matching
5
+
1
6
  * 2011-07-02 - fsf
2
7
  - initial import
data/README CHANGED
@@ -1,12 +1,14 @@
1
1
  hfind
2
2
 
3
- A file listing command for HDFS filesystems similar to unix find(1).
3
+ A file listing utility for HDFS filesystems similar to unix find(1).
4
4
 
5
5
  Requires jruby 1.6+.
6
6
 
7
7
  # installation
8
8
 
9
- Simply copy hfind.rb and hfind into your path.
9
+ jgem install --no-wrapper hadoop-find
10
+
11
+ Or simply copy hfind.rb and hfind into your path.
10
12
 
11
13
  # usage
12
14
 
@@ -17,7 +19,7 @@ usage: hfind [options] path
17
19
  -M, --mtime # files modified before (-x) or after (+x) days ago
18
20
  -s, --size # file size > (+x), < (-x), or == (x)
19
21
  -r, --repl # replication factor > (+x), < (-x), or == (x)
20
- -U, --under # under-replicated files
22
+ -U, --under # show under-replicated files
21
23
  -t, --type # show type (f)ile or (d)irectory
22
24
  -l, --ls # show full listing detail
23
25
  -h, --human # show human readable file sizes
@@ -55,7 +55,7 @@ class HadoopFSFinder
55
55
  end
56
56
  filter_size = s.to_i.abs * multi
57
57
 
58
- return size.send(cmp, filter_size)
58
+ size.send(cmp, filter_size)
59
59
  end
60
60
 
61
61
  # filter by replication count using unix find -size numbering scheme
@@ -73,7 +73,7 @@ class HadoopFSFinder
73
73
 
74
74
  filter_repl = r.to_i.abs
75
75
 
76
- return repl.send(cmp, filter_repl)
76
+ repl.send(cmp, filter_repl)
77
77
  end
78
78
 
79
79
  def filter_mtime mtime
@@ -127,7 +127,7 @@ class HadoopFSFinder
127
127
  filter_mtime = Time.now.to_i - m.abs.to_i
128
128
 
129
129
  #puts "#{mtime} vs #{filter_mtime} #{m}"
130
- return mtime.send(cmp, filter_mtime)
130
+ mtime.send(cmp, filter_mtime)
131
131
  end
132
132
 
133
133
  # print out one line of info for a filestatus object
@@ -151,6 +151,8 @@ class HadoopFSFinder
151
151
  end
152
152
  path = "#{path}/" if f.dir?
153
153
 
154
+ return if not filter_path path
155
+
154
156
  if not @opts[:ls]
155
157
  puts path
156
158
  return
@@ -183,11 +185,40 @@ class HadoopFSFinder
183
185
  [type, perm, repl, f.owner, f.group, size, mtime, path]
184
186
  end
185
187
 
188
+ # given a path string, return false if it doesn't match the provided regexp
189
+ def filter_path path
190
+ return true if not @opts[:name_re]
191
+
192
+ return false if path !~ /#{@opts[:name_re]}/
193
+
194
+ true
195
+ end
196
+
197
+ # prune_path
198
+ # - given a FileStatus, return true if a file is to be pruned (this
199
+ # is the opposite behavior of filter_*)
200
+ # - prune_path serves a different purpose than filter_path in that
201
+ # it runs during the walk stage rather than the display stage
202
+ # - that means directories that fail the test will NOT be followed
203
+ # and no files underneath will be processed
204
+ # - for now, it can only prune out hidden path names
205
+ def prune_path f
206
+ return false if not @opts[:no_hidden]
207
+
208
+ path = f.path.to_s.sub %r|\A.*/|, ''
209
+ hide = f.path.to_uri.scheme == 'hdfs' ? '_' : '\.'
210
+ return true if path =~ /\A#{hide}/
211
+
212
+ false
213
+ end
214
+
186
215
  def find
187
216
  @fs.glob_status(@path).each {|s| walk(s) {|f| display f}}
188
217
  end
189
218
 
190
219
  def walk fstat
220
+ return if prune_path fstat
221
+
191
222
  yield fstat
192
223
 
193
224
  return if not fstat.dir?
@@ -199,18 +230,20 @@ end
199
230
  def usage
200
231
  puts <<-EOF
201
232
  usage: hfind [options] path
202
- -H, --help
203
233
  -a, --after # files modified after ISO date
204
234
  -b, --before # files modified before ISO date
205
235
  -m, --mmin # files modified before (-x) or after (+x) minutes ago
206
236
  -M, --mtime # files modified before (-x) or after (+x) days ago
207
237
  -s, --size # file size > (+x), < (-x), or == (x)
238
+ -n, --name # display paths matching a regular expression
208
239
  -r, --repl # replication factor > (+x), < (-x), or == (x)
209
240
  -U, --under # show under-replicated files
210
241
  -t, --type # show type (f)ile or (d)irectory
211
242
  -l, --ls # show full listing detail
212
243
  -h, --human # show human readable file sizes
244
+ -D, --no-hidden # do not show hidden files
213
245
  -u, --uri # show full uri for path
246
+ -H, --help
214
247
  EOF
215
248
  end
216
249
 
@@ -219,18 +252,20 @@ end
219
252
  opts = {}
220
253
 
221
254
  gopts = GetoptLong.new(
222
- [ '--size', '-s', GetoptLong::REQUIRED_ARGUMENT ],
223
- [ '--repl', '-r', GetoptLong::REQUIRED_ARGUMENT ],
224
- [ '--after', '-a', GetoptLong::REQUIRED_ARGUMENT ],
225
- [ '--before', '-b', GetoptLong::REQUIRED_ARGUMENT ],
226
- [ '--mmin', '-m', GetoptLong::REQUIRED_ARGUMENT ],
227
- [ '--mtime', '-M', GetoptLong::REQUIRED_ARGUMENT ],
228
- [ '--type', '-t', GetoptLong::REQUIRED_ARGUMENT ],
229
- [ '--ls', '-l', GetoptLong::NO_ARGUMENT ],
230
- [ '--uri', '-u', GetoptLong::NO_ARGUMENT ],
231
- [ '--under', '-U', GetoptLong::NO_ARGUMENT ],
232
- [ '--human', '-h', GetoptLong::NO_ARGUMENT ],
233
- [ '--help', '-H', GetoptLong::NO_ARGUMENT ],
255
+ [ '--size', '-s', GetoptLong::REQUIRED_ARGUMENT ],
256
+ [ '--repl', '-r', GetoptLong::REQUIRED_ARGUMENT ],
257
+ [ '--after', '-a', GetoptLong::REQUIRED_ARGUMENT ],
258
+ [ '--before', '-b', GetoptLong::REQUIRED_ARGUMENT ],
259
+ [ '--mmin', '-m', GetoptLong::REQUIRED_ARGUMENT ],
260
+ [ '--mtime', '-M', GetoptLong::REQUIRED_ARGUMENT ],
261
+ [ '--type', '-t', GetoptLong::REQUIRED_ARGUMENT ],
262
+ [ '--name', '-n', GetoptLong::REQUIRED_ARGUMENT ],
263
+ [ '--ls', '-l', GetoptLong::NO_ARGUMENT ],
264
+ [ '--uri', '-u', GetoptLong::NO_ARGUMENT ],
265
+ [ '--under', '-U', GetoptLong::NO_ARGUMENT ],
266
+ [ '--human', '-h', GetoptLong::NO_ARGUMENT ],
267
+ [ '--no-hidden', '-D', GetoptLong::NO_ARGUMENT ],
268
+ [ '--help', '-H', GetoptLong::NO_ARGUMENT ],
234
269
  )
235
270
 
236
271
  gopts.each do |opt, arg|
@@ -249,6 +284,8 @@ gopts.each do |opt, arg|
249
284
  opts[:repl] = arg
250
285
  when '--type'
251
286
  opts[:type] = arg
287
+ when '--name'
288
+ opts[:name_re] = arg
252
289
  when '--human'
253
290
  opts[:human] = true
254
291
  when '--ls'
@@ -257,6 +294,8 @@ gopts.each do |opt, arg|
257
294
  opts[:under] = true
258
295
  when '--uri'
259
296
  opts[:uri] = true
297
+ when '--no-hidden'
298
+ opts[:no_hidden] = true
260
299
  else
261
300
  usage
262
301
  exit 1
metadata CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
4
4
  prerelease: false
5
5
  segments:
6
6
  - 0
7
- - 0
8
7
  - 1
9
- version: 0.0.1
8
+ - 0
9
+ version: 0.1.0
10
10
  platform: java
11
11
  authors:
12
12
  - Frank Fejes
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2011-07-02 00:00:00 -05:00
17
+ date: 2011-07-13 00:00:00 -05:00
18
18
  default_executable:
19
19
  dependencies: []
20
20