hadoop-find 0.0.1-java

Files changed (5)
  1. data/CHANGELOG +2 -0
  2. data/README +29 -0
  3. data/bin/hfind +33 -0
  4. data/bin/hfind.rb +269 -0
  5. metadata +76 -0
data/CHANGELOG ADDED
@@ -0,0 +1,2 @@
+ * 2011-07-02 - fsf
+ - initial import
data/README ADDED
@@ -0,0 +1,29 @@
+ hfind
+
+ A file listing command for HDFS filesystems similar to unix find(1).
+
+ Requires jruby 1.6+.
+
+ # installation
+
+ Simply copy hfind.rb and hfind into your path.
+
+ # usage
+
+ usage: hfind [options] path
+ -a, --after # files modified after ISO date
+ -b, --before # files modified before ISO date
+ -m, --mmin # files modified less than (-x) or more than (+x) minutes ago
+ -M, --mtime # files modified less than (-x) or more than (+x) days ago
+ -s, --size # file size > (+x), < (-x), or == (x)
+ -r, --repl # replication factor > (+x), < (-x), or == (x)
+ -U, --under # under-replicated files
+ -t, --type # show type (f)ile or (d)irectory
+ -l, --ls # show full listing detail
+ -h, --human # show human readable file sizes
+ -u, --uri # show full uri for path
+ -H, --help
+
+ Please let me know if you find this software useful!
+
+ --frank
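
A few example invocations, using a hypothetical /data/logs directory (the flag semantics follow the listing above and the filters implemented in bin/hfind.rb):

    # full listing detail with human-readable sizes
    hfind -l -h /data/logs

    # regular files larger than 1G, last modified more than 30 days ago
    hfind -t f -s +1G -M +30 /data/logs

    # under-replicated files, printed with their full hdfs:// URIs
    hfind -U -u /data/logs
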
data/bin/hfind ADDED
@@ -0,0 +1,33 @@
+ #!/bin/bash
+
+ if [ -z "$HADOOP_HOME" ]; then
+ echo error: HADOOP_HOME is not defined >&2
+ exit 1
+ fi
+
+ export PATH=/usr/local/jruby/bin:$PATH
+ if ! type -p jruby >&/dev/null; then
+ echo error: cannot find jruby...please set your PATH >&2
+ exit 1
+ fi
+ export JRUBY_OPTS=--1.9
+
+ HFIND=${0%/*}/hfind.rb
+ if [ ! -f "$HFIND" ]; then
+ echo error: cannot find $HFIND...please install it alongside: >&2
+ echo " $0" >&2
+ exit 2
+ fi
+
+ # bring in the hadoop core and lib jars
+ for f in $HADOOP_HOME/hadoop-core-*.jar; do
+ CLASSPATH=${CLASSPATH}:$f
+ done
+
+ for f in $HADOOP_HOME/lib/*.jar; do
+ CLASSPATH=${CLASSPATH}:$f
+ done
+
+ export CLASSPATH
+
+ exec jruby "$HFIND" "$@"
data/bin/hfind.rb ADDED
@@ -0,0 +1,269 @@
+ #!/usr/bin/env jruby
+
+ require 'java'
+ require 'getoptlong'
+
+ class HadoopFSFinder
+ def initialize uri, opts = {}
+ @opts = opts
+
+ @conf = org.apache.hadoop.conf.Configuration.new
+ core_site = ENV['HADOOP_HOME'].to_s + '/conf/core-site.xml'
+ core_path = org.apache.hadoop.fs.Path.new core_site
+ @conf.add_resource core_path
+ hdfs_site = ENV['HADOOP_HOME'].to_s + '/conf/hdfs-site.xml'
+ hdfs_path = org.apache.hadoop.fs.Path.new hdfs_site
+ @conf.add_resource hdfs_path
+ # convert . to the user's home directory
+ uri.sub! /\A\./, "/user/#{ENV['USER']}"
+
+ if @opts[:under]
+ @opts[:repl] = "-#{@conf.get_props['dfs.replication']}"
+ end
+ @opts[:type] = 'f' if @opts[:repl]
+
+ @uri = java.net.URI.create uri
+ @path = org.apache.hadoop.fs.Path.new @uri
+ @fs = org.apache.hadoop.fs.FileSystem.get @uri, @conf
+ end
+
+ # filter by size using unix find -size numbering scheme
+ def filter_size size
+ return true if not @opts[:size]
+
+ s = @opts[:size]
+ cmp = :==
+ case s[0].chr
+ when '-'
+ cmp = :<
+ when '+'
+ cmp = :>
+ end
+
+ multi = 1
+ case s[-1].chr.upcase
+ when 'K'
+ multi = 1024
+ when 'M'
+ multi = 1024 * 1024
+ when 'G'
+ multi = 1024 * 1024 * 1024
+ when 'T'
+ multi = 1024 * 1024 * 1024 * 1024
+ when 'P'
+ multi = 1024 * 1024 * 1024 * 1024 * 1024
+ end
+ filter_size = s.to_i.abs * multi
+
+ return size.send(cmp, filter_size)
+ end
+
+ # filter by replication count using unix find -size numbering scheme
+ def filter_repl repl
+ return true if not @opts[:repl]
+
+ r = @opts[:repl]
+ cmp = :==
+ case r[0].chr
+ when '-'
+ cmp = :<
+ when '+'
+ cmp = :>
+ end
+
+ filter_repl = r.to_i.abs
+
+ return repl.send(cmp, filter_repl)
+ end
+
+ def filter_mtime mtime
+ mtime_filters = [:before, :after, :mmin, :mtime]
+ return true if (mtime_filters & @opts.keys).empty?
+
+ dt_regexp = /\A(\d{4})-(\d{2})-(\d{2})/
+
+ if @opts[:before]
+ match = dt_regexp.match @opts[:before]
+ if match
+ m = Time.new(match[1], match[2], match[3]).to_i
+ else
+ raise 'Invalid Date Representation'
+ end
+ #puts "#{mtime} vs #{m}"
+ if mtime < m
+ return true
+ else
+ return false
+ end
+ elsif @opts[:after]
+ match = dt_regexp.match @opts[:after]
+ if match
+ m = Time.new(match[1], match[2], match[3]).to_i
+ else
+ raise 'Invalid Date Representation'
+ end
+ #puts "#{mtime} vs #{m}"
+ if mtime > m
+ return true
+ else
+ return false
+ end
+ end
+
+ m = 0
+ if @opts[:mmin]
+ m = @opts[:mmin].to_i * 60
+ elsif @opts[:mtime]
+ m = @opts[:mtime].to_i * 86400
+ end
+
+ cmp = :==
+ if m < 0
+ cmp = :>
+ elsif m > 0
+ cmp = :<
+ end
+
+ filter_mtime = Time.now.to_i - m.abs.to_i
+
+ #puts "#{mtime} vs #{filter_mtime} #{m}"
+ return mtime.send(cmp, filter_mtime)
+ end
+
+ # print out one line of info for a filestatus object
+ def display f
+ type = f.dir? ? 'd' : 'f'
+ return if @opts[:type] and @opts[:type] != type
+
+ size = f.len
+ return if not filter_size size
+
+ repl = f.replication
+ return if not filter_repl repl
+
+ mtime = Time.at(f.modification_time / 1000).to_i
+ return if not filter_mtime mtime
+
+ if @opts[:uri]
+ path = f.path.to_s
+ else
+ path = f.path.to_uri.path
+ end
+ path = "#{path}/" if f.dir?
+
+ if not @opts[:ls]
+ puts path
+ return
+ end
+
+ if @opts[:human]
+ if size > 1125899906842624
+ size = "#{size / 1125899906842624}P"
+ elsif size > 1099511627776
+ size = "#{size / 1099511627776}T"
+ elsif size > 1073741824
+ size = "#{size / 1073741824}G"
+ elsif size > 1048576
+ size = "#{size / 1048576}M"
+ elsif size > 1024
+ size = "#{size / 1024}K"
+ else
+ size = "#{size}B"
+ end
+ size = '%4s' % size
+ else
+ size = '%12s' % size
+ end
+
+ type = f.dir? ? 'd' : '-'
+ repl = f.replication > 0 ? f.replication : '-'
+ mtime = Time.at(f.modification_time / 1000).strftime '%Y-%m-%d %H:%M:%S'
+ perm = f.permission.to_s.strip
+ puts '%s%s %s %-8s %-16s %s %s %s' %
+ [type, perm, repl, f.owner, f.group, size, mtime, path]
+ end
+
+ def find
+ @fs.glob_status(@path).each {|s| walk(s) {|f| display f}}
+ end
+
+ def walk fstat
+ yield fstat
+
+ return if not fstat.dir?
+
+ @fs.list_status(fstat.path).each {|s| walk(s) {|f| yield f}}
+ end
+ end
+
+ def usage
+ puts <<-EOF
+ usage: hfind [options] path
+ -H, --help
+ -a, --after # files modified after ISO date
+ -b, --before # files modified before ISO date
+ -m, --mmin # files modified less than (-x) or more than (+x) minutes ago
+ -M, --mtime # files modified less than (-x) or more than (+x) days ago
+ -s, --size # file size > (+x), < (-x), or == (x)
+ -r, --repl # replication factor > (+x), < (-x), or == (x)
+ -U, --under # show under-replicated files
+ -t, --type # show type (f)ile or (d)irectory
+ -l, --ls # show full listing detail
+ -h, --human # show human readable file sizes
+ -u, --uri # show full uri for path
+ EOF
+ end
+
+ # main
+
+ opts = {}
+
+ gopts = GetoptLong.new(
+ [ '--size', '-s', GetoptLong::REQUIRED_ARGUMENT ],
+ [ '--repl', '-r', GetoptLong::REQUIRED_ARGUMENT ],
+ [ '--after', '-a', GetoptLong::REQUIRED_ARGUMENT ],
+ [ '--before', '-b', GetoptLong::REQUIRED_ARGUMENT ],
+ [ '--mmin', '-m', GetoptLong::REQUIRED_ARGUMENT ],
+ [ '--mtime', '-M', GetoptLong::REQUIRED_ARGUMENT ],
+ [ '--type', '-t', GetoptLong::REQUIRED_ARGUMENT ],
+ [ '--ls', '-l', GetoptLong::NO_ARGUMENT ],
+ [ '--uri', '-u', GetoptLong::NO_ARGUMENT ],
+ [ '--under', '-U', GetoptLong::NO_ARGUMENT ],
+ [ '--human', '-h', GetoptLong::NO_ARGUMENT ],
+ [ '--help', '-H', GetoptLong::NO_ARGUMENT ],
+ )
+
+ gopts.each do |opt, arg|
+ case opt
+ when '--after'
+ opts[:after] = arg
+ when '--before'
+ opts[:before] = arg
+ when '--mmin'
+ opts[:mmin] = arg
+ when '--mtime'
+ opts[:mtime] = arg
+ when '--size'
+ opts[:size] = arg
+ when '--repl'
+ opts[:repl] = arg
+ when '--type'
+ opts[:type] = arg
+ when '--human'
+ opts[:human] = true
+ when '--ls'
+ opts[:ls] = true
+ when '--under'
+ opts[:under] = true
+ when '--uri'
+ opts[:uri] = true
+ else
+ usage
+ exit 1
+ end
+ end
+
+ uri = ARGV[0] or (usage ; exit 1)
+
+ hf = HadoopFSFinder.new uri, opts
+ hf.find rescue STDERR.puts "error: could not process #{uri}"
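
The --size and --repl arguments reuse find(1)'s +/- numbering scheme, with an optional K/M/G/T/P suffix for sizes. As a standalone illustration of how HadoopFSFinder#filter_size interprets its argument, here is a plain-Ruby re-statement of the parsing above; it is not part of the gem, needs no Hadoop or JRuby, and the helper name parse_size_filter is made up for this example:

    # Re-states the sign/suffix parsing from HadoopFSFinder#filter_size:
    # leading '-' or '+' picks the comparison, trailing K/M/G/T/P scales the number.
    def parse_size_filter s
      cmp = case s[0].chr
            when '-' then :<
            when '+' then :>
            else :==
            end
      multi = { 'K' => 1024, 'M' => 1024**2, 'G' => 1024**3,
                'T' => 1024**4, 'P' => 1024**5 }[s[-1].chr.upcase] || 1
      [cmp, s.to_i.abs * multi]
    end

    p parse_size_filter '+10G'   # => [:>, 10737418240]  files larger than 10G
    p parse_size_filter '-500K'  # => [:<, 512000]       files smaller than 500K
    p parse_size_filter '3'      # => [:==, 3]           files of exactly 3 bytes

--repl follows the same +/-/exact convention, just without the size suffix.
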
metadata ADDED
@@ -0,0 +1,76 @@
+ --- !ruby/object:Gem::Specification
+ name: hadoop-find
+ version: !ruby/object:Gem::Version
+ prerelease: false
+ segments:
+ - 0
+ - 0
+ - 1
+ version: 0.0.1
+ platform: java
+ authors:
+ - Frank Fejes
+ autorequire:
+ bindir: bin
+ cert_chain: []
+
+ date: 2011-07-02 00:00:00 -05:00
+ default_executable:
+ dependencies: []
+
+ description: |-
+ A file listing utility for HDFS filesystems similar to unix find(1).
+ Requires jruby 1.6+.
+ email: frank@fejes.net
+ executables:
+ - hfind
+ - hfind.rb
+ extensions: []
+
+ extra_rdoc_files: []
+
+ files:
+ - README
+ - CHANGELOG
+ - bin/hfind
+ - bin/hfind.rb
+ has_rdoc: true
+ homepage: https://github.com/fsfiii/hadoop-find
+ licenses: []
+
+ post_install_message: |
+ ===
+ Please be sure to install with:
+
+ jgem install --no-wrapper hadoop-find
+ ===
+
+ rdoc_options: []
+
+ require_paths:
+ - lib
+ required_ruby_version: !ruby/object:Gem::Requirement
+ none: false
+ requirements:
+ - - ">="
+ - !ruby/object:Gem::Version
+ segments:
+ - 0
+ version: "0"
+ required_rubygems_version: !ruby/object:Gem::Requirement
+ none: false
+ requirements:
+ - - ">="
+ - !ruby/object:Gem::Version
+ segments:
+ - 0
+ version: "0"
+ requirements: []
+
+ rubyforge_project: hadoop-find
+ rubygems_version: 1.3.7
+ signing_key:
+ specification_version: 3
+ summary: jruby file listing utility for HDFS filesystems similar to unix find(1).
+ test_files: []
+
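
Putting it together: with jruby on the PATH and HADOOP_HOME set as bin/hfind requires, installing the gem and taking a first look at your HDFS home directory comes down to roughly the following (a sketch; hfind.rb maps "." to /user/$USER):

    jgem install --no-wrapper hadoop-find
    hfind -l -h .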