hadoop-find 0.0.1-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (5) hide show
  1. data/CHANGELOG +2 -0
  2. data/README +29 -0
  3. data/bin/hfind +33 -0
  4. data/bin/hfind.rb +269 -0
  5. metadata +76 -0
data/CHANGELOG ADDED
@@ -0,0 +1,2 @@
1
+ * 2011-07-02 - fsf
2
+ - initial import
data/README ADDED
@@ -0,0 +1,29 @@
1
+ hfind
2
+
3
+ A file listing command for HDFS filesystems similar to unix find(1).
4
+
5
+ Requires jruby 1.6+.
6
+
7
+ # installation
8
+
9
+ Simply copy hfind.rb and hfind into your path.
10
+
11
+ # usage
12
+
13
+ usage: hfind [options] path
14
+ -a, --after # files modified after ISO date
15
+ -b, --before # files modified before ISO date
16
+ -m, --mmin # files modified less than (-x) or more than (+x) minutes ago
17
+ -M, --mtime # files modified less than (-x) or more than (+x) days ago
18
+ -s, --size # file size > (+x), < (-x), or == (x)
19
+ -r, --repl # replication factor > (+x), < (-x), or == (x)
20
+ -U, --under # under-replicated files
21
+ -t, --type # show type (f)ile or (d)irectory
22
+ -l, --ls # show full listing detail
23
+ -h, --human # show human readable file sizes
24
+ -u, --uri # show full uri for path
25
+ -H, --help
26
+
27
+ Please let me know if you find this software useful!
28
+
29
+ --frank
data/bin/hfind ADDED
@@ -0,0 +1,33 @@
1
#!/bin/bash
#
# hfind - launcher for hfind.rb.
#
# Verifies that HADOOP_HOME is set and jruby is reachable, puts the Hadoop
# jars on the CLASSPATH and then replaces itself with jruby running the
# hfind.rb script that lives next to this file.
#
# Exit codes: 1 = environment problem, 2 = hfind.rb not found.

if [ -z "$HADOOP_HOME" ]; then
  echo "error: HADOOP_HOME is not defined" >&2
  exit 1
fi

export PATH="/usr/local/jruby/bin:$PATH"
if ! type -p jruby >/dev/null 2>&1; then
  echo "error: cannot find jruby...please set your PATH" >&2
  exit 1
fi
export JRUBY_OPTS=--1.9

# dirname copes with a $0 that has no slash (e.g. "bash hfind"), where
# ${0%/*} would have produced "hfind/hfind.rb".
HFIND="$(dirname "$0")/hfind.rb"
if [ ! -f "$HFIND" ]; then
  echo "error: cannot find $HFIND...please install it alongside:" >&2
  echo " $0" >&2
  exit 2
fi

# bring in the hadoop jars (core jar first, then everything under lib/)
for f in "$HADOOP_HOME"/hadoop-core-*.jar "$HADOOP_HOME"/lib/*.jar; do
  # an unmatched glob is left as a literal pattern; skip it
  [ -e "$f" ] || continue
  # only insert the ':' separator when CLASSPATH already has content, so an
  # empty entry (which Java treats as the current directory) is never added
  CLASSPATH="${CLASSPATH:+$CLASSPATH:}$f"
done

export CLASSPATH

exec jruby "$HFIND" "$@"
data/bin/hfind.rb ADDED
@@ -0,0 +1,269 @@
1
+ #!/usr/bin/env jruby
2
+
3
+ require 'java'
4
+ require 'getoptlong'
5
+
6
# Walks a Hadoop filesystem tree and prints the entries that pass a set of
# find(1)-style filters.
#
# Option keys read from the +opts+ hash (all optional):
#   :size    - size filter, "+x" (>), "-x" (<) or "x" (==), with an optional
#              K/M/G/T/P suffix (powers of 1024)
#   :repl    - replication filter, "+x" (>), "-x" (<) or "x" (==)
#   :under   - shorthand for "replication below the configured dfs.replication"
#   :type    - 'f' or 'd'; forced to 'f' whenever a replication filter is active
#   :before  - only entries modified before this date (YYYY-MM-DD, local time)
#   :after   - only entries modified after this date (YYYY-MM-DD, local time)
#   :mmin    - relative age in minutes, "-x" = newer than, "+x"/"x" = older than
#   :mtime   - relative age in days, same sign convention (:mmin wins if both)
#   :ls      - print a full listing line instead of just the path
#   :human   - with :ls, print sizes with a B/K/M/G/T/P suffix
#   :uri     - print the full URI instead of just the path component
class HadoopFSFinder
  MTIME_FILTERS = [:before, :after, :mmin, :mtime].freeze
  DATE_REGEXP = /\A(\d{4})-(\d{2})-(\d{2})/
  SIZE_MULTIPLIERS = {
    'K' => 1024,
    'M' => 1024**2,
    'G' => 1024**3,
    'T' => 1024**4,
    'P' => 1024**5
  }.freeze
  # Largest unit first so the first match is the most compact representation.
  HUMAN_UNITS = [
    ['P', 1024**5],
    ['T', 1024**4],
    ['G', 1024**3],
    ['M', 1024**2],
    ['K', 1024]
  ].freeze

  # uri  - path or URI to search; "." or a leading "./" is expanded to
  #        /user/$USER
  # opts - filter/display options, see the class comment
  def initialize(uri, opts = {})
    # Work on a copy: :repl and :type are rewritten below and the caller's
    # hash must not change underneath them.
    @opts = opts.dup

    hadoop_home = ENV['HADOOP_HOME'].to_s
    @conf = org.apache.hadoop.conf.Configuration.new
    %w[core-site.xml hdfs-site.xml].each do |site_file|
      @conf.add_resource org.apache.hadoop.fs.Path.new("#{hadoop_home}/conf/#{site_file}")
    end

    # convert . to the user's home directory; only a lone "." or a leading
    # "./" qualifies, so names such as ".Trash" or ".." are left untouched,
    # and the caller's string is not modified in place
    uri = uri.sub(%r{\A\.(?=/|\z)}, "/user/#{ENV['USER']}")

    if @opts[:under]
      # Public Configuration#get instead of the internal property table;
      # '3' is the stock HDFS default when the site files do not set it.
      @opts[:repl] = "-#{@conf.get('dfs.replication', '3')}"
    end
    # directories have no meaningful replication factor
    @opts[:type] = 'f' if @opts[:repl]

    @uri = java.net.URI.create uri
    @path = org.apache.hadoop.fs.Path.new @uri
    @fs = org.apache.hadoop.fs.FileSystem.get @uri, @conf
  end

  # filter by size using unix find -size numbering scheme
  #
  # size - entry size in bytes
  #
  # Returns true when no :size option is set or when the size satisfies it.
  def filter_size(size)
    spec = @opts[:size]
    return true unless spec

    spec = spec.to_s
    unit = spec[-1, 1].to_s.upcase
    threshold = spec.to_i.abs * SIZE_MULTIPLIERS.fetch(unit, 1)

    size.send(comparator_for(spec), threshold)
  end

  # filter by replication count using unix find -size numbering scheme
  #
  # repl - replication factor of the entry
  #
  # Returns true when no :repl option is set or when repl satisfies it.
  def filter_repl(repl)
    spec = @opts[:repl]
    return true unless spec

    spec = spec.to_s
    repl.send(comparator_for(spec), spec.to_i.abs)
  end

  # Filter by modification time.
  #
  # mtime - modification time in seconds since the epoch
  #
  # All supplied time options must hold, so :before and :after together
  # select a date range. Raises RuntimeError ('Invalid Date Representation')
  # when a :before/:after value does not start with YYYY-MM-DD.
  def filter_mtime(mtime)
    return true if (MTIME_FILTERS & @opts.keys).empty?

    return false if @opts[:before] && mtime >= parse_date(@opts[:before])
    return false if @opts[:after] && mtime <= parse_date(@opts[:after])

    offset = relative_offset
    return true if offset.nil?

    cutoff = Time.now.to_i - offset.abs
    if offset < 0
      mtime > cutoff      # "-x": modified more recently than x ago
    elsif offset > 0
      mtime < cutoff      # "+x" or "x": modified longer than x ago
    else
      mtime == cutoff     # zero offset: exact match against the current second
    end
  end

  # print out one line of info for a filestatus object
  #
  # f - object answering dir?, len, replication, modification_time (ms),
  #     path, permission, owner and group
  #
  # Prints nothing when any active filter rejects the entry.
  def display(f)
    type = f.dir? ? 'd' : 'f'
    return if @opts[:type] && @opts[:type] != type

    size = f.len
    return unless filter_size(size)
    return unless filter_repl(f.replication)

    modified = Time.at(f.modification_time / 1000)
    return unless filter_mtime(modified.to_i)

    path = @opts[:uri] ? f.path.to_s : f.path.to_uri.path
    path = "#{path}/" if f.dir?

    unless @opts[:ls]
      puts path
      return
    end

    size_col = @opts[:human] ? '%4s' % human_size(size) : '%12s' % size
    type_col = f.dir? ? 'd' : '-'
    repl_col = f.replication > 0 ? f.replication : '-'
    stamp = modified.strftime '%Y-%m-%d %H:%M:%S'
    perm = f.permission.to_s.strip

    puts '%s%s %s %-8s %-16s %s %s %s' %
      [type_col, perm, repl_col, f.owner, f.group, size_col, stamp, path]
  end

  # Expand the start path as a glob and display every entry underneath each
  # match. Raises RuntimeError when the filesystem reports no match at all.
  def find
    matches = @fs.glob_status(@path)
    # NOTE(review): globStatus is documented to return null for a non-glob
    # path that does not exist - confirm against the Hadoop version in use.
    raise "no such file or directory: #{@path}" if matches.nil?

    matches.each { |match| walk(match) { |f| display f } }
  end

  # Yield fstat and then, depth-first, every entry below it.
  def walk(fstat, &block)
    yield fstat

    return unless fstat.dir?

    children = @fs.list_status(fstat.path)
    # a nil listing (e.g. the directory vanished mid-walk) is simply skipped
    return if children.nil?

    children.each { |child| walk(child, &block) }
  end

  private

  # Map the leading sign of a filter spec to the comparison operator.
  def comparator_for(spec)
    case spec[0, 1]
    when '-' then :<
    when '+' then :>
    else :==
    end
  end

  # Convert the leading YYYY-MM-DD of text to epoch seconds (local midnight).
  def parse_date(text)
    match = DATE_REGEXP.match(text.to_s)
    raise 'Invalid Date Representation' unless match

    Time.new(match[1].to_i, match[2].to_i, match[3].to_i).to_i
  end

  # Signed age in seconds taken from :mmin or :mtime, nil when neither is set.
  def relative_offset
    if @opts[:mmin]
      @opts[:mmin].to_i * 60
    elsif @opts[:mtime]
      @opts[:mtime].to_i * 86400
    end
  end

  # Render a byte count with the largest unit whose threshold it reaches.
  def human_size(size)
    unit, factor = HUMAN_UNITS.find { |_name, minimum| size >= minimum }
    unit ? "#{size / factor}#{unit}" : "#{size}B"
  end
end
198
+
199
# Print the command-line synopsis and option summary to standard output.
#
# The --mmin/--mtime lines describe what the filter actually does: a
# negative value selects entries newer than the cutoff and a positive one
# selects older entries, as in find(1).
def usage
  puts <<-EOF
usage: hfind [options] path
  -H, --help
  -a, --after   # files modified after ISO date
  -b, --before  # files modified before ISO date
  -m, --mmin    # files modified less than (-x) or more than (+x) minutes ago
  -M, --mtime   # files modified less than (-x) or more than (+x) days ago
  -s, --size    # file size > (+x), < (-x), or == (x)
  -r, --repl    # replication factor > (+x), < (-x), or == (x)
  -U, --under   # show under-replicated files
  -t, --type    # show type (f)ile or (d)irectory
  -l, --ls      # show full listing detail
  -h, --human   # show human readable file sizes
  -u, --uri     # show full uri for path
  EOF
end
216
+
217
# main
#
# Parse the command line into an options hash, then run the finder on the
# first remaining argument. Exit status: 0 on success or --help, 1 on a
# usage error or when the search fails.

opts = {}

gopts = GetoptLong.new(
  [ '--size',   '-s', GetoptLong::REQUIRED_ARGUMENT ],
  [ '--repl',   '-r', GetoptLong::REQUIRED_ARGUMENT ],
  [ '--after',  '-a', GetoptLong::REQUIRED_ARGUMENT ],
  [ '--before', '-b', GetoptLong::REQUIRED_ARGUMENT ],
  [ '--mmin',   '-m', GetoptLong::REQUIRED_ARGUMENT ],
  [ '--mtime',  '-M', GetoptLong::REQUIRED_ARGUMENT ],
  [ '--type',   '-t', GetoptLong::REQUIRED_ARGUMENT ],
  [ '--ls',     '-l', GetoptLong::NO_ARGUMENT ],
  [ '--uri',    '-u', GetoptLong::NO_ARGUMENT ],
  [ '--under',  '-U', GetoptLong::NO_ARGUMENT ],
  [ '--human',  '-h', GetoptLong::NO_ARGUMENT ],
  [ '--help',   '-H', GetoptLong::NO_ARGUMENT ]
)

begin
  gopts.each do |opt, arg|
    case opt
    when '--after', '--before', '--mmin', '--mtime', '--size', '--repl', '--type'
      # value options: keep the raw argument, the finder interprets it
      opts[opt.sub(/\A--/, '').to_sym] = arg
    when '--human', '--ls', '--under', '--uri'
      # boolean switches
      opts[opt.sub(/\A--/, '').to_sym] = true
    when '--help'
      usage
      exit 0
    else
      usage
      exit 1
    end
  end
rescue GetoptLong::Error
  # GetoptLong has already written its own diagnostic to stderr
  usage
  exit 1
end

uri = ARGV[0]
if uri.nil?
  usage
  exit 1
end

begin
  HadoopFSFinder.new(uri, opts).find
rescue StandardError => e
  # report why the search failed and signal it through the exit status
  STDERR.puts "error: could not process #{uri}: #{e.message}"
  exit 1
end
metadata ADDED
@@ -0,0 +1,76 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: hadoop-find
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 0
8
+ - 1
9
+ version: 0.0.1
10
+ platform: java
11
+ authors:
12
+ - Frank Fejes
13
+ autorequire:
14
+ bindir: bin
15
+ cert_chain: []
16
+
17
+ date: 2011-07-02 00:00:00 -05:00
18
+ default_executable:
19
+ dependencies: []
20
+
21
+ description: |-
22
+ A file listing utility for HDFS filesystems similar to unix find(1).
23
+ Requires jruby 1.6+.
24
+ email: frank@fejes.net
25
+ executables:
26
+ - hfind
27
+ - hfind.rb
28
+ extensions: []
29
+
30
+ extra_rdoc_files: []
31
+
32
+ files:
33
+ - README
34
+ - CHANGELOG
35
+ - bin/hfind
36
+ - bin/hfind.rb
37
+ has_rdoc: true
38
+ homepage: https://github.com/fsfiii/hadoop-find
39
+ licenses: []
40
+
41
+ post_install_message: |
42
+ ===
43
+ Please be sure to install with:
44
+
45
+ jgem install --no-wrapper hadoop-find
46
+ ===
47
+
48
+ rdoc_options: []
49
+
50
+ require_paths:
51
+ - lib
52
+ required_ruby_version: !ruby/object:Gem::Requirement
53
+ none: false
54
+ requirements:
55
+ - - ">="
56
+ - !ruby/object:Gem::Version
57
+ segments:
58
+ - 0
59
+ version: "0"
60
+ required_rubygems_version: !ruby/object:Gem::Requirement
61
+ none: false
62
+ requirements:
63
+ - - ">="
64
+ - !ruby/object:Gem::Version
65
+ segments:
66
+ - 0
67
+ version: "0"
68
+ requirements: []
69
+
70
+ rubyforge_project: hadoop-find
71
+ rubygems_version: 1.3.7
72
+ signing_key:
73
+ specification_version: 3
74
+ summary: jruby file listing utility for HDFS filesystems similar to unix find(1).
75
+ test_files: []
76
+