hadoop-find 0.0.1-java
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +2 -0
- data/README +29 -0
- data/bin/hfind +33 -0
- data/bin/hfind.rb +269 -0
- metadata +76 -0
data/CHANGELOG
ADDED
data/README
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
hfind
|
2
|
+
|
3
|
+
A file listing command for HDFS filesystems similar to unix find(1).
|
4
|
+
|
5
|
+
Requires jruby 1.6+.
|
6
|
+
|
7
|
+
# installation
|
8
|
+
|
9
|
+
Simply copy hfind.rb and hfind into your path.
|
10
|
+
|
11
|
+
# usage
|
12
|
+
|
13
|
+
usage: hfind [options] path
|
14
|
+
-a, --after # files modified after ISO date
|
15
|
+
-b, --before # files modified before ISO date
|
16
|
+
-m, --mmin # files modified before (+x) or after (-x) minutes ago
|
17
|
+
-M, --mtime # files modified before (+x) or after (-x) days ago
|
18
|
+
-s, --size # file size > (+x), < (-x), or == (x)
|
19
|
+
-r, --repl # replication factor > (+x), < (-x), or == (x)
|
20
|
+
-U, --under # under-replicated files
|
21
|
+
-t, --type # show type (f)ile or (d)irectory
|
22
|
+
-l, --ls # show full listing detail
|
23
|
+
-h, --human # show human readable file sizes
|
24
|
+
-u, --uri # show full uri for path
|
25
|
+
-H, --help
|
26
|
+
|
27
|
+
Please let me know if you find this software useful!
|
28
|
+
|
29
|
+
--frank
|
data/bin/hfind
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
#!/bin/bash
# Launcher for hfind.rb: verifies the Hadoop + JRuby environment, builds
# CLASSPATH from the Hadoop distribution jars, then exec's the ruby script.

if [ -z "$HADOOP_HOME" ]; then
  echo error: HADOOP_HOME is not defined >&2
  exit 1
fi

export PATH=/usr/local/jruby/bin:$PATH
if ! type -p jruby >&/dev/null; then
  echo error: cannot find jruby...please set your PATH >&2
  exit 1
fi
# run jruby in 1.9 compatibility mode
export JRUBY_OPTS=--1.9

# hfind.rb must be installed alongside this wrapper (same directory)
HFIND=${0%/*}/hfind.rb
# quote $HFIND: the install path may contain spaces
if [ ! -f "$HFIND" ]; then
  echo error: cannot find $HFIND...please install it alongside: >&2
  echo "  $0" >&2
  exit 2
fi

# bring in the hadoop/jdbc jars
# quote $HADOOP_HOME so a path with spaces still globs correctly
for f in "$HADOOP_HOME"/hadoop-core-*.jar; do
  CLASSPATH=${CLASSPATH}:$f
done

for f in "$HADOOP_HOME"/lib/*.jar; do
  CLASSPATH=${CLASSPATH}:$f
done

export CLASSPATH

exec jruby "$HFIND" "$@"
|
data/bin/hfind.rb
ADDED
@@ -0,0 +1,269 @@
|
|
1
|
+
#!/usr/bin/env jruby
|
2
|
+
|
3
|
+
require 'java'
|
4
|
+
require 'getoptlong'
|
5
|
+
|
6
|
+
# Walks an HDFS filesystem and prints entries matching the configured
# filters, in the spirit of unix find(1).  Runs under JRuby and talks to
# Hadoop through its Java API (org.apache.hadoop.*).
class HadoopFSFinder
  # uri  - path (or glob) to search; a leading "." is rewritten to the
  #        user's HDFS home directory (/user/$USER)
  # opts - filters/display options from the command line:
  #        :size, :repl, :under, :type, :before, :after, :mmin, :mtime,
  #        :ls, :human, :uri
  def initialize uri, opts = {}
    @opts = opts

    # Load cluster configuration from $HADOOP_HOME/conf so we pick up the
    # site-specific namenode address and default replication factor.
    @conf = org.apache.hadoop.conf.Configuration.new
    core_site = ENV['HADOOP_HOME'].to_s + '/conf/core-site.xml'
    core_path = org.apache.hadoop.fs.Path.new core_site
    @conf.add_resource core_path
    hdfs_site = ENV['HADOOP_HOME'].to_s + '/conf/hdfs-site.xml'
    hdfs_path = org.apache.hadoop.fs.Path.new hdfs_site
    @conf.add_resource hdfs_path
    # convert . to the user's home directory
    uri.sub! /\A\./, "/user/#{ENV['USER']}"

    # --under is sugar for "--repl -<dfs.replication>": files whose
    # replication is below the cluster's configured target.
    if @opts[:under]
      @opts[:repl] = "-#{@conf.get_props['dfs.replication']}"
    end
    # replication only makes sense for files, so force the type filter
    @opts[:type] = 'f' if @opts[:repl]

    @uri = java.net.URI.create uri
    @path = org.apache.hadoop.fs.Path.new @uri
    @fs = org.apache.hadoop.fs.FileSystem.get @uri, @conf
  end

  # filter by size using unix find -size numbering scheme
  # "+x" => keep sizes > x, "-x" => keep sizes < x, "x" => exactly x.
  # An optional K/M/G/T/P suffix (case-insensitive) scales by powers of 1024.
  # Returns true when no size filter is configured.
  def filter_size size
    return true if not @opts[:size]

    s = @opts[:size]
    cmp = :==
    case s[0].chr
    when '-'
      cmp = :<
    when '+'
      cmp = :>
    end

    multi = 1
    case s[-1].chr.upcase
    when 'K'
      multi = 1024
    when 'M'
      multi = 1024 * 1024
    when 'G'
      multi = 1024 * 1024 * 1024
    when 'T'
      multi = 1024 * 1024 * 1024 * 1024
    when 'P'
      multi = 1024 * 1024 * 1024 * 1024 * 1024
    end
    # to_i ignores the sign/suffix characters around the digits
    filter_size = s.to_i.abs * multi

    return size.send(cmp, filter_size)
  end

  # filter by replication count using unix find -size numbering scheme
  # "+x" => repl > x, "-x" => repl < x, "x" => repl == x.
  # Returns true when no replication filter is configured.
  def filter_repl repl
    return true if not @opts[:repl]

    r = @opts[:repl]
    cmp = :==
    case r[0].chr
    when '-'
      cmp = :<
    when '+'
      cmp = :>
    end

    filter_repl = r.to_i.abs

    return repl.send(cmp, filter_repl)
  end

  # Filter by modification time (epoch seconds).
  # :before/:after take ISO dates (YYYY-MM-DD) and, when present, win over
  # :mmin/:mtime.  For :mmin/:mtime the sign follows find(1): "+x" keeps
  # files modified BEFORE x minutes/days ago (older), "-x" keeps files
  # modified AFTER that instant (newer), bare "x" demands the exact second.
  # Returns true when no time filter is configured.
  def filter_mtime mtime
    mtime_filters = [:before, :after, :mmin, :mtime]
    return true if (mtime_filters & @opts.keys).empty?

    dt_regexp = /\A(\d{4})-(\d{2})-(\d{2})/

    if @opts[:before]
      match = dt_regexp.match @opts[:before]
      if match
        # Time.new accepts numeric strings for year/month/day
        m = Time.new(match[1], match[2], match[3]).to_i
      else
        raise 'Invalid Date Representation'
      end
      #puts "#{mtime} vs #{m}"
      if mtime < m
        return true
      else
        return false
      end
    elsif @opts[:after]
      match = dt_regexp.match @opts[:after]
      if match
        m = Time.new(match[1], match[2], match[3]).to_i
      else
        raise 'Invalid Date Representation'
      end
      #puts "#{mtime} vs #{m}"
      if mtime > m
        return true
      else
        return false
      end
    end

    # signed offset in seconds from the --mmin/--mtime argument
    m = 0
    if @opts[:mmin]
      m = @opts[:mmin].to_i * 60
    elsif @opts[:mtime]
      m = @opts[:mtime].to_i * 86400
    end

    cmp = :==
    if m < 0
      cmp = :>
    elsif m > 0
      cmp = :<
    end

    # cutoff instant: |m| seconds before now
    filter_mtime = Time.now.to_i - m.abs.to_i

    #puts "#{mtime} vs #{filter_mtime} #{m}"
    return mtime.send(cmp, filter_mtime)
  end

  # print out one line of info for a filestatus object
  # f is presumably an org.apache.hadoop.fs.FileStatus -- TODO confirm.
  # Applies the type/size/replication/mtime filters; prints nothing if any
  # filter rejects the entry.
  def display f
    type = f.dir? ? 'd' : 'f'
    return if @opts[:type] and @opts[:type] != type

    size = f.len
    return if not filter_size size

    repl = f.replication
    return if not filter_repl repl

    # modification_time is in milliseconds; convert to epoch seconds
    mtime = Time.at(f.modification_time / 1000).to_i
    return if not filter_mtime mtime

    # --uri shows the full scheme://host/path form; default is path only
    if @opts[:uri]
      path = f.path.to_s
    else
      path = f.path.to_uri.path
    end
    path = "#{path}/" if f.dir?

    # without --ls, just print the path
    if not @opts[:ls]
      puts path
      return
    end

    # --human: scale size down to the largest power-of-1024 unit
    if @opts[:human]
      if size > 1125899906842624
        size = "#{size / 1125899906842624}P"
      elsif size > 1099511627776
        size = "#{size / 1099511627776}T"
      elsif size > 1073741824
        size = "#{size / 1073741824}G"
      elsif size > 1048576
        size = "#{size / 1048576}M"
      elsif size > 1024
        size = "#{size / 1024}K"
      else
        size = "#{size}B"
      end
      size = '%4s' % size
    else
      size = '%12s' % size
    end

    # ls -l style listing: type+perms repl owner group size mtime path
    type = f.dir? ? 'd' : '-'
    # directories report replication 0; show '-' instead
    repl = f.replication > 0 ? f.replication : '-'
    mtime = Time.at(f.modification_time / 1000).strftime '%Y-%m-%d %H:%M:%S'
    perm = f.permission.to_s.strip
    puts '%s%s %s %-8s %-16s %s %s %s' %
      [type, perm, repl, f.owner, f.group, size, mtime, path]
  end

  # Expand the (possibly glob) path and recursively display every entry.
  def find
    @fs.glob_status(@path).each {|s| walk(s) {|f| display f}}
  end

  # Depth-first traversal: yield fstat itself, then recurse into it if it
  # is a directory.
  def walk fstat
    yield fstat

    return if not fstat.dir?

    @fs.list_status(fstat.path).each {|s| walk(s) {|f| yield f}}
  end
end
|
198
|
+
|
199
|
+
def usage
|
200
|
+
puts <<-EOF
|
201
|
+
usage: hfind [options] path
|
202
|
+
-H, --help
|
203
|
+
-a, --after # files modified after ISO date
|
204
|
+
-b, --before # files modified before ISO date
|
205
|
+
-m, --mmin # files modified before (-x) or after (+x) minutes ago
|
206
|
+
-M, --mtime # files modified before (-x) or after (+x) days ago
|
207
|
+
-s, --size # file size > (+x), < (-x), or == (x)
|
208
|
+
-r, --repl # replication factor > (+x), < (-x), or == (x)
|
209
|
+
-U, --under # show under-replicated files
|
210
|
+
-t, --type # show type (f)ile or (d)irectory
|
211
|
+
-l, --ls # show full listing detail
|
212
|
+
-h, --human # show human readable file sizes
|
213
|
+
-u, --uri # show full uri for path
|
214
|
+
EOF
|
215
|
+
end
|
216
|
+
|
217
|
+
# main
|
218
|
+
|
219
|
+
# main

# options gathered from the command line, passed to HadoopFSFinder
opts = {}

# filter options take a required argument; display toggles take none
gopts = GetoptLong.new(
  [ '--size', '-s', GetoptLong::REQUIRED_ARGUMENT ],
  [ '--repl', '-r', GetoptLong::REQUIRED_ARGUMENT ],
  [ '--after', '-a', GetoptLong::REQUIRED_ARGUMENT ],
  [ '--before', '-b', GetoptLong::REQUIRED_ARGUMENT ],
  [ '--mmin', '-m', GetoptLong::REQUIRED_ARGUMENT ],
  [ '--mtime', '-M', GetoptLong::REQUIRED_ARGUMENT ],
  [ '--type', '-t', GetoptLong::REQUIRED_ARGUMENT ],
  [ '--ls', '-l', GetoptLong::NO_ARGUMENT ],
  [ '--uri', '-u', GetoptLong::NO_ARGUMENT ],
  [ '--under', '-U', GetoptLong::NO_ARGUMENT ],
  [ '--human', '-h', GetoptLong::NO_ARGUMENT ],
  [ '--help', '-H', GetoptLong::NO_ARGUMENT ],
)

gopts.each do |opt, arg|
  case opt
  when '--after'
    opts[:after] = arg
  when '--before'
    opts[:before] = arg
  when '--mmin'
    opts[:mmin] = arg
  when '--mtime'
    opts[:mtime] = arg
  when '--size'
    opts[:size] = arg
  when '--repl'
    opts[:repl] = arg
  when '--type'
    opts[:type] = arg
  when '--human'
    opts[:human] = true
  when '--ls'
    opts[:ls] = true
  when '--under'
    opts[:under] = true
  when '--uri'
    opts[:uri] = true
  else
    # --help or an unrecognized option
    usage
    exit 1
  end
end

# first non-option argument is the path to search
uri = ARGV[0] or (usage ; exit 1)

hf = HadoopFSFinder.new uri, opts
begin
  hf.find
rescue => e
  # report the underlying cause; the old `rescue` modifier form silently
  # discarded the exception message, making failures undiagnosable
  STDERR.puts "error: could not process #{uri}: #{e.message}"
end
|
metadata
ADDED
@@ -0,0 +1,76 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: hadoop-find
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 0
|
7
|
+
- 0
|
8
|
+
- 1
|
9
|
+
version: 0.0.1
|
10
|
+
platform: java
|
11
|
+
authors:
|
12
|
+
- Frank Fejes
|
13
|
+
autorequire:
|
14
|
+
bindir: bin
|
15
|
+
cert_chain: []
|
16
|
+
|
17
|
+
date: 2011-07-02 00:00:00 -05:00
|
18
|
+
default_executable:
|
19
|
+
dependencies: []
|
20
|
+
|
21
|
+
description: |-
|
22
|
+
A file listing utility for HDFS filesystems similar to unix find(1).
|
23
|
+
Requires jruby 1.6+.
|
24
|
+
email: frank@fejes.net
|
25
|
+
executables:
|
26
|
+
- hfind
|
27
|
+
- hfind.rb
|
28
|
+
extensions: []
|
29
|
+
|
30
|
+
extra_rdoc_files: []
|
31
|
+
|
32
|
+
files:
|
33
|
+
- README
|
34
|
+
- CHANGELOG
|
35
|
+
- bin/hfind
|
36
|
+
- bin/hfind.rb
|
37
|
+
has_rdoc: true
|
38
|
+
homepage: https://github.com/fsfiii/hadoop-find
|
39
|
+
licenses: []
|
40
|
+
|
41
|
+
post_install_message: |
|
42
|
+
===
|
43
|
+
Please be sure to install with:
|
44
|
+
|
45
|
+
jgem install --no-wrapper hadoop-find
|
46
|
+
===
|
47
|
+
|
48
|
+
rdoc_options: []
|
49
|
+
|
50
|
+
require_paths:
|
51
|
+
- lib
|
52
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
53
|
+
none: false
|
54
|
+
requirements:
|
55
|
+
- - ">="
|
56
|
+
- !ruby/object:Gem::Version
|
57
|
+
segments:
|
58
|
+
- 0
|
59
|
+
version: "0"
|
60
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
61
|
+
none: false
|
62
|
+
requirements:
|
63
|
+
- - ">="
|
64
|
+
- !ruby/object:Gem::Version
|
65
|
+
segments:
|
66
|
+
- 0
|
67
|
+
version: "0"
|
68
|
+
requirements: []
|
69
|
+
|
70
|
+
rubyforge_project: hadoop-find
|
71
|
+
rubygems_version: 1.3.7
|
72
|
+
signing_key:
|
73
|
+
specification_version: 3
|
74
|
+
summary: jruby file listing utility for HDFS filesystems similar to unix find(1).
|
75
|
+
test_files: []
|
76
|
+
|