hadoop-find 0.0.1-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +2 -0
- data/README +29 -0
- data/bin/hfind +33 -0
- data/bin/hfind.rb +269 -0
- metadata +76 -0
data/CHANGELOG
ADDED
data/README
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
hfind
|
2
|
+
|
3
|
+
A file listing command for HDFS filesystems similar to unix find(1).
|
4
|
+
|
5
|
+
Requires jruby 1.6+.
|
6
|
+
|
7
|
+
# installation
|
8
|
+
|
9
|
+
Simply copy hfind.rb and hfind into your path.
|
10
|
+
|
11
|
+
# usage
|
12
|
+
|
13
|
+
usage: hfind [options] path
|
14
|
+
-a, --after # files modified after ISO date
|
15
|
+
-b, --before # files modified before ISO date
|
16
|
+
-m, --mmin # files modified less than (-x) or more than (+x) minutes ago
|
17
|
+
-M, --mtime # files modified less than (-x) or more than (+x) days ago
|
18
|
+
-s, --size # file size > (+x), < (-x), or == (x)
|
19
|
+
-r, --repl # replication factor > (+x), < (-x), or == (x)
|
20
|
+
-U, --under # under-replicated files
|
21
|
+
-t, --type # show type (f)ile or (d)irectory
|
22
|
+
-l, --ls # show full listing detail
|
23
|
+
-h, --human # show human readable file sizes
|
24
|
+
-u, --uri # show full uri for path
|
25
|
+
-H, --help
|
26
|
+
|
27
|
+
Please let me know if you find this software useful!
|
28
|
+
|
29
|
+
--frank
|
data/bin/hfind
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
#!/bin/bash
#
# Launcher for hfind.rb: checks that HADOOP_HOME and jruby are available,
# puts the Hadoop jars on CLASSPATH and then replaces itself with jruby
# running the hfind.rb that sits next to this script.

if [ -z "$HADOOP_HOME" ]; then
  echo error: HADOOP_HOME is not defined >&2
  exit 1
fi

export PATH=/usr/local/jruby/bin:$PATH
if ! type -p jruby >&/dev/null; then
  echo error: cannot find jruby...please set your PATH >&2
  exit 1
fi
export JRUBY_OPTS=--1.9

# dirname copes with a $0 that contains no slash (e.g. "bash hfind"), where
# ${0%/*} would leave the script name itself. Quoting keeps paths that
# contain spaces in one piece.
HFIND="$(dirname "$0")/hfind.rb"
if [ ! -f "$HFIND" ]; then
  echo "error: cannot find $HFIND...please install it alongside:" >&2
  echo " $0" >&2
  exit 2
fi

# bring in the hadoop/jdbc jars
for f in "$HADOOP_HOME"/hadoop-core-*.jar "$HADOOP_HOME"/lib/*.jar; do
  # A glob that matches nothing is left as the literal pattern; skip it.
  [ -f "$f" ] || continue
  # Only insert the separator when CLASSPATH already has content, so no
  # empty leading entry is created.
  CLASSPATH=${CLASSPATH:+$CLASSPATH:}$f
done

export CLASSPATH

exec jruby "$HFIND" "$@"
|
data/bin/hfind.rb
ADDED
@@ -0,0 +1,269 @@
|
|
1
|
+
#!/usr/bin/env jruby
|
2
|
+
|
3
|
+
require 'java'
|
4
|
+
require 'getoptlong'
|
5
|
+
|
6
|
+
# Walks an HDFS tree and prints the entries that pass find(1)-style filters.
#
# Keys read from the options hash (all optional):
#   :size          - "+N" (greater than), "-N" (less than) or "N" (equal),
#                    with an optional K/M/G/T/P suffix (powers of 1024)
#   :repl          - same "+N"/"-N"/"N" scheme applied to the replication count
#   :before/:after - ISO dates (YYYY-MM-DD) bounding the modification time
#   :mmin, :mtime  - age relative to now, in minutes / days
#   :under         - replaces :repl with "less than dfs.replication"
#   :type          - 'f' or 'd'
#   :ls, :human, :uri - output formatting switches used by #display
class HadoopFSFinder
  # Binary size suffixes, largest first so the first threshold reached wins.
  SIZE_UNITS = [
    ['P', 1024**5],
    ['T', 1024**4],
    ['G', 1024**3],
    ['M', 1024**2],
    ['K', 1024]
  ].freeze

  # uri  - String path or URI to search; a leading "." path component is
  #        replaced by /user/$USER.
  # opts - Hash of filter/format options (see the class comment).
  def initialize(uri, opts = {})
    # Work on a copy: the :under/:repl handling below rewrites entries and
    # must not leak back into the caller's hash.
    @opts = opts.dup

    @conf = org.apache.hadoop.conf.Configuration.new
    hadoop_home = ENV['HADOOP_HOME'].to_s
    %w[core-site.xml hdfs-site.xml].each do |name|
      @conf.add_resource org.apache.hadoop.fs.Path.new("#{hadoop_home}/conf/#{name}")
    end

    # convert . to the user's home directory
    # Only a whole leading "." component is rewritten, so ".." and names such
    # as ".staging" are left alone. sub (not sub!) keeps the caller's string
    # intact and also works when that string is frozen.
    uri = uri.sub(/\A\.(?=\/|\z)/, "/user/#{ENV['USER']}")

    # "-N" means "fewer than N replicas" in filter_repl.
    if @opts[:under]
      @opts[:repl] = "-#{@conf.get_props['dfs.replication']}"
    end
    # Any replication filter restricts the listing to plain files.
    @opts[:type] = 'f' if @opts[:repl]

    @uri = java.net.URI.create uri
    @path = org.apache.hadoop.fs.Path.new @uri
    @fs = org.apache.hadoop.fs.FileSystem.get @uri, @conf
  end

  # filter by size using unix find -size numbering scheme
  #
  # size - Integer byte count of the entry.
  #
  # Returns true when no :size filter is set (or it is empty) or when size
  # compares as requested against the filter value.
  def filter_size(size)
    spec = @opts[:size].to_s
    return true if spec.empty?

    unit = SIZE_UNITS.assoc(spec[-1, 1].upcase)
    multiplier = unit ? unit[1] : 1
    size.send(comparator_for(spec), spec.to_i.abs * multiplier)
  end

  # filter by replication count using unix find -size numbering scheme
  #
  # repl - Integer replication count of the entry.
  #
  # Returns true when no :repl filter is set (or it is empty) or when repl
  # compares as requested against the filter value.
  def filter_repl(repl)
    spec = @opts[:repl].to_s
    return true if spec.empty?

    repl.send(comparator_for(spec), spec.to_i.abs)
  end

  # Filter by modification time.
  #
  # mtime - Integer modification time in seconds since the epoch.
  #
  # :before and :after are both enforced when both are present, so the pair
  # describes a date range. When either is present the relative filters
  # (:mmin/:mtime) are not consulted. For the relative filters a negative
  # value keeps entries newer than the given age, a positive value keeps
  # older ones, and zero requires an exact match with the current second.
  #
  # Raises RuntimeError ('Invalid Date Representation') when :before or
  # :after does not start with YYYY-MM-DD.
  def filter_mtime(mtime)
    return true if ([:before, :after, :mmin, :mtime] & @opts.keys).empty?

    if @opts[:before] || @opts[:after]
      return false if @opts[:before] && mtime >= parse_date(@opts[:before])
      return false if @opts[:after] && mtime <= parse_date(@opts[:after])
      return true
    end

    offset = 0
    if @opts[:mmin]
      offset = @opts[:mmin].to_i * 60
    elsif @opts[:mtime]
      offset = @opts[:mtime].to_i * 86400
    end

    cmp = :==
    if offset < 0
      cmp = :>
    elsif offset > 0
      cmp = :<
    end

    mtime.send(cmp, Time.now.to_i - offset.abs)
  end

  # print out one line of info for a filestatus object
  #
  # f - file status object; this method calls dir?, len, replication,
  #     modification_time (divided by 1000 to obtain seconds), path,
  #     permission, owner and group on it.
  #
  # Prints nothing when the entry is rejected by the :type, size,
  # replication or mtime filters.
  def display(f)
    type = f.dir? ? 'd' : 'f'
    return if @opts[:type] && @opts[:type] != type

    size = f.len
    return unless filter_size(size)
    return unless filter_repl(f.replication)
    return unless filter_mtime(Time.at(f.modification_time / 1000).to_i)

    path = @opts[:uri] ? f.path.to_s : f.path.to_uri.path
    path = "#{path}/" if f.dir?

    unless @opts[:ls]
      puts path
      return
    end

    size = @opts[:human] ? '%4s' % human_size(size) : '%12s' % size
    type = f.dir? ? 'd' : '-'
    repl = f.replication > 0 ? f.replication : '-'
    mtime = Time.at(f.modification_time / 1000).strftime '%Y-%m-%d %H:%M:%S'
    perm = f.permission.to_s.strip
    puts '%s%s %s %-8s %-16s %s %s %s' %
      [type, perm, repl, f.owner, f.group, size, mtime, path]
  end

  # Expands the search path as a glob and displays every entry found beneath
  # each match.
  def find
    @fs.glob_status(@path).each { |status| walk(status) { |f| display f } }
  end

  # Yields fstat itself and then, depth first, everything below it when it
  # is a directory. Children come from @fs.list_status.
  def walk(fstat, &block)
    yield fstat

    return unless fstat.dir?

    @fs.list_status(fstat.path).each { |child| walk(child, &block) }
  end

  private

  # Maps the leading sign of a "+N" / "-N" / "N" spec to the comparison
  # operator applied as `actual <op> N`.
  def comparator_for(spec)
    case spec[0, 1]
    when '-' then :<
    when '+' then :>
    else :==
    end
  end

  # Converts a string starting with YYYY-MM-DD into epoch seconds for local
  # midnight of that day.
  def parse_date(text)
    match = /\A(\d{4})-(\d{2})-(\d{2})/.match(text)
    raise 'Invalid Date Representation' unless match

    Time.new(match[1].to_i, match[2].to_i, match[3].to_i).to_i
  end

  # Renders a byte count with the largest binary unit it reaches, using
  # integer division; an exact power of 1024 uses its own unit (1024 => "1K").
  def human_size(size)
    SIZE_UNITS.each do |suffix, factor|
      return "#{size / factor}#{suffix}" if size >= factor
    end
    "#{size}B"
  end
end
|
198
|
+
|
199
|
+
# Prints the command-line help for hfind to standard output.
#
# The --mmin/--mtime lines spell out the sign convention implemented by
# HadoopFSFinder#filter_mtime: a negative value selects files modified more
# recently than the given age, a positive value selects older files.
def usage
  puts <<-EOF
usage: hfind [options] path
-H, --help
-a, --after # files modified after ISO date
-b, --before # files modified before ISO date
-m, --mmin # files modified less than (-x) or more than (+x) minutes ago
-M, --mtime # files modified less than (-x) or more than (+x) days ago
-s, --size # file size > (+x), < (-x), or == (x)
-r, --repl # replication factor > (+x), < (-x), or == (x)
-U, --under # show under-replicated files
-t, --type # show type (f)ile or (d)irectory
-l, --ls # show full listing detail
-h, --human # show human readable file sizes
-u, --uri # show full uri for path
  EOF
end
|
216
|
+
|
217
|
+
# main
|
218
|
+
|
219
|
+
opts = {}

gopts = GetoptLong.new(
  [ '--size', '-s', GetoptLong::REQUIRED_ARGUMENT ],
  [ '--repl', '-r', GetoptLong::REQUIRED_ARGUMENT ],
  [ '--after', '-a', GetoptLong::REQUIRED_ARGUMENT ],
  [ '--before', '-b', GetoptLong::REQUIRED_ARGUMENT ],
  [ '--mmin', '-m', GetoptLong::REQUIRED_ARGUMENT ],
  [ '--mtime', '-M', GetoptLong::REQUIRED_ARGUMENT ],
  [ '--type', '-t', GetoptLong::REQUIRED_ARGUMENT ],
  [ '--ls', '-l', GetoptLong::NO_ARGUMENT ],
  [ '--uri', '-u', GetoptLong::NO_ARGUMENT ],
  [ '--under', '-U', GetoptLong::NO_ARGUMENT ],
  [ '--human', '-h', GetoptLong::NO_ARGUMENT ],
  [ '--help', '-H', GetoptLong::NO_ARGUMENT ]
)

# Options whose argument is stored as given, and switches stored as true.
value_opts = [:after, :before, :mmin, :mtime, :size, :repl, :type]
flag_opts = [:human, :ls, :under, :uri]

begin
  gopts.each do |opt, arg|
    key = opt.sub(/\A--/, '').to_sym
    if value_opts.include?(key)
      opts[key] = arg
    elsif flag_opts.include?(key)
      opts[key] = true
    else
      # Only --help is left at this point; asking for help is not an error.
      usage
      exit 0
    end
  end
rescue GetoptLong::Error
  # Unknown option or missing argument: show the help text instead of
  # letting the exception end the script with a stack trace.
  usage
  exit 1
end

uri = ARGV[0]
unless uri
  usage
  exit 1
end

begin
  HadoopFSFinder.new(uri, opts).find
rescue => e
  # Report the cause and signal failure through the exit status.
  STDERR.puts "error: could not process #{uri}: #{e.message}"
  exit 1
end
|
metadata
ADDED
@@ -0,0 +1,76 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: hadoop-find
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 0
|
7
|
+
- 0
|
8
|
+
- 1
|
9
|
+
version: 0.0.1
|
10
|
+
platform: java
|
11
|
+
authors:
|
12
|
+
- Frank Fejes
|
13
|
+
autorequire:
|
14
|
+
bindir: bin
|
15
|
+
cert_chain: []
|
16
|
+
|
17
|
+
date: 2011-07-02 00:00:00 -05:00
|
18
|
+
default_executable:
|
19
|
+
dependencies: []
|
20
|
+
|
21
|
+
description: |-
|
22
|
+
A file listing utility for HDFS filesystems similar to unix find(1).
|
23
|
+
Requires jruby 1.6+.
|
24
|
+
email: frank@fejes.net
|
25
|
+
executables:
|
26
|
+
- hfind
|
27
|
+
- hfind.rb
|
28
|
+
extensions: []
|
29
|
+
|
30
|
+
extra_rdoc_files: []
|
31
|
+
|
32
|
+
files:
|
33
|
+
- README
|
34
|
+
- CHANGELOG
|
35
|
+
- bin/hfind
|
36
|
+
- bin/hfind.rb
|
37
|
+
has_rdoc: true
|
38
|
+
homepage: https://github.com/fsfiii/hadoop-find
|
39
|
+
licenses: []
|
40
|
+
|
41
|
+
post_install_message: |
|
42
|
+
===
|
43
|
+
Please be sure to install with:
|
44
|
+
|
45
|
+
jgem install --no-wrapper hadoop-find
|
46
|
+
===
|
47
|
+
|
48
|
+
rdoc_options: []
|
49
|
+
|
50
|
+
require_paths:
|
51
|
+
- lib
|
52
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
53
|
+
none: false
|
54
|
+
requirements:
|
55
|
+
- - ">="
|
56
|
+
- !ruby/object:Gem::Version
|
57
|
+
segments:
|
58
|
+
- 0
|
59
|
+
version: "0"
|
60
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
61
|
+
none: false
|
62
|
+
requirements:
|
63
|
+
- - ">="
|
64
|
+
- !ruby/object:Gem::Version
|
65
|
+
segments:
|
66
|
+
- 0
|
67
|
+
version: "0"
|
68
|
+
requirements: []
|
69
|
+
|
70
|
+
rubyforge_project: hadoop-find
|
71
|
+
rubygems_version: 1.3.7
|
72
|
+
signing_key:
|
73
|
+
specification_version: 3
|
74
|
+
summary: jruby file listing utility for HDFS filesystems similar to unix find(1).
|
75
|
+
test_files: []
|
76
|
+
|