swineherd 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION +1 -1
- data/lib/swineherd.rb +3 -0
- data/lib/swineherd/filesystem/basefilesystem.rb +6 -0
- data/lib/swineherd/filesystem/hadoopfilesystem.rb +15 -0
- data/lib/swineherd/filesystem/localfilesystem.rb +7 -0
- data/lib/swineherd/filesystem/s3filesystem.rb +18 -0
- data/lib/swineherd/foo +1 -0
- data/lib/swineherd/script.rb +1 -0
- data/lib/swineherd/workflow.rb +1 -1
- data/swineherd.gemspec +4 -4
- data/tests/test_filesystem.rb +13 -2
- data/tests/test_s3_filesystem.rb +13 -2
- metadata +7 -6
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.1
|
1
|
+
0.0.2
|
data/lib/swineherd.rb
CHANGED
@@ -32,7 +32,22 @@ module Swineherd
|
|
32
32
|
HadoopFile.new(path,mode,self,&blk)
|
33
33
|
end
|
34
34
|
|
35
|
+
def size path
|
36
|
+
lr(path).inject(0){|sz, f| sz += @hdfs.get_file_status(Path.new(f)).get_len}
|
37
|
+
end
|
35
38
|
|
39
|
+
#
|
40
|
+
# Recursively list paths
|
41
|
+
#
|
42
|
+
def lr path
|
43
|
+
paths = entries(path)
|
44
|
+
if (paths && !paths.empty?)
|
45
|
+
paths.map{|e| lr(e)}.flatten
|
46
|
+
else
|
47
|
+
path
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
36
51
|
def rm path
|
37
52
|
@hdfs.delete(Path.new(path), true)
|
38
53
|
[path]
|
@@ -1,4 +1,5 @@
|
|
1
1
|
require 'fileutils'
|
2
|
+
require 'find'
|
2
3
|
module Swineherd
|
3
4
|
|
4
5
|
class LocalFileSystem
|
@@ -12,6 +13,12 @@ module Swineherd
|
|
12
13
|
return LocalFile.new path, mode, &blk
|
13
14
|
end
|
14
15
|
|
16
|
+
def size path
|
17
|
+
sz = 0
|
18
|
+
Find.find(path){|f| sz += File.size(f)}
|
19
|
+
sz
|
20
|
+
end
|
21
|
+
|
15
22
|
def rm path
|
16
23
|
FileUtils.rm_r path
|
17
24
|
end
|
@@ -22,6 +22,24 @@ module Swineherd
|
|
22
22
|
S3File.new(path,mode,self,&blk)
|
23
23
|
end
|
24
24
|
|
25
|
+
def size path
|
26
|
+
sz = 0
|
27
|
+
if type(path) == "directory"
|
28
|
+
lr(path).each do |f|
|
29
|
+
sz += file_size(f)
|
30
|
+
end
|
31
|
+
else
|
32
|
+
sz += file_size(path)
|
33
|
+
end
|
34
|
+
sz
|
35
|
+
end
|
36
|
+
|
37
|
+
def file_size path
|
38
|
+
containing_bucket = bucket(path)
|
39
|
+
header = @s3.interface.head(containing_bucket, key_path(path))
|
40
|
+
header['content-length'].to_i
|
41
|
+
end
|
42
|
+
|
25
43
|
def rm path
|
26
44
|
bkt = bucket(path)
|
27
45
|
key = key_path(path)
|
data/lib/swineherd/foo
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
@('_')@
|
data/lib/swineherd/script.rb
CHANGED
data/lib/swineherd/workflow.rb
CHANGED
data/swineherd.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{swineherd}
|
8
|
-
s.version = "0.0.1"
|
8
|
+
s.version = "0.0.2"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Jacob Perkins"]
|
12
|
-
s.date = %q{2011-
|
12
|
+
s.date = %q{2011-06-01}
|
13
13
|
s.description = %q{Swineherd is for running scripts and workflows on filesystems.}
|
14
14
|
s.email = %q{jacob.a.perkins@gmail.com}
|
15
15
|
s.executables = ["hdp-tree", "hadoop-stream"]
|
@@ -39,6 +39,7 @@ Gem::Specification.new do |s|
|
|
39
39
|
"lib/swineherd/filesystem/localfilesystem.rb",
|
40
40
|
"lib/swineherd/filesystem/localfs.rb",
|
41
41
|
"lib/swineherd/filesystem/s3filesystem.rb",
|
42
|
+
"lib/swineherd/foo",
|
42
43
|
"lib/swineherd/script.rb",
|
43
44
|
"lib/swineherd/script/hadoop_script.rb",
|
44
45
|
"lib/swineherd/script/pig_script.rb",
|
@@ -56,7 +57,7 @@ Gem::Specification.new do |s|
|
|
56
57
|
s.homepage = %q{http://github.com/Ganglion/swineherd}
|
57
58
|
s.licenses = ["MIT"]
|
58
59
|
s.require_paths = ["lib"]
|
59
|
-
s.rubygems_version = %q{1.
|
60
|
+
s.rubygems_version = %q{1.4.2}
|
60
61
|
s.summary = %q{Flexible data workflow glue.}
|
61
62
|
s.test_files = [
|
62
63
|
"examples/pagerank/pagerank.rb",
|
@@ -64,7 +65,6 @@ Gem::Specification.new do |s|
|
|
64
65
|
]
|
65
66
|
|
66
67
|
if s.respond_to? :specification_version then
|
67
|
-
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
68
68
|
s.specification_version = 3
|
69
69
|
|
70
70
|
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
data/tests/test_filesystem.rb
CHANGED
@@ -11,8 +11,9 @@ current_test = options['filesystem_to_test']
|
|
11
11
|
describe "A new filesystem" do
|
12
12
|
|
13
13
|
before do
|
14
|
-
@test_path
|
15
|
-
@test_path2
|
14
|
+
@test_path = "/tmp/rspec/test_path"
|
15
|
+
@test_path2 = "/tmp/rspec/test_path2"
|
16
|
+
@test_string = "@('_')@"
|
16
17
|
@fs = Swineherd::FileSystem.get(current_test)
|
17
18
|
end
|
18
19
|
|
@@ -31,6 +32,16 @@ describe "A new filesystem" do
|
|
31
32
|
@fs.exists?(@test_path).should eql(false)
|
32
33
|
end
|
33
34
|
|
35
|
+
it "should implement size" do
|
36
|
+
@fs.mkpath(File.dirname(@test_path))
|
37
|
+
fileobj = @fs.open(@test_path, 'w')
|
38
|
+
fileobj.write(@test_string)
|
39
|
+
fileobj.close
|
40
|
+
7.should eql(@fs.size(@test_path))
|
41
|
+
@fs.rm(@test_path)
|
42
|
+
@fs.rm(File.dirname(@test_path))
|
43
|
+
end
|
44
|
+
|
34
45
|
it "should be able to copy paths" do
|
35
46
|
@fs.mkpath(@test_path)
|
36
47
|
@fs.cp(@test_path, @test_path2)
|
data/tests/test_s3_filesystem.rb
CHANGED
@@ -38,8 +38,9 @@ current_test = 's3'
|
|
38
38
|
describe "A new filesystem" do
|
39
39
|
|
40
40
|
before do
|
41
|
-
@test_path
|
42
|
-
@test_path2
|
41
|
+
@test_path = "#{options['s3_test_bucket']}/tmp/rspec/test_path"
|
42
|
+
@test_path2 = "#{options['s3_test_bucket']}/tmp/rspec/test_path2"
|
43
|
+
@test_string = "@('_')@"
|
43
44
|
@fs = Swineherd::FileSystem.get(current_test, options['aws_access_key_id'], options['aws_secret_access_key'])
|
44
45
|
end
|
45
46
|
|
@@ -58,6 +59,16 @@ describe "A new filesystem" do
|
|
58
59
|
@fs.exists?(@test_path).should eql(false)
|
59
60
|
end
|
60
61
|
|
62
|
+
it "should implement size" do
|
63
|
+
@fs.mkpath(File.dirname(@test_path))
|
64
|
+
fileobj = @fs.open(@test_path, 'w')
|
65
|
+
fileobj.write(@test_string)
|
66
|
+
fileobj.close
|
67
|
+
7.should eql(@fs.size(@test_path))
|
68
|
+
@fs.rm(@test_path)
|
69
|
+
@fs.rm(File.dirname(@test_path))
|
70
|
+
end
|
71
|
+
|
61
72
|
it "should be able to copy paths" do
|
62
73
|
@fs.mkpath(@test_path)
|
63
74
|
@fs.cp(@test_path, @test_path2)
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: swineherd
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
5
|
-
prerelease:
|
4
|
+
hash: 27
|
5
|
+
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 0
|
9
|
-
- 1
|
10
|
-
version: 0.0.1
|
9
|
+
- 2
|
10
|
+
version: 0.0.2
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Jacob Perkins
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2011-
|
18
|
+
date: 2011-06-01 00:00:00 +00:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
@@ -152,6 +152,7 @@ files:
|
|
152
152
|
- lib/swineherd/filesystem/localfilesystem.rb
|
153
153
|
- lib/swineherd/filesystem/localfs.rb
|
154
154
|
- lib/swineherd/filesystem/s3filesystem.rb
|
155
|
+
- lib/swineherd/foo
|
155
156
|
- lib/swineherd/script.rb
|
156
157
|
- lib/swineherd/script/hadoop_script.rb
|
157
158
|
- lib/swineherd/script/pig_script.rb
|
@@ -195,7 +196,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
195
196
|
requirements: []
|
196
197
|
|
197
198
|
rubyforge_project:
|
198
|
-
rubygems_version: 1.
|
199
|
+
rubygems_version: 1.4.2
|
199
200
|
signing_key:
|
200
201
|
specification_version: 3
|
201
202
|
summary: Flexible data workflow glue.
|