swineherd 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION +1 -1
- data/lib/swineherd.rb +3 -0
- data/lib/swineherd/filesystem/basefilesystem.rb +6 -0
- data/lib/swineherd/filesystem/hadoopfilesystem.rb +15 -0
- data/lib/swineherd/filesystem/localfilesystem.rb +7 -0
- data/lib/swineherd/filesystem/s3filesystem.rb +18 -0
- data/lib/swineherd/foo +1 -0
- data/lib/swineherd/script.rb +1 -0
- data/lib/swineherd/workflow.rb +1 -1
- data/swineherd.gemspec +4 -4
- data/tests/test_filesystem.rb +13 -2
- data/tests/test_s3_filesystem.rb +13 -2
- metadata +7 -6
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.1
|
1
|
+
0.0.2
|
data/lib/swineherd.rb
CHANGED
data/lib/swineherd/filesystem/hadoopfilesystem.rb
CHANGED
@@ -32,7 +32,22 @@ module Swineherd
|
|
32
32
|
HadoopFile.new(path,mode,self,&blk)
|
33
33
|
end
|
34
34
|
|
35
|
+
def size path
|
36
|
+
lr(path).inject(0){|sz, f| sz += @hdfs.get_file_status(Path.new(f)).get_len}
|
37
|
+
end
|
35
38
|
|
39
|
+
#
|
40
|
+
# Recursively list paths
|
41
|
+
#
|
42
|
+
def lr path
|
43
|
+
paths = entries(path)
|
44
|
+
if (paths && !paths.empty?)
|
45
|
+
paths.map{|e| lr(e)}.flatten
|
46
|
+
else
|
47
|
+
path
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
36
51
|
def rm path
|
37
52
|
@hdfs.delete(Path.new(path), true)
|
38
53
|
[path]
|
data/lib/swineherd/filesystem/localfilesystem.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
require 'fileutils'
|
2
|
+
require 'find'
|
2
3
|
module Swineherd
|
3
4
|
|
4
5
|
class LocalFileSystem
|
@@ -12,6 +13,12 @@ module Swineherd
|
|
12
13
|
return LocalFile.new path, mode, &blk
|
13
14
|
end
|
14
15
|
|
16
|
+
def size path
|
17
|
+
sz = 0
|
18
|
+
Find.find(path){|f| sz += File.size(f)}
|
19
|
+
sz
|
20
|
+
end
|
21
|
+
|
15
22
|
def rm path
|
16
23
|
FileUtils.rm_r path
|
17
24
|
end
|
data/lib/swineherd/filesystem/s3filesystem.rb
CHANGED
@@ -22,6 +22,24 @@ module Swineherd
|
|
22
22
|
S3File.new(path,mode,self,&blk)
|
23
23
|
end
|
24
24
|
|
25
|
+
def size path
|
26
|
+
sz = 0
|
27
|
+
if type(path) == "directory"
|
28
|
+
lr(path).each do |f|
|
29
|
+
sz += file_size(f)
|
30
|
+
end
|
31
|
+
else
|
32
|
+
sz += file_size(path)
|
33
|
+
end
|
34
|
+
sz
|
35
|
+
end
|
36
|
+
|
37
|
+
def file_size path
|
38
|
+
containing_bucket = bucket(path)
|
39
|
+
header = @s3.interface.head(containing_bucket, key_path(path))
|
40
|
+
header['content-length'].to_i
|
41
|
+
end
|
42
|
+
|
25
43
|
def rm path
|
26
44
|
bkt = bucket(path)
|
27
45
|
key = key_path(path)
|
data/lib/swineherd/foo
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
@('_')@
|
data/lib/swineherd/script.rb
CHANGED
data/lib/swineherd/workflow.rb
CHANGED
data/swineherd.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{swineherd}
|
8
|
-
s.version = "0.0.1"
|
8
|
+
s.version = "0.0.2"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Jacob Perkins"]
|
12
|
-
s.date = %q{2011-
|
12
|
+
s.date = %q{2011-06-01}
|
13
13
|
s.description = %q{Swineherd is for running scripts and workflows on filesystems.}
|
14
14
|
s.email = %q{jacob.a.perkins@gmail.com}
|
15
15
|
s.executables = ["hdp-tree", "hadoop-stream"]
|
@@ -39,6 +39,7 @@ Gem::Specification.new do |s|
|
|
39
39
|
"lib/swineherd/filesystem/localfilesystem.rb",
|
40
40
|
"lib/swineherd/filesystem/localfs.rb",
|
41
41
|
"lib/swineherd/filesystem/s3filesystem.rb",
|
42
|
+
"lib/swineherd/foo",
|
42
43
|
"lib/swineherd/script.rb",
|
43
44
|
"lib/swineherd/script/hadoop_script.rb",
|
44
45
|
"lib/swineherd/script/pig_script.rb",
|
@@ -56,7 +57,7 @@ Gem::Specification.new do |s|
|
|
56
57
|
s.homepage = %q{http://github.com/Ganglion/swineherd}
|
57
58
|
s.licenses = ["MIT"]
|
58
59
|
s.require_paths = ["lib"]
|
59
|
-
s.rubygems_version = %q{1.
|
60
|
+
s.rubygems_version = %q{1.4.2}
|
60
61
|
s.summary = %q{Flexible data workflow glue.}
|
61
62
|
s.test_files = [
|
62
63
|
"examples/pagerank/pagerank.rb",
|
@@ -64,7 +65,6 @@ Gem::Specification.new do |s|
|
|
64
65
|
]
|
65
66
|
|
66
67
|
if s.respond_to? :specification_version then
|
67
|
-
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
68
68
|
s.specification_version = 3
|
69
69
|
|
70
70
|
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
data/tests/test_filesystem.rb
CHANGED
@@ -11,8 +11,9 @@ current_test = options['filesystem_to_test']
|
|
11
11
|
describe "A new filesystem" do
|
12
12
|
|
13
13
|
before do
|
14
|
-
@test_path
|
15
|
-
@test_path2
|
14
|
+
@test_path = "/tmp/rspec/test_path"
|
15
|
+
@test_path2 = "/tmp/rspec/test_path2"
|
16
|
+
@test_string = "@('_')@"
|
16
17
|
@fs = Swineherd::FileSystem.get(current_test)
|
17
18
|
end
|
18
19
|
|
@@ -31,6 +32,16 @@ describe "A new filesystem" do
|
|
31
32
|
@fs.exists?(@test_path).should eql(false)
|
32
33
|
end
|
33
34
|
|
35
|
+
it "should implement size" do
|
36
|
+
@fs.mkpath(File.dirname(@test_path))
|
37
|
+
fileobj = @fs.open(@test_path, 'w')
|
38
|
+
fileobj.write(@test_string)
|
39
|
+
fileobj.close
|
40
|
+
7.should eql(@fs.size(@test_path))
|
41
|
+
@fs.rm(@test_path)
|
42
|
+
@fs.rm(File.dirname(@test_path))
|
43
|
+
end
|
44
|
+
|
34
45
|
it "should be able to copy paths" do
|
35
46
|
@fs.mkpath(@test_path)
|
36
47
|
@fs.cp(@test_path, @test_path2)
|
data/tests/test_s3_filesystem.rb
CHANGED
@@ -38,8 +38,9 @@ current_test = 's3'
|
|
38
38
|
describe "A new filesystem" do
|
39
39
|
|
40
40
|
before do
|
41
|
-
@test_path
|
42
|
-
@test_path2
|
41
|
+
@test_path = "#{options['s3_test_bucket']}/tmp/rspec/test_path"
|
42
|
+
@test_path2 = "#{options['s3_test_bucket']}/tmp/rspec/test_path2"
|
43
|
+
@test_string = "@('_')@"
|
43
44
|
@fs = Swineherd::FileSystem.get(current_test, options['aws_access_key_id'], options['aws_secret_access_key'])
|
44
45
|
end
|
45
46
|
|
@@ -58,6 +59,16 @@ describe "A new filesystem" do
|
|
58
59
|
@fs.exists?(@test_path).should eql(false)
|
59
60
|
end
|
60
61
|
|
62
|
+
it "should implement size" do
|
63
|
+
@fs.mkpath(File.dirname(@test_path))
|
64
|
+
fileobj = @fs.open(@test_path, 'w')
|
65
|
+
fileobj.write(@test_string)
|
66
|
+
fileobj.close
|
67
|
+
7.should eql(@fs.size(@test_path))
|
68
|
+
@fs.rm(@test_path)
|
69
|
+
@fs.rm(File.dirname(@test_path))
|
70
|
+
end
|
71
|
+
|
61
72
|
it "should be able to copy paths" do
|
62
73
|
@fs.mkpath(@test_path)
|
63
74
|
@fs.cp(@test_path, @test_path2)
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: swineherd
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
5
|
-
prerelease:
|
4
|
+
hash: 27
|
5
|
+
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 0
|
9
|
-
- 1
|
10
|
-
version: 0.0.1
|
9
|
+
- 2
|
10
|
+
version: 0.0.2
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Jacob Perkins
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2011-
|
18
|
+
date: 2011-06-01 00:00:00 +00:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
@@ -152,6 +152,7 @@ files:
|
|
152
152
|
- lib/swineherd/filesystem/localfilesystem.rb
|
153
153
|
- lib/swineherd/filesystem/localfs.rb
|
154
154
|
- lib/swineherd/filesystem/s3filesystem.rb
|
155
|
+
- lib/swineherd/foo
|
155
156
|
- lib/swineherd/script.rb
|
156
157
|
- lib/swineherd/script/hadoop_script.rb
|
157
158
|
- lib/swineherd/script/pig_script.rb
|
@@ -195,7 +196,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
195
196
|
requirements: []
|
196
197
|
|
197
198
|
rubyforge_project:
|
198
|
-
rubygems_version: 1.
|
199
|
+
rubygems_version: 1.4.2
|
199
200
|
signing_key:
|
200
201
|
specification_version: 3
|
201
202
|
summary: Flexible data workflow glue.
|