swineherd 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +201 -0
- data/README.textile +207 -0
- data/Rakefile +30 -0
- data/VERSION +1 -0
- data/bin/hadoop-stream +35 -0
- data/bin/hdp-tree +26 -0
- data/examples/pagerank/data/seinfeld_network.tsv +429 -0
- data/examples/pagerank/pagerank.rb +99 -0
- data/examples/pagerank/scripts/cut_off_list.rb +16 -0
- data/examples/pagerank/scripts/histogram.R +5 -0
- data/examples/pagerank/scripts/pagerank.pig +20 -0
- data/examples/pagerank/scripts/pagerank_initialize.pig +24 -0
- data/lib/swineherd.rb +11 -0
- data/lib/swineherd/filesystem.rb +26 -0
- data/lib/swineherd/filesystem/README_filesystem.textile +47 -0
- data/lib/swineherd/filesystem/basefilesystem.rb +125 -0
- data/lib/swineherd/filesystem/filesystems.rb +103 -0
- data/lib/swineherd/filesystem/hadoopfilesystem.rb +263 -0
- data/lib/swineherd/filesystem/localfilesystem.rb +83 -0
- data/lib/swineherd/filesystem/localfs.rb +11 -0
- data/lib/swineherd/filesystem/s3filesystem.rb +249 -0
- data/lib/swineherd/script.rb +74 -0
- data/lib/swineherd/script/hadoop_script.rb +59 -0
- data/lib/swineherd/script/pig_script.rb +46 -0
- data/lib/swineherd/script/r_script.rb +14 -0
- data/lib/swineherd/script/wukong_script.rb +31 -0
- data/lib/swineherd/template.rb +45 -0
- data/lib/swineherd/workflow.rb +53 -0
- data/lib/swineherd/workflow/job.rb +60 -0
- data/notes.txt +20 -0
- data/swineherd.gemspec +97 -0
- data/tests/test_filesystem.rb +105 -0
- data/tests/test_s3_filesystem.rb +132 -0
- data/tests/testcfg.yaml +7 -0
- metadata +204 -0
@@ -0,0 +1,105 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
$LOAD_PATH << 'lib'
|
4
|
+
require 'swineherd/filesystem' ; include Swineherd
|
5
|
+
require 'rubygems'
|
6
|
+
require 'yaml'
|
7
|
+
require 'rspec'
|
8
|
+
|
9
|
+
# Read the test configuration stored alongside this spec file; its
# 'filesystem_to_test' entry names the filesystem handed to
# Swineherd::FileSystem.get by the examples in this file.
config_path = File.join(File.dirname(__FILE__), 'testcfg.yaml')
options = YAML.load(File.read(config_path))
current_test = options['filesystem_to_test']
|
11
|
+
# Contract examples for the filesystem named by +current_test+: the object
# returned by Swineherd::FileSystem.get must answer exists?, mkpath, rm, cp,
# mv, type, entries and open.
describe "A new filesystem" do

  before do
    @test_path  = "/tmp/rspec/test_path"
    @test_path2 = "/tmp/rspec/test_path2"
    @fs = Swineherd::FileSystem.get(current_test)
  end

  # Clean up in a hook instead of at the end of each example, so the scratch
  # paths are removed even when an expectation fails part-way through and no
  # example can leak state into the next one.
  after do
    [@test_path, @test_path2].each do |path|
      @fs.rm(path) if @fs.exists?(path)
    end
  end

  it "should implement exists?" do
    # Only the return type is checked here; the path may or may not exist.
    [true, false].should include(@fs.exists?(@test_path))
  end

  it "should be able to create a path" do
    @fs.mkpath(@test_path)
    @fs.exists?(@test_path).should eql(true)
  end

  it "should be able to remove a path" do
    @fs.mkpath(@test_path)
    @fs.rm(@test_path)
    @fs.exists?(@test_path).should eql(false)
  end

  it "should be able to copy paths" do
    @fs.mkpath(@test_path)
    @fs.cp(@test_path, @test_path2)
    @fs.exists?(@test_path2).should eql(true)
  end

  it "should be able to move paths" do
    @fs.mkpath(@test_path)
    @fs.mv(@test_path, @test_path2)
    @fs.exists?(@test_path).should eql(false)
    @fs.exists?(@test_path2).should eql(true)
  end

  it "should return a sane path type" do
    @fs.mkpath(@test_path)
    ["file", "directory", "symlink", "unknown"].should include(@fs.type(@test_path))
  end

  it "can return an array of directory entries" do
    sub_paths = ["a", "b", "c"]
    sub_paths.each { |sub_path| @fs.mkpath(File.join(@test_path, sub_path)) }

    entries = @fs.entries(@test_path)
    entries.class.should eql(Array)
    # Compare by basename and ignore any entry containing a dot (e.g. "." and "..").
    names = entries.map { |path| File.basename(path) }.reject { |name| name =~ /\./ }
    names.sort.should eql(sub_paths.sort)
  end

  it "can answer to open with a writable file object" do
    fileobj = @fs.open(@test_path, 'w')
    begin
      fileobj.should respond_to :write
    ensure
      # Release the handle before the after hook removes the path.
      fileobj.close
    end
  end

end
|
73
|
+
|
74
|
+
# Contract examples for the file objects handed out by the filesystem's
# open method: they must be closeable, writeable and readable.
describe "A new file" do
  before do
    @test_path   = "/tmp/rspec/test_path"
    @test_path2  = "/tmp/rspec/test_path2"
    @test_string = "@('_')@"
    @fs = Swineherd::FileSystem.get(current_test)
  end

  # Remove the scratch file after every example, including ones that fail
  # before reaching their last line.
  after do
    @fs.rm(@test_path) if @fs.exists?(@test_path)
  end

  it "should be closeable" do
    @fs.open(@test_path, 'w').close
  end

  it "should be writeable" do
    fileobj = @fs.open(@test_path, 'w')
    fileobj.write(@test_string)
    fileobj.close
  end

  it "should be readable" do
    # Write the fixture string first so there is something to read back.
    writer = @fs.open(@test_path, 'w')
    writer.write(@test_string)
    writer.close

    reader = @fs.open(@test_path, 'r')
    begin
      reader.read.should eql(@test_string)
    ensure
      # Close the read handle even if the expectation above fails.
      reader.close
    end
  end

end
|
@@ -0,0 +1,132 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
#
|
4
|
+
# These tests cannot possibly pass unless you have an amazon account with proper
|
5
|
+
# credentials. Furthermore, you definitely want a test bucket to play with. In
|
6
|
+
# this set of mock tests I've called it 'test-bucket' which will certainly get
|
7
|
+
# you an 'access-denied' error. Also, despite all that, 4 tests (see below)
|
8
|
+
# will fail outright.
|
9
|
+
#
|
10
|
+
# This one has to break the rules slightly because amazon-s3 is not actually a
|
11
|
+
# filesystem implementation. There's no such thing as a 'path' and so the following
|
12
|
+
# tests will fail:
|
13
|
+
#
|
14
|
+
# 1. it "should be able to create a path" (path won't exist but it's ok, that's what
|
15
|
+
# we expect)
|
16
|
+
#
|
17
|
+
# 2. it "should be able to copy paths" (it can't create paths that aren't files
|
18
|
+
# and so we expect this to fail, again it's ok.)
|
19
|
+
#
|
20
|
+
# 3. it "should be able to move paths" (it can't create paths that aren't files
|
21
|
+
# and so we expect this to fail, again it's ok.)
|
22
|
+
#
|
23
|
+
# 4. it "can return an array of directory entries" (ditto)
|
24
|
+
#
|
25
|
+
# Note: If one were to rewrite the above tests to use existing paths on s3 then the
|
26
|
+
# tests would succeed. Try it.
|
27
|
+
#
|
28
|
+
|
29
|
+
|
30
|
+
$LOAD_PATH << 'lib'
|
31
|
+
require 'swineherd/filesystem' ; include Swineherd
|
32
|
+
require 'rubygems'
|
33
|
+
require 'yaml'
|
34
|
+
require 'rspec'
|
35
|
+
|
36
|
+
# Read the test configuration stored alongside this spec file; the examples
# in this file take the S3 bucket name and AWS credentials from it.
config_path = File.join(File.dirname(__FILE__), 'testcfg.yaml')
options = YAML.load(File.read(config_path))
current_test = 's3'
|
38
|
+
# Contract examples run against the 's3' filesystem: the object returned by
# Swineherd::FileSystem.get must answer exists?, mkpath, rm, cp, mv, type,
# entries and open. Paths are prefixed with the bucket named in the config.
describe "A new filesystem" do

  before do
    @test_path  = "#{options['s3_test_bucket']}/tmp/rspec/test_path"
    @test_path2 = "#{options['s3_test_bucket']}/tmp/rspec/test_path2"
    @fs = Swineherd::FileSystem.get(current_test, options['aws_access_key_id'], options['aws_secret_access_key'])
  end

  # Clean up in a hook instead of at the end of each example, so the scratch
  # paths are removed even when an expectation fails part-way through and no
  # example can leak state into the next one.
  after do
    [@test_path, @test_path2].each do |path|
      @fs.rm(path) if @fs.exists?(path)
    end
  end

  it "should implement exists?" do
    # Only the return type is checked here; the path may or may not exist.
    [true, false].should include(@fs.exists?(@test_path))
  end

  it "should be able to create a path" do
    @fs.mkpath(@test_path)
    @fs.exists?(@test_path).should eql(true)
  end

  it "should be able to remove a path" do
    @fs.mkpath(@test_path)
    @fs.rm(@test_path)
    @fs.exists?(@test_path).should eql(false)
  end

  it "should be able to copy paths" do
    @fs.mkpath(@test_path)
    @fs.cp(@test_path, @test_path2)
    @fs.exists?(@test_path2).should eql(true)
  end

  it "should be able to move paths" do
    @fs.mkpath(@test_path)
    @fs.mv(@test_path, @test_path2)
    @fs.exists?(@test_path).should eql(false)
    @fs.exists?(@test_path2).should eql(true)
  end

  it "should return a sane path type" do
    @fs.mkpath(@test_path)
    ["file", "directory", "symlink", "unknown"].should include(@fs.type(@test_path))
  end

  it "can return an array of directory entries" do
    sub_paths = ["a", "b", "c"]
    sub_paths.each { |sub_path| @fs.mkpath(File.join(@test_path, sub_path)) }

    entries = @fs.entries(@test_path)
    entries.class.should eql(Array)
    # Compare by basename and ignore any entry containing a dot (e.g. "." and "..").
    names = entries.map { |path| File.basename(path) }.reject { |name| name =~ /\./ }
    names.sort.should eql(sub_paths.sort)
  end

  it "can answer to open with a writable file object" do
    fileobj = @fs.open(@test_path, 'w')
    begin
      fileobj.should respond_to :write
    ensure
      # Release the handle before the after hook removes the path.
      fileobj.close
    end
  end

end
|
100
|
+
|
101
|
+
# Contract examples for the file objects handed out by the 's3' filesystem's
# open method: they must be closeable, writeable and readable.
describe "A new file" do
  before do
    @test_path   = "#{options['s3_test_bucket']}/tmp/rspec/test_path"
    # Same location as in the "A new filesystem" examples, for consistency.
    @test_path2  = "#{options['s3_test_bucket']}/tmp/rspec/test_path2"
    @test_string = "@('_')@"
    @fs = Swineherd::FileSystem.get(current_test, options['aws_access_key_id'], options['aws_secret_access_key'])
  end

  # Remove the scratch file after every example, including ones that fail
  # before reaching their last line.
  after do
    @fs.rm(@test_path) if @fs.exists?(@test_path)
  end

  it "should be closeable" do
    @fs.open(@test_path, 'w').close
  end

  it "should be writeable" do
    fileobj = @fs.open(@test_path, 'w')
    fileobj.write(@test_string)
    fileobj.close
  end

  it "should be readable" do
    # Write the fixture string first so there is something to read back.
    writer = @fs.open(@test_path, 'w')
    writer.write(@test_string)
    writer.close

    reader = @fs.open(@test_path, 'r')
    begin
      reader.read.should eql(@test_string)
    ensure
      # Close the read handle even if the expectation above fails.
      reader.close
    end
  end

end
|
data/tests/testcfg.yaml
ADDED
metadata
ADDED
@@ -0,0 +1,204 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: swineherd
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 29
|
5
|
+
prerelease: false
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 0
|
9
|
+
- 1
|
10
|
+
version: 0.0.1
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Jacob Perkins
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2011-04-20 00:00:00 -05:00
|
19
|
+
default_executable:
|
20
|
+
dependencies:
|
21
|
+
- !ruby/object:Gem::Dependency
|
22
|
+
name: yard
|
23
|
+
prerelease: false
|
24
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ~>
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
hash: 7
|
30
|
+
segments:
|
31
|
+
- 0
|
32
|
+
- 6
|
33
|
+
- 0
|
34
|
+
version: 0.6.0
|
35
|
+
type: :development
|
36
|
+
version_requirements: *id001
|
37
|
+
- !ruby/object:Gem::Dependency
|
38
|
+
name: jeweler
|
39
|
+
prerelease: false
|
40
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ~>
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
hash: 7
|
46
|
+
segments:
|
47
|
+
- 1
|
48
|
+
- 5
|
49
|
+
- 2
|
50
|
+
version: 1.5.2
|
51
|
+
type: :development
|
52
|
+
version_requirements: *id002
|
53
|
+
- !ruby/object:Gem::Dependency
|
54
|
+
name: rcov
|
55
|
+
prerelease: false
|
56
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
hash: 3
|
62
|
+
segments:
|
63
|
+
- 0
|
64
|
+
version: "0"
|
65
|
+
type: :development
|
66
|
+
version_requirements: *id003
|
67
|
+
- !ruby/object:Gem::Dependency
|
68
|
+
name: configliere
|
69
|
+
prerelease: false
|
70
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
71
|
+
none: false
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
hash: 3
|
76
|
+
segments:
|
77
|
+
- 0
|
78
|
+
version: "0"
|
79
|
+
type: :runtime
|
80
|
+
version_requirements: *id004
|
81
|
+
- !ruby/object:Gem::Dependency
|
82
|
+
name: gorillib
|
83
|
+
prerelease: false
|
84
|
+
requirement: &id005 !ruby/object:Gem::Requirement
|
85
|
+
none: false
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
hash: 3
|
90
|
+
segments:
|
91
|
+
- 0
|
92
|
+
version: "0"
|
93
|
+
type: :runtime
|
94
|
+
version_requirements: *id005
|
95
|
+
- !ruby/object:Gem::Dependency
|
96
|
+
name: erubis
|
97
|
+
prerelease: false
|
98
|
+
requirement: &id006 !ruby/object:Gem::Requirement
|
99
|
+
none: false
|
100
|
+
requirements:
|
101
|
+
- - ">="
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
hash: 3
|
104
|
+
segments:
|
105
|
+
- 0
|
106
|
+
version: "0"
|
107
|
+
type: :runtime
|
108
|
+
version_requirements: *id006
|
109
|
+
- !ruby/object:Gem::Dependency
|
110
|
+
name: right_aws
|
111
|
+
prerelease: false
|
112
|
+
requirement: &id007 !ruby/object:Gem::Requirement
|
113
|
+
none: false
|
114
|
+
requirements:
|
115
|
+
- - ">="
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
hash: 3
|
118
|
+
segments:
|
119
|
+
- 0
|
120
|
+
version: "0"
|
121
|
+
type: :runtime
|
122
|
+
version_requirements: *id007
|
123
|
+
description: Swineherd is for running scripts and workflows on filesystems.
|
124
|
+
email: jacob.a.perkins@gmail.com
|
125
|
+
executables:
|
126
|
+
- hdp-tree
|
127
|
+
- hadoop-stream
|
128
|
+
extensions: []
|
129
|
+
|
130
|
+
extra_rdoc_files:
|
131
|
+
- LICENSE
|
132
|
+
- README.textile
|
133
|
+
files:
|
134
|
+
- LICENSE
|
135
|
+
- README.textile
|
136
|
+
- Rakefile
|
137
|
+
- VERSION
|
138
|
+
- bin/hadoop-stream
|
139
|
+
- bin/hdp-tree
|
140
|
+
- examples/pagerank/data/seinfeld_network.tsv
|
141
|
+
- examples/pagerank/pagerank.rb
|
142
|
+
- examples/pagerank/scripts/cut_off_list.rb
|
143
|
+
- examples/pagerank/scripts/histogram.R
|
144
|
+
- examples/pagerank/scripts/pagerank.pig
|
145
|
+
- examples/pagerank/scripts/pagerank_initialize.pig
|
146
|
+
- lib/swineherd.rb
|
147
|
+
- lib/swineherd/filesystem.rb
|
148
|
+
- lib/swineherd/filesystem/README_filesystem.textile
|
149
|
+
- lib/swineherd/filesystem/basefilesystem.rb
|
150
|
+
- lib/swineherd/filesystem/filesystems.rb
|
151
|
+
- lib/swineherd/filesystem/hadoopfilesystem.rb
|
152
|
+
- lib/swineherd/filesystem/localfilesystem.rb
|
153
|
+
- lib/swineherd/filesystem/localfs.rb
|
154
|
+
- lib/swineherd/filesystem/s3filesystem.rb
|
155
|
+
- lib/swineherd/script.rb
|
156
|
+
- lib/swineherd/script/hadoop_script.rb
|
157
|
+
- lib/swineherd/script/pig_script.rb
|
158
|
+
- lib/swineherd/script/r_script.rb
|
159
|
+
- lib/swineherd/script/wukong_script.rb
|
160
|
+
- lib/swineherd/template.rb
|
161
|
+
- lib/swineherd/workflow.rb
|
162
|
+
- lib/swineherd/workflow/job.rb
|
163
|
+
- notes.txt
|
164
|
+
- swineherd.gemspec
|
165
|
+
- tests/test_filesystem.rb
|
166
|
+
- tests/test_s3_filesystem.rb
|
167
|
+
- tests/testcfg.yaml
|
168
|
+
has_rdoc: true
|
169
|
+
homepage: http://github.com/Ganglion/swineherd
|
170
|
+
licenses:
|
171
|
+
- MIT
|
172
|
+
post_install_message:
|
173
|
+
rdoc_options: []
|
174
|
+
|
175
|
+
require_paths:
|
176
|
+
- lib
|
177
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
178
|
+
none: false
|
179
|
+
requirements:
|
180
|
+
- - ">="
|
181
|
+
- !ruby/object:Gem::Version
|
182
|
+
hash: 3
|
183
|
+
segments:
|
184
|
+
- 0
|
185
|
+
version: "0"
|
186
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
187
|
+
none: false
|
188
|
+
requirements:
|
189
|
+
- - ">="
|
190
|
+
- !ruby/object:Gem::Version
|
191
|
+
hash: 3
|
192
|
+
segments:
|
193
|
+
- 0
|
194
|
+
version: "0"
|
195
|
+
requirements: []
|
196
|
+
|
197
|
+
rubyforge_project:
|
198
|
+
rubygems_version: 1.3.7
|
199
|
+
signing_key:
|
200
|
+
specification_version: 3
|
201
|
+
summary: Flexible data workflow glue.
|
202
|
+
test_files:
|
203
|
+
- examples/pagerank/pagerank.rb
|
204
|
+
- examples/pagerank/scripts/cut_off_list.rb
|