swineherd 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +201 -0
- data/README.textile +207 -0
- data/Rakefile +30 -0
- data/VERSION +1 -0
- data/bin/hadoop-stream +35 -0
- data/bin/hdp-tree +26 -0
- data/examples/pagerank/data/seinfeld_network.tsv +429 -0
- data/examples/pagerank/pagerank.rb +99 -0
- data/examples/pagerank/scripts/cut_off_list.rb +16 -0
- data/examples/pagerank/scripts/histogram.R +5 -0
- data/examples/pagerank/scripts/pagerank.pig +20 -0
- data/examples/pagerank/scripts/pagerank_initialize.pig +24 -0
- data/lib/swineherd.rb +11 -0
- data/lib/swineherd/filesystem.rb +26 -0
- data/lib/swineherd/filesystem/README_filesystem.textile +47 -0
- data/lib/swineherd/filesystem/basefilesystem.rb +125 -0
- data/lib/swineherd/filesystem/filesystems.rb +103 -0
- data/lib/swineherd/filesystem/hadoopfilesystem.rb +263 -0
- data/lib/swineherd/filesystem/localfilesystem.rb +83 -0
- data/lib/swineherd/filesystem/localfs.rb +11 -0
- data/lib/swineherd/filesystem/s3filesystem.rb +249 -0
- data/lib/swineherd/script.rb +74 -0
- data/lib/swineherd/script/hadoop_script.rb +59 -0
- data/lib/swineherd/script/pig_script.rb +46 -0
- data/lib/swineherd/script/r_script.rb +14 -0
- data/lib/swineherd/script/wukong_script.rb +31 -0
- data/lib/swineherd/template.rb +45 -0
- data/lib/swineherd/workflow.rb +53 -0
- data/lib/swineherd/workflow/job.rb +60 -0
- data/notes.txt +20 -0
- data/swineherd.gemspec +97 -0
- data/tests/test_filesystem.rb +105 -0
- data/tests/test_s3_filesystem.rb +132 -0
- data/tests/testcfg.yaml +7 -0
- metadata +204 -0
@@ -0,0 +1,105 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
$LOAD_PATH << 'lib'
|
4
|
+
require 'swineherd/filesystem' ; include Swineherd
|
5
|
+
require 'rubygems'
|
6
|
+
require 'yaml'
|
7
|
+
require 'rspec'
|
8
|
+
|
9
|
+
options = YAML.load(File.read(File.dirname(__FILE__)+'/testcfg.yaml'))
|
10
|
+
current_test = options['filesystem_to_test']
|
11
|
+
describe "A new filesystem" do
|
12
|
+
|
13
|
+
before do
|
14
|
+
@test_path = "/tmp/rspec/test_path"
|
15
|
+
@test_path2 = "/tmp/rspec/test_path2"
|
16
|
+
@fs = Swineherd::FileSystem.get(current_test)
|
17
|
+
end
|
18
|
+
|
19
|
+
it "should implement exists?" do
|
20
|
+
[true, false].should include(@fs.exists?(@test_path))
|
21
|
+
end
|
22
|
+
|
23
|
+
it "should be able to create a path" do
|
24
|
+
@fs.mkpath(@test_path)
|
25
|
+
@fs.exists?(@test_path).should eql(true)
|
26
|
+
end
|
27
|
+
|
28
|
+
it "should be able to remove a path" do
|
29
|
+
@fs.mkpath(@test_path)
|
30
|
+
@fs.rm(@test_path)
|
31
|
+
@fs.exists?(@test_path).should eql(false)
|
32
|
+
end
|
33
|
+
|
34
|
+
it "should be able to copy paths" do
|
35
|
+
@fs.mkpath(@test_path)
|
36
|
+
@fs.cp(@test_path, @test_path2)
|
37
|
+
@fs.exists?(@test_path2).should eql(true)
|
38
|
+
@fs.rm(@test_path)
|
39
|
+
@fs.rm(@test_path2)
|
40
|
+
end
|
41
|
+
|
42
|
+
it "should be able to move paths" do
|
43
|
+
@fs.mkpath(@test_path)
|
44
|
+
@fs.mv(@test_path, @test_path2)
|
45
|
+
@fs.exists?(@test_path).should eql(false)
|
46
|
+
@fs.exists?(@test_path2).should eql(true)
|
47
|
+
@fs.rm(@test_path2)
|
48
|
+
end
|
49
|
+
|
50
|
+
it "should return a sane path type" do
|
51
|
+
@fs.mkpath(@test_path)
|
52
|
+
["file", "directory", "symlink", "unknown"].should include(@fs.type(@test_path))
|
53
|
+
@fs.rm(@test_path)
|
54
|
+
end
|
55
|
+
|
56
|
+
it "can return an array of directory entries" do
|
57
|
+
sub_paths = ["a", "b", "c"]
|
58
|
+
sub_paths.each do |sub_path|
|
59
|
+
@fs.mkpath(File.join(@test_path, sub_path))
|
60
|
+
end
|
61
|
+
@fs.entries(@test_path).class.should eql(Array)
|
62
|
+
@fs.entries(@test_path).map{|path| File.basename(path)}.reject{|x| x =~ /\./}.sort.should eql(sub_paths.sort)
|
63
|
+
@fs.rm(@test_path)
|
64
|
+
end
|
65
|
+
|
66
|
+
it "can answer to open with a writable file object" do
|
67
|
+
fileobj = @fs.open(@test_path, 'w')
|
68
|
+
fileobj.should respond_to :write
|
69
|
+
@fs.rm(@test_path)
|
70
|
+
end
|
71
|
+
|
72
|
+
end
|
73
|
+
|
74
|
+
describe "A new file" do
|
75
|
+
before do
|
76
|
+
@test_path = "/tmp/rspec/test_path"
|
77
|
+
@test_path2 = "/tmp/rspec/test_path2"
|
78
|
+
@test_string = "@('_')@"
|
79
|
+
@fs = Swineherd::FileSystem.get(current_test)
|
80
|
+
end
|
81
|
+
|
82
|
+
it "should be closeable" do
|
83
|
+
@fs.open(@test_path, 'w').close
|
84
|
+
end
|
85
|
+
|
86
|
+
it "should be writeable" do
|
87
|
+
fileobj = @fs.open(@test_path, 'w')
|
88
|
+
fileobj.write(@test_string)
|
89
|
+
fileobj.close
|
90
|
+
@fs.rm(@test_path)
|
91
|
+
end
|
92
|
+
|
93
|
+
it "should be readable" do
|
94
|
+
|
95
|
+
fileobjw = @fs.open(@test_path, 'w')
|
96
|
+
fileobjw.write(@test_string)
|
97
|
+
fileobjw.close
|
98
|
+
|
99
|
+
fileobjr = @fs.open(@test_path, 'r')
|
100
|
+
fileobjr.read.should eql(@test_string)
|
101
|
+
|
102
|
+
@fs.rm(@test_path)
|
103
|
+
end
|
104
|
+
|
105
|
+
end
|
@@ -0,0 +1,132 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
#
|
4
|
+
# These tests cannot possibly pass unless you have an amazon account with proper
|
5
|
+
# credentials. Furthermore, you definitely want a test bucket to play with. In
|
6
|
+
# this set of mock tests I've called it 'test-bucket' which will certainly get
|
7
|
+
# you an 'access-denied' error. Also, despite all that, 4 tests (see below)
|
8
|
+
# will fail outright.
|
9
|
+
#
|
10
|
+
# This one has to break the rules slightly because amazon-s3 is not actually a
|
11
|
+
# filesystem implementation. There's no such thing as a 'path' and so the following
|
12
|
+
# tests will fail:
|
13
|
+
#
|
14
|
+
# 1. it "should be able to create a path" (path won't exist but it's ok, that's what
|
15
|
+
# we expect)
|
16
|
+
#
|
17
|
+
# 2. it "should be able to copy paths" (it can't create paths that aren't files
|
18
|
+
# and so we expect this to fail, again it's ok.)
|
19
|
+
#
|
20
|
+
# 3. it "should be able to move paths" (it can't create paths that aren't files
|
21
|
+
# and so we expect this to fail, again it's ok.)
|
22
|
+
#
|
23
|
+
# 4. it "can return an array of directory entries" (ditto)
|
24
|
+
#
|
25
|
+
# Note: If one were to rewrite the above tests to use existing paths on s3 then the
|
26
|
+
# tests will succeed. Try it.
|
27
|
+
#
|
28
|
+
|
29
|
+
|
30
|
+
$LOAD_PATH << 'lib'
|
31
|
+
require 'swineherd/filesystem' ; include Swineherd
|
32
|
+
require 'rubygems'
|
33
|
+
require 'yaml'
|
34
|
+
require 'rspec'
|
35
|
+
|
36
|
+
options = YAML.load(File.read(File.dirname(__FILE__)+'/testcfg.yaml'))
|
37
|
+
current_test = 's3'
|
38
|
+
describe "A new filesystem" do
|
39
|
+
|
40
|
+
before do
|
41
|
+
@test_path = "#{options['s3_test_bucket']}/tmp/rspec/test_path"
|
42
|
+
@test_path2 = "#{options['s3_test_bucket']}/tmp/rspec/test_path2"
|
43
|
+
@fs = Swineherd::FileSystem.get(current_test, options['aws_access_key_id'], options['aws_secret_access_key'])
|
44
|
+
end
|
45
|
+
|
46
|
+
it "should implement exists?" do
|
47
|
+
[true, false].should include(@fs.exists?(@test_path))
|
48
|
+
end
|
49
|
+
|
50
|
+
it "should be able to create a path" do
|
51
|
+
@fs.mkpath(@test_path)
|
52
|
+
@fs.exists?(@test_path).should eql(true)
|
53
|
+
end
|
54
|
+
|
55
|
+
it "should be able to remove a path" do
|
56
|
+
@fs.mkpath(@test_path)
|
57
|
+
@fs.rm(@test_path)
|
58
|
+
@fs.exists?(@test_path).should eql(false)
|
59
|
+
end
|
60
|
+
|
61
|
+
it "should be able to copy paths" do
|
62
|
+
@fs.mkpath(@test_path)
|
63
|
+
@fs.cp(@test_path, @test_path2)
|
64
|
+
@fs.exists?(@test_path2).should eql(true)
|
65
|
+
@fs.rm(@test_path)
|
66
|
+
@fs.rm(@test_path2)
|
67
|
+
end
|
68
|
+
|
69
|
+
it "should be able to move paths" do
|
70
|
+
@fs.mkpath(@test_path)
|
71
|
+
@fs.mv(@test_path, @test_path2)
|
72
|
+
@fs.exists?(@test_path).should eql(false)
|
73
|
+
@fs.exists?(@test_path2).should eql(true)
|
74
|
+
@fs.rm(@test_path2)
|
75
|
+
end
|
76
|
+
|
77
|
+
it "should return a sane path type" do
|
78
|
+
@fs.mkpath(@test_path)
|
79
|
+
["file", "directory", "symlink", "unknown"].should include(@fs.type(@test_path))
|
80
|
+
@fs.rm(@test_path)
|
81
|
+
end
|
82
|
+
|
83
|
+
it "can return an array of directory entries" do
|
84
|
+
sub_paths = ["a", "b", "c"]
|
85
|
+
sub_paths.each do |sub_path|
|
86
|
+
@fs.mkpath(File.join(@test_path, sub_path))
|
87
|
+
end
|
88
|
+
@fs.entries(@test_path).class.should eql(Array)
|
89
|
+
@fs.entries(@test_path).map{|path| File.basename(path)}.reject{|x| x =~ /\./}.sort.should eql(sub_paths.sort)
|
90
|
+
@fs.rm(@test_path)
|
91
|
+
end
|
92
|
+
|
93
|
+
it "can answer to open with a writable file object" do
|
94
|
+
fileobj = @fs.open(@test_path, 'w')
|
95
|
+
fileobj.should respond_to :write
|
96
|
+
@fs.rm(@test_path)
|
97
|
+
end
|
98
|
+
|
99
|
+
end
|
100
|
+
|
101
|
+
describe "A new file" do
|
102
|
+
before do
|
103
|
+
@test_path = "#{options['s3_test_bucket']}/tmp/rspec/test_path"
|
104
|
+
@test_path2 = "#{options['s3_test_bucket']}/test_path2"
|
105
|
+
@test_string = "@('_')@"
|
106
|
+
@fs = Swineherd::FileSystem.get(current_test, options['aws_access_key_id'], options['aws_secret_access_key'])
|
107
|
+
end
|
108
|
+
|
109
|
+
it "should be closeable" do
|
110
|
+
@fs.open(@test_path, 'w').close
|
111
|
+
end
|
112
|
+
|
113
|
+
it "should be writeable" do
|
114
|
+
fileobj = @fs.open(@test_path, 'w')
|
115
|
+
fileobj.write(@test_string)
|
116
|
+
fileobj.close
|
117
|
+
@fs.rm(@test_path)
|
118
|
+
end
|
119
|
+
|
120
|
+
it "should be readable" do
|
121
|
+
|
122
|
+
fileobjw = @fs.open(@test_path, 'w')
|
123
|
+
fileobjw.write(@test_string)
|
124
|
+
fileobjw.close
|
125
|
+
|
126
|
+
fileobjr = @fs.open(@test_path, 'r')
|
127
|
+
fileobjr.read.should eql(@test_string)
|
128
|
+
|
129
|
+
@fs.rm(@test_path)
|
130
|
+
end
|
131
|
+
|
132
|
+
end
|
data/tests/testcfg.yaml
ADDED
metadata
ADDED
@@ -0,0 +1,204 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: swineherd
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 29
|
5
|
+
prerelease: false
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 0
|
9
|
+
- 1
|
10
|
+
version: 0.0.1
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Jacob Perkins
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2011-04-20 00:00:00 -05:00
|
19
|
+
default_executable:
|
20
|
+
dependencies:
|
21
|
+
- !ruby/object:Gem::Dependency
|
22
|
+
name: yard
|
23
|
+
prerelease: false
|
24
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ~>
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
hash: 7
|
30
|
+
segments:
|
31
|
+
- 0
|
32
|
+
- 6
|
33
|
+
- 0
|
34
|
+
version: 0.6.0
|
35
|
+
type: :development
|
36
|
+
version_requirements: *id001
|
37
|
+
- !ruby/object:Gem::Dependency
|
38
|
+
name: jeweler
|
39
|
+
prerelease: false
|
40
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ~>
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
hash: 7
|
46
|
+
segments:
|
47
|
+
- 1
|
48
|
+
- 5
|
49
|
+
- 2
|
50
|
+
version: 1.5.2
|
51
|
+
type: :development
|
52
|
+
version_requirements: *id002
|
53
|
+
- !ruby/object:Gem::Dependency
|
54
|
+
name: rcov
|
55
|
+
prerelease: false
|
56
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
hash: 3
|
62
|
+
segments:
|
63
|
+
- 0
|
64
|
+
version: "0"
|
65
|
+
type: :development
|
66
|
+
version_requirements: *id003
|
67
|
+
- !ruby/object:Gem::Dependency
|
68
|
+
name: configliere
|
69
|
+
prerelease: false
|
70
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
71
|
+
none: false
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
hash: 3
|
76
|
+
segments:
|
77
|
+
- 0
|
78
|
+
version: "0"
|
79
|
+
type: :runtime
|
80
|
+
version_requirements: *id004
|
81
|
+
- !ruby/object:Gem::Dependency
|
82
|
+
name: gorillib
|
83
|
+
prerelease: false
|
84
|
+
requirement: &id005 !ruby/object:Gem::Requirement
|
85
|
+
none: false
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
hash: 3
|
90
|
+
segments:
|
91
|
+
- 0
|
92
|
+
version: "0"
|
93
|
+
type: :runtime
|
94
|
+
version_requirements: *id005
|
95
|
+
- !ruby/object:Gem::Dependency
|
96
|
+
name: erubis
|
97
|
+
prerelease: false
|
98
|
+
requirement: &id006 !ruby/object:Gem::Requirement
|
99
|
+
none: false
|
100
|
+
requirements:
|
101
|
+
- - ">="
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
hash: 3
|
104
|
+
segments:
|
105
|
+
- 0
|
106
|
+
version: "0"
|
107
|
+
type: :runtime
|
108
|
+
version_requirements: *id006
|
109
|
+
- !ruby/object:Gem::Dependency
|
110
|
+
name: right_aws
|
111
|
+
prerelease: false
|
112
|
+
requirement: &id007 !ruby/object:Gem::Requirement
|
113
|
+
none: false
|
114
|
+
requirements:
|
115
|
+
- - ">="
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
hash: 3
|
118
|
+
segments:
|
119
|
+
- 0
|
120
|
+
version: "0"
|
121
|
+
type: :runtime
|
122
|
+
version_requirements: *id007
|
123
|
+
description: Swineherd is for running scripts and workflows on filesystems.
|
124
|
+
email: jacob.a.perkins@gmail.com
|
125
|
+
executables:
|
126
|
+
- hdp-tree
|
127
|
+
- hadoop-stream
|
128
|
+
extensions: []
|
129
|
+
|
130
|
+
extra_rdoc_files:
|
131
|
+
- LICENSE
|
132
|
+
- README.textile
|
133
|
+
files:
|
134
|
+
- LICENSE
|
135
|
+
- README.textile
|
136
|
+
- Rakefile
|
137
|
+
- VERSION
|
138
|
+
- bin/hadoop-stream
|
139
|
+
- bin/hdp-tree
|
140
|
+
- examples/pagerank/data/seinfeld_network.tsv
|
141
|
+
- examples/pagerank/pagerank.rb
|
142
|
+
- examples/pagerank/scripts/cut_off_list.rb
|
143
|
+
- examples/pagerank/scripts/histogram.R
|
144
|
+
- examples/pagerank/scripts/pagerank.pig
|
145
|
+
- examples/pagerank/scripts/pagerank_initialize.pig
|
146
|
+
- lib/swineherd.rb
|
147
|
+
- lib/swineherd/filesystem.rb
|
148
|
+
- lib/swineherd/filesystem/README_filesystem.textile
|
149
|
+
- lib/swineherd/filesystem/basefilesystem.rb
|
150
|
+
- lib/swineherd/filesystem/filesystems.rb
|
151
|
+
- lib/swineherd/filesystem/hadoopfilesystem.rb
|
152
|
+
- lib/swineherd/filesystem/localfilesystem.rb
|
153
|
+
- lib/swineherd/filesystem/localfs.rb
|
154
|
+
- lib/swineherd/filesystem/s3filesystem.rb
|
155
|
+
- lib/swineherd/script.rb
|
156
|
+
- lib/swineherd/script/hadoop_script.rb
|
157
|
+
- lib/swineherd/script/pig_script.rb
|
158
|
+
- lib/swineherd/script/r_script.rb
|
159
|
+
- lib/swineherd/script/wukong_script.rb
|
160
|
+
- lib/swineherd/template.rb
|
161
|
+
- lib/swineherd/workflow.rb
|
162
|
+
- lib/swineherd/workflow/job.rb
|
163
|
+
- notes.txt
|
164
|
+
- swineherd.gemspec
|
165
|
+
- tests/test_filesystem.rb
|
166
|
+
- tests/test_s3_filesystem.rb
|
167
|
+
- tests/testcfg.yaml
|
168
|
+
has_rdoc: true
|
169
|
+
homepage: http://github.com/Ganglion/swineherd
|
170
|
+
licenses:
|
171
|
+
- MIT
|
172
|
+
post_install_message:
|
173
|
+
rdoc_options: []
|
174
|
+
|
175
|
+
require_paths:
|
176
|
+
- lib
|
177
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
178
|
+
none: false
|
179
|
+
requirements:
|
180
|
+
- - ">="
|
181
|
+
- !ruby/object:Gem::Version
|
182
|
+
hash: 3
|
183
|
+
segments:
|
184
|
+
- 0
|
185
|
+
version: "0"
|
186
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
187
|
+
none: false
|
188
|
+
requirements:
|
189
|
+
- - ">="
|
190
|
+
- !ruby/object:Gem::Version
|
191
|
+
hash: 3
|
192
|
+
segments:
|
193
|
+
- 0
|
194
|
+
version: "0"
|
195
|
+
requirements: []
|
196
|
+
|
197
|
+
rubyforge_project:
|
198
|
+
rubygems_version: 1.3.7
|
199
|
+
signing_key:
|
200
|
+
specification_version: 3
|
201
|
+
summary: Flexible data workflow glue.
|
202
|
+
test_files:
|
203
|
+
- examples/pagerank/pagerank.rb
|
204
|
+
- examples/pagerank/scripts/cut_off_list.rb
|