swineherd 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,105 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ $LOAD_PATH << 'lib'
4
+ require 'swineherd/filesystem' ; include Swineherd
5
+ require 'rubygems'
6
+ require 'yaml'
7
+ require 'rspec'
8
+
9
+ options = YAML.load(File.read(File.dirname(__FILE__)+'/testcfg.yaml'))
10
+ current_test = options['filesystem_to_test']
11
+ describe "A new filesystem" do
12
+
13
+ before do
14
+ @test_path = "/tmp/rspec/test_path"
15
+ @test_path2 = "/tmp/rspec/test_path2"
16
+ @fs = Swineherd::FileSystem.get(current_test)
17
+ end
18
+
19
+ it "should implement exists?" do
20
+ [true, false].should include(@fs.exists?(@test_path))
21
+ end
22
+
23
+ it "should be able to create a path" do
24
+ @fs.mkpath(@test_path)
25
+ @fs.exists?(@test_path).should eql(true)
26
+ end
27
+
28
+ it "should be able to remove a path" do
29
+ @fs.mkpath(@test_path)
30
+ @fs.rm(@test_path)
31
+ @fs.exists?(@test_path).should eql(false)
32
+ end
33
+
34
+ it "should be able to copy paths" do
35
+ @fs.mkpath(@test_path)
36
+ @fs.cp(@test_path, @test_path2)
37
+ @fs.exists?(@test_path2).should eql(true)
38
+ @fs.rm(@test_path)
39
+ @fs.rm(@test_path2)
40
+ end
41
+
42
+ it "should be able to move paths" do
43
+ @fs.mkpath(@test_path)
44
+ @fs.mv(@test_path, @test_path2)
45
+ @fs.exists?(@test_path).should eql(false)
46
+ @fs.exists?(@test_path2).should eql(true)
47
+ @fs.rm(@test_path2)
48
+ end
49
+
50
+ it "should return a sane path type" do
51
+ @fs.mkpath(@test_path)
52
+ ["file", "directory", "symlink", "unknown"].should include(@fs.type(@test_path))
53
+ @fs.rm(@test_path)
54
+ end
55
+
56
+ it "can return an array of directory entries" do
57
+ sub_paths = ["a", "b", "c"]
58
+ sub_paths.each do |sub_path|
59
+ @fs.mkpath(File.join(@test_path, sub_path))
60
+ end
61
+ @fs.entries(@test_path).class.should eql(Array)
62
+ @fs.entries(@test_path).map{|path| File.basename(path)}.reject{|x| x =~ /\./}.sort.should eql(sub_paths.sort)
63
+ @fs.rm(@test_path)
64
+ end
65
+
66
+ it "can answer to open with a writable file object" do
67
+ fileobj = @fs.open(@test_path, 'w')
68
+ fileobj.should respond_to :write
69
+ @fs.rm(@test_path)
70
+ end
71
+
72
+ end
73
+
74
+ describe "A new file" do
75
+ before do
76
+ @test_path = "/tmp/rspec/test_path"
77
+ @test_path2 = "/tmp/rspec/test_path2"
78
+ @test_string = "@('_')@"
79
+ @fs = Swineherd::FileSystem.get(current_test)
80
+ end
81
+
82
+ it "should be closeable" do
83
+ @fs.open(@test_path, 'w').close
84
+ end
85
+
86
+ it "should be writeable" do
87
+ fileobj = @fs.open(@test_path, 'w')
88
+ fileobj.write(@test_string)
89
+ fileobj.close
90
+ @fs.rm(@test_path)
91
+ end
92
+
93
+ it "should be readable" do
94
+
95
+ fileobjw = @fs.open(@test_path, 'w')
96
+ fileobjw.write(@test_string)
97
+ fileobjw.close
98
+
99
+ fileobjr = @fs.open(@test_path, 'r')
100
+ fileobjr.read.should eql(@test_string)
101
+
102
+ @fs.rm(@test_path)
103
+ end
104
+
105
+ end
@@ -0,0 +1,132 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ #
4
+ # These tests cannot possibly pass unless you have an amazon account with proper
5
+ # credentials. Furthermore, you definitely want a test bucket to play with. In
6
+ # this set of mock tests I've called it 'test-bucket' which will certainly get
7
+ # you an 'access-denied' error. Also, despite all that, 4 tests (see below)
8
+ # will fail outright.
9
+ #
10
+ # This one has to break the rules slightly because amazon-s3 is not actually a
11
+ # filesystem implementation. There's no such thing as a 'path' and so the following
12
+ # tests will fail:
13
+ #
14
+ # 1. it "should be able to create a path" (path won't exist but it's ok, that's what
15
+ # we expect)
16
+ #
17
+ # 2. it "should be able to copy paths" (it can't create paths that aren't files
18
+ # and so we expect this to fail, again it's ok.)
19
+ #
20
+ # 3. it "should be able to move paths" (it can't create paths that aren't files
21
+ # and so we expect this to fail, again it's ok.)
22
+ #
23
+ # 4. it "can return an array of directory entries" (ditto)
24
+ #
25
+ # Note: If one were to rewrite the above tests to use existing paths on s3 then the
26
+ # tests would succeed. Try it.
27
+ #
28
+
29
+
30
+ $LOAD_PATH << 'lib'
31
+ require 'swineherd/filesystem' ; include Swineherd
32
+ require 'rubygems'
33
+ require 'yaml'
34
+ require 'rspec'
35
+
36
+ options = YAML.load(File.read(File.dirname(__FILE__)+'/testcfg.yaml'))
37
+ current_test = 's3'
38
+ describe "A new filesystem" do
39
+
40
+ before do
41
+ @test_path = "#{options['s3_test_bucket']}/tmp/rspec/test_path"
42
+ @test_path2 = "#{options['s3_test_bucket']}/tmp/rspec/test_path2"
43
+ @fs = Swineherd::FileSystem.get(current_test, options['aws_access_key_id'], options['aws_secret_access_key'])
44
+ end
45
+
46
+ it "should implement exists?" do
47
+ [true, false].should include(@fs.exists?(@test_path))
48
+ end
49
+
50
+ it "should be able to create a path" do
51
+ @fs.mkpath(@test_path)
52
+ @fs.exists?(@test_path).should eql(true)
53
+ end
54
+
55
+ it "should be able to remove a path" do
56
+ @fs.mkpath(@test_path)
57
+ @fs.rm(@test_path)
58
+ @fs.exists?(@test_path).should eql(false)
59
+ end
60
+
61
+ it "should be able to copy paths" do
62
+ @fs.mkpath(@test_path)
63
+ @fs.cp(@test_path, @test_path2)
64
+ @fs.exists?(@test_path2).should eql(true)
65
+ @fs.rm(@test_path)
66
+ @fs.rm(@test_path2)
67
+ end
68
+
69
+ it "should be able to move paths" do
70
+ @fs.mkpath(@test_path)
71
+ @fs.mv(@test_path, @test_path2)
72
+ @fs.exists?(@test_path).should eql(false)
73
+ @fs.exists?(@test_path2).should eql(true)
74
+ @fs.rm(@test_path2)
75
+ end
76
+
77
+ it "should return a sane path type" do
78
+ @fs.mkpath(@test_path)
79
+ ["file", "directory", "symlink", "unknown"].should include(@fs.type(@test_path))
80
+ @fs.rm(@test_path)
81
+ end
82
+
83
+ it "can return an array of directory entries" do
84
+ sub_paths = ["a", "b", "c"]
85
+ sub_paths.each do |sub_path|
86
+ @fs.mkpath(File.join(@test_path, sub_path))
87
+ end
88
+ @fs.entries(@test_path).class.should eql(Array)
89
+ @fs.entries(@test_path).map{|path| File.basename(path)}.reject{|x| x =~ /\./}.sort.should eql(sub_paths.sort)
90
+ @fs.rm(@test_path)
91
+ end
92
+
93
+ it "can answer to open with a writable file object" do
94
+ fileobj = @fs.open(@test_path, 'w')
95
+ fileobj.should respond_to :write
96
+ @fs.rm(@test_path)
97
+ end
98
+
99
+ end
100
+
101
+ describe "A new file" do
102
+ before do
103
+ @test_path = "#{options['s3_test_bucket']}/tmp/rspec/test_path"
104
+ @test_path2 = "#{options['s3_test_bucket']}/test_path2"
105
+ @test_string = "@('_')@"
106
+ @fs = Swineherd::FileSystem.get(current_test, options['aws_access_key_id'], options['aws_secret_access_key'])
107
+ end
108
+
109
+ it "should be closeable" do
110
+ @fs.open(@test_path, 'w').close
111
+ end
112
+
113
+ it "should be writeable" do
114
+ fileobj = @fs.open(@test_path, 'w')
115
+ fileobj.write(@test_string)
116
+ fileobj.close
117
+ @fs.rm(@test_path)
118
+ end
119
+
120
+ it "should be readable" do
121
+
122
+ fileobjw = @fs.open(@test_path, 'w')
123
+ fileobjw.write(@test_string)
124
+ fileobjw.close
125
+
126
+ fileobjr = @fs.open(@test_path, 'r')
127
+ fileobjr.read.should eql(@test_string)
128
+
129
+ @fs.rm(@test_path)
130
+ end
131
+
132
+ end
@@ -0,0 +1,7 @@
1
+ ---
2
+ filesystem_to_test: file
3
+ s3_test_bucket: infochimps-test
4
+
5
+ # :) you'll probably want to change these
6
+ aws_access_key_id: myaccessid
7
+ aws_secret_access_key: 1234mysecretaccesskey8q7fh
metadata ADDED
@@ -0,0 +1,204 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: swineherd
3
+ version: !ruby/object:Gem::Version
4
+ hash: 29
5
+ prerelease: false
6
+ segments:
7
+ - 0
8
+ - 0
9
+ - 1
10
+ version: 0.0.1
11
+ platform: ruby
12
+ authors:
13
+ - Jacob Perkins
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2011-04-20 00:00:00 -05:00
19
+ default_executable:
20
+ dependencies:
21
+ - !ruby/object:Gem::Dependency
22
+ name: yard
23
+ prerelease: false
24
+ requirement: &id001 !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ hash: 7
30
+ segments:
31
+ - 0
32
+ - 6
33
+ - 0
34
+ version: 0.6.0
35
+ type: :development
36
+ version_requirements: *id001
37
+ - !ruby/object:Gem::Dependency
38
+ name: jeweler
39
+ prerelease: false
40
+ requirement: &id002 !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ~>
44
+ - !ruby/object:Gem::Version
45
+ hash: 7
46
+ segments:
47
+ - 1
48
+ - 5
49
+ - 2
50
+ version: 1.5.2
51
+ type: :development
52
+ version_requirements: *id002
53
+ - !ruby/object:Gem::Dependency
54
+ name: rcov
55
+ prerelease: false
56
+ requirement: &id003 !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ hash: 3
62
+ segments:
63
+ - 0
64
+ version: "0"
65
+ type: :development
66
+ version_requirements: *id003
67
+ - !ruby/object:Gem::Dependency
68
+ name: configliere
69
+ prerelease: false
70
+ requirement: &id004 !ruby/object:Gem::Requirement
71
+ none: false
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ hash: 3
76
+ segments:
77
+ - 0
78
+ version: "0"
79
+ type: :runtime
80
+ version_requirements: *id004
81
+ - !ruby/object:Gem::Dependency
82
+ name: gorillib
83
+ prerelease: false
84
+ requirement: &id005 !ruby/object:Gem::Requirement
85
+ none: false
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ hash: 3
90
+ segments:
91
+ - 0
92
+ version: "0"
93
+ type: :runtime
94
+ version_requirements: *id005
95
+ - !ruby/object:Gem::Dependency
96
+ name: erubis
97
+ prerelease: false
98
+ requirement: &id006 !ruby/object:Gem::Requirement
99
+ none: false
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ hash: 3
104
+ segments:
105
+ - 0
106
+ version: "0"
107
+ type: :runtime
108
+ version_requirements: *id006
109
+ - !ruby/object:Gem::Dependency
110
+ name: right_aws
111
+ prerelease: false
112
+ requirement: &id007 !ruby/object:Gem::Requirement
113
+ none: false
114
+ requirements:
115
+ - - ">="
116
+ - !ruby/object:Gem::Version
117
+ hash: 3
118
+ segments:
119
+ - 0
120
+ version: "0"
121
+ type: :runtime
122
+ version_requirements: *id007
123
+ description: Swineherd is for running scripts and workflows on filesystems.
124
+ email: jacob.a.perkins@gmail.com
125
+ executables:
126
+ - hdp-tree
127
+ - hadoop-stream
128
+ extensions: []
129
+
130
+ extra_rdoc_files:
131
+ - LICENSE
132
+ - README.textile
133
+ files:
134
+ - LICENSE
135
+ - README.textile
136
+ - Rakefile
137
+ - VERSION
138
+ - bin/hadoop-stream
139
+ - bin/hdp-tree
140
+ - examples/pagerank/data/seinfeld_network.tsv
141
+ - examples/pagerank/pagerank.rb
142
+ - examples/pagerank/scripts/cut_off_list.rb
143
+ - examples/pagerank/scripts/histogram.R
144
+ - examples/pagerank/scripts/pagerank.pig
145
+ - examples/pagerank/scripts/pagerank_initialize.pig
146
+ - lib/swineherd.rb
147
+ - lib/swineherd/filesystem.rb
148
+ - lib/swineherd/filesystem/README_filesystem.textile
149
+ - lib/swineherd/filesystem/basefilesystem.rb
150
+ - lib/swineherd/filesystem/filesystems.rb
151
+ - lib/swineherd/filesystem/hadoopfilesystem.rb
152
+ - lib/swineherd/filesystem/localfilesystem.rb
153
+ - lib/swineherd/filesystem/localfs.rb
154
+ - lib/swineherd/filesystem/s3filesystem.rb
155
+ - lib/swineherd/script.rb
156
+ - lib/swineherd/script/hadoop_script.rb
157
+ - lib/swineherd/script/pig_script.rb
158
+ - lib/swineherd/script/r_script.rb
159
+ - lib/swineherd/script/wukong_script.rb
160
+ - lib/swineherd/template.rb
161
+ - lib/swineherd/workflow.rb
162
+ - lib/swineherd/workflow/job.rb
163
+ - notes.txt
164
+ - swineherd.gemspec
165
+ - tests/test_filesystem.rb
166
+ - tests/test_s3_filesystem.rb
167
+ - tests/testcfg.yaml
168
+ has_rdoc: true
169
+ homepage: http://github.com/Ganglion/swineherd
170
+ licenses:
171
+ - MIT
172
+ post_install_message:
173
+ rdoc_options: []
174
+
175
+ require_paths:
176
+ - lib
177
+ required_ruby_version: !ruby/object:Gem::Requirement
178
+ none: false
179
+ requirements:
180
+ - - ">="
181
+ - !ruby/object:Gem::Version
182
+ hash: 3
183
+ segments:
184
+ - 0
185
+ version: "0"
186
+ required_rubygems_version: !ruby/object:Gem::Requirement
187
+ none: false
188
+ requirements:
189
+ - - ">="
190
+ - !ruby/object:Gem::Version
191
+ hash: 3
192
+ segments:
193
+ - 0
194
+ version: "0"
195
+ requirements: []
196
+
197
+ rubyforge_project:
198
+ rubygems_version: 1.3.7
199
+ signing_key:
200
+ specification_version: 3
201
+ summary: Flexible data workflow glue.
202
+ test_files:
203
+ - examples/pagerank/pagerank.rb
204
+ - examples/pagerank/scripts/cut_off_list.rb