swineherd 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,105 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ $LOAD_PATH << 'lib'
4
+ require 'swineherd/filesystem' ; include Swineherd
5
+ require 'rubygems'
6
+ require 'yaml'
7
+ require 'rspec'
8
+
9
+ options = YAML.load(File.read(File.dirname(__FILE__)+'/testcfg.yaml'))
10
+ current_test = options['filesystem_to_test']
11
+ describe "A new filesystem" do
12
+
13
+ before do
14
+ @test_path = "/tmp/rspec/test_path"
15
+ @test_path2 = "/tmp/rspec/test_path2"
16
+ @fs = Swineherd::FileSystem.get(current_test)
17
+ end
18
+
19
+ it "should implement exists?" do
20
+ [true, false].should include(@fs.exists?(@test_path))
21
+ end
22
+
23
+ it "should be able to create a path" do
24
+ @fs.mkpath(@test_path)
25
+ @fs.exists?(@test_path).should eql(true)
26
+ end
27
+
28
+ it "should be able to remove a path" do
29
+ @fs.mkpath(@test_path)
30
+ @fs.rm(@test_path)
31
+ @fs.exists?(@test_path).should eql(false)
32
+ end
33
+
34
+ it "should be able to copy paths" do
35
+ @fs.mkpath(@test_path)
36
+ @fs.cp(@test_path, @test_path2)
37
+ @fs.exists?(@test_path2).should eql(true)
38
+ @fs.rm(@test_path)
39
+ @fs.rm(@test_path2)
40
+ end
41
+
42
+ it "should be able to move paths" do
43
+ @fs.mkpath(@test_path)
44
+ @fs.mv(@test_path, @test_path2)
45
+ @fs.exists?(@test_path).should eql(false)
46
+ @fs.exists?(@test_path2).should eql(true)
47
+ @fs.rm(@test_path2)
48
+ end
49
+
50
+ it "should return a sane path type" do
51
+ @fs.mkpath(@test_path)
52
+ ["file", "directory", "symlink", "unknown"].should include(@fs.type(@test_path))
53
+ @fs.rm(@test_path)
54
+ end
55
+
56
+ it "can return an array of directory entries" do
57
+ sub_paths = ["a", "b", "c"]
58
+ sub_paths.each do |sub_path|
59
+ @fs.mkpath(File.join(@test_path, sub_path))
60
+ end
61
+ @fs.entries(@test_path).class.should eql(Array)
62
+ @fs.entries(@test_path).map{|path| File.basename(path)}.reject{|x| x =~ /\./}.sort.should eql(sub_paths.sort)
63
+ @fs.rm(@test_path)
64
+ end
65
+
66
+ it "can answer to open with a writable file object" do
67
+ fileobj = @fs.open(@test_path, 'w')
68
+ fileobj.should respond_to :write
69
+ @fs.rm(@test_path)
70
+ end
71
+
72
+ end
73
+
74
+ describe "A new file" do
75
+ before do
76
+ @test_path = "/tmp/rspec/test_path"
77
+ @test_path2 = "/tmp/rspec/test_path2"
78
+ @test_string = "@('_')@"
79
+ @fs = Swineherd::FileSystem.get(current_test)
80
+ end
81
+
82
+ it "should be closeable" do
83
+ @fs.open(@test_path, 'w').close
84
+ end
85
+
86
+ it "should be writeable" do
87
+ fileobj = @fs.open(@test_path, 'w')
88
+ fileobj.write(@test_string)
89
+ fileobj.close
90
+ @fs.rm(@test_path)
91
+ end
92
+
93
+ it "should be readable" do
94
+
95
+ fileobjw = @fs.open(@test_path, 'w')
96
+ fileobjw.write(@test_string)
97
+ fileobjw.close
98
+
99
+ fileobjr = @fs.open(@test_path, 'r')
100
+ fileobjr.read.should eql(@test_string)
101
+
102
+ @fs.rm(@test_path)
103
+ end
104
+
105
+ end
@@ -0,0 +1,132 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ #
4
+ # These tests cannot possibly pass unless you have an amazon account with proper
5
+ # credentials. Furthermore, you definitely want a test bucket to play with. In
6
+ # this set of mock tests I've called it 'test-bucket' which will certainly get
7
+ # you an 'access-denied' error. Also, despite all that, 4 tests (see below)
8
+ # will fail outright.
9
+ #
10
+ # This one has to break the rules slightly because amazon-s3 is not actually a
11
+ # filesystem implementation. There's no such thing as a 'path' and so the following
12
+ # tests will fail:
13
+ #
14
+ # 1. it "should be able to create a path" (path won't exist but it's ok, that's what
15
+ # we expect)
16
+ #
17
+ # 2. it "should be able to copy paths" (it can't create paths that aren't files
18
+ # and so we expect this to fail, again it's ok.)
19
+ #
20
+ # 3. it "should be able to move paths" (it can't create paths that aren't files
21
+ # and so we expect this to fail, again it's ok.)
22
+ #
23
+ # 4. it "can return an array of directory entries" (ditto)
24
+ #
25
+ # Note: If one were to rewrite the above tests to use existing paths on S3, then the
26
+ # tests would succeed. Try it.
27
+ #
28
+
29
+
30
+ $LOAD_PATH << 'lib'
31
+ require 'swineherd/filesystem' ; include Swineherd
32
+ require 'rubygems'
33
+ require 'yaml'
34
+ require 'rspec'
35
+
36
+ options = YAML.load(File.read(File.dirname(__FILE__)+'/testcfg.yaml'))
37
+ current_test = 's3'
38
+ describe "A new filesystem" do
39
+
40
+ before do
41
+ @test_path = "#{options['s3_test_bucket']}/tmp/rspec/test_path"
42
+ @test_path2 = "#{options['s3_test_bucket']}/tmp/rspec/test_path2"
43
+ @fs = Swineherd::FileSystem.get(current_test, options['aws_access_key_id'], options['aws_secret_access_key'])
44
+ end
45
+
46
+ it "should implement exists?" do
47
+ [true, false].should include(@fs.exists?(@test_path))
48
+ end
49
+
50
+ it "should be able to create a path" do
51
+ @fs.mkpath(@test_path)
52
+ @fs.exists?(@test_path).should eql(true)
53
+ end
54
+
55
+ it "should be able to remove a path" do
56
+ @fs.mkpath(@test_path)
57
+ @fs.rm(@test_path)
58
+ @fs.exists?(@test_path).should eql(false)
59
+ end
60
+
61
+ it "should be able to copy paths" do
62
+ @fs.mkpath(@test_path)
63
+ @fs.cp(@test_path, @test_path2)
64
+ @fs.exists?(@test_path2).should eql(true)
65
+ @fs.rm(@test_path)
66
+ @fs.rm(@test_path2)
67
+ end
68
+
69
+ it "should be able to move paths" do
70
+ @fs.mkpath(@test_path)
71
+ @fs.mv(@test_path, @test_path2)
72
+ @fs.exists?(@test_path).should eql(false)
73
+ @fs.exists?(@test_path2).should eql(true)
74
+ @fs.rm(@test_path2)
75
+ end
76
+
77
+ it "should return a sane path type" do
78
+ @fs.mkpath(@test_path)
79
+ ["file", "directory", "symlink", "unknown"].should include(@fs.type(@test_path))
80
+ @fs.rm(@test_path)
81
+ end
82
+
83
+ it "can return an array of directory entries" do
84
+ sub_paths = ["a", "b", "c"]
85
+ sub_paths.each do |sub_path|
86
+ @fs.mkpath(File.join(@test_path, sub_path))
87
+ end
88
+ @fs.entries(@test_path).class.should eql(Array)
89
+ @fs.entries(@test_path).map{|path| File.basename(path)}.reject{|x| x =~ /\./}.sort.should eql(sub_paths.sort)
90
+ @fs.rm(@test_path)
91
+ end
92
+
93
+ it "can answer to open with a writable file object" do
94
+ fileobj = @fs.open(@test_path, 'w')
95
+ fileobj.should respond_to :write
96
+ @fs.rm(@test_path)
97
+ end
98
+
99
+ end
100
+
101
+ describe "A new file" do
102
+ before do
103
+ @test_path = "#{options['s3_test_bucket']}/tmp/rspec/test_path"
104
+ @test_path2 = "#{options['s3_test_bucket']}/test_path2"
105
+ @test_string = "@('_')@"
106
+ @fs = Swineherd::FileSystem.get(current_test, options['aws_access_key_id'], options['aws_secret_access_key'])
107
+ end
108
+
109
+ it "should be closeable" do
110
+ @fs.open(@test_path, 'w').close
111
+ end
112
+
113
+ it "should be writeable" do
114
+ fileobj = @fs.open(@test_path, 'w')
115
+ fileobj.write(@test_string)
116
+ fileobj.close
117
+ @fs.rm(@test_path)
118
+ end
119
+
120
+ it "should be readable" do
121
+
122
+ fileobjw = @fs.open(@test_path, 'w')
123
+ fileobjw.write(@test_string)
124
+ fileobjw.close
125
+
126
+ fileobjr = @fs.open(@test_path, 'r')
127
+ fileobjr.read.should eql(@test_string)
128
+
129
+ @fs.rm(@test_path)
130
+ end
131
+
132
+ end
@@ -0,0 +1,7 @@
1
+ ---
2
+ filesystem_to_test: file
3
+ s3_test_bucket: infochimps-test
4
+
5
+ # :) you'll probably want to change these
6
+ aws_access_key_id: myaccessid
7
+ aws_secret_access_key: 1234mysecretaccesskey8q7fh
metadata ADDED
@@ -0,0 +1,204 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: swineherd
3
+ version: !ruby/object:Gem::Version
4
+ hash: 29
5
+ prerelease: false
6
+ segments:
7
+ - 0
8
+ - 0
9
+ - 1
10
+ version: 0.0.1
11
+ platform: ruby
12
+ authors:
13
+ - Jacob Perkins
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2011-04-20 00:00:00 -05:00
19
+ default_executable:
20
+ dependencies:
21
+ - !ruby/object:Gem::Dependency
22
+ name: yard
23
+ prerelease: false
24
+ requirement: &id001 !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ hash: 7
30
+ segments:
31
+ - 0
32
+ - 6
33
+ - 0
34
+ version: 0.6.0
35
+ type: :development
36
+ version_requirements: *id001
37
+ - !ruby/object:Gem::Dependency
38
+ name: jeweler
39
+ prerelease: false
40
+ requirement: &id002 !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ~>
44
+ - !ruby/object:Gem::Version
45
+ hash: 7
46
+ segments:
47
+ - 1
48
+ - 5
49
+ - 2
50
+ version: 1.5.2
51
+ type: :development
52
+ version_requirements: *id002
53
+ - !ruby/object:Gem::Dependency
54
+ name: rcov
55
+ prerelease: false
56
+ requirement: &id003 !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ hash: 3
62
+ segments:
63
+ - 0
64
+ version: "0"
65
+ type: :development
66
+ version_requirements: *id003
67
+ - !ruby/object:Gem::Dependency
68
+ name: configliere
69
+ prerelease: false
70
+ requirement: &id004 !ruby/object:Gem::Requirement
71
+ none: false
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ hash: 3
76
+ segments:
77
+ - 0
78
+ version: "0"
79
+ type: :runtime
80
+ version_requirements: *id004
81
+ - !ruby/object:Gem::Dependency
82
+ name: gorillib
83
+ prerelease: false
84
+ requirement: &id005 !ruby/object:Gem::Requirement
85
+ none: false
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ hash: 3
90
+ segments:
91
+ - 0
92
+ version: "0"
93
+ type: :runtime
94
+ version_requirements: *id005
95
+ - !ruby/object:Gem::Dependency
96
+ name: erubis
97
+ prerelease: false
98
+ requirement: &id006 !ruby/object:Gem::Requirement
99
+ none: false
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ hash: 3
104
+ segments:
105
+ - 0
106
+ version: "0"
107
+ type: :runtime
108
+ version_requirements: *id006
109
+ - !ruby/object:Gem::Dependency
110
+ name: right_aws
111
+ prerelease: false
112
+ requirement: &id007 !ruby/object:Gem::Requirement
113
+ none: false
114
+ requirements:
115
+ - - ">="
116
+ - !ruby/object:Gem::Version
117
+ hash: 3
118
+ segments:
119
+ - 0
120
+ version: "0"
121
+ type: :runtime
122
+ version_requirements: *id007
123
+ description: Swineherd is for running scripts and workflows on filesystems.
124
+ email: jacob.a.perkins@gmail.com
125
+ executables:
126
+ - hdp-tree
127
+ - hadoop-stream
128
+ extensions: []
129
+
130
+ extra_rdoc_files:
131
+ - LICENSE
132
+ - README.textile
133
+ files:
134
+ - LICENSE
135
+ - README.textile
136
+ - Rakefile
137
+ - VERSION
138
+ - bin/hadoop-stream
139
+ - bin/hdp-tree
140
+ - examples/pagerank/data/seinfeld_network.tsv
141
+ - examples/pagerank/pagerank.rb
142
+ - examples/pagerank/scripts/cut_off_list.rb
143
+ - examples/pagerank/scripts/histogram.R
144
+ - examples/pagerank/scripts/pagerank.pig
145
+ - examples/pagerank/scripts/pagerank_initialize.pig
146
+ - lib/swineherd.rb
147
+ - lib/swineherd/filesystem.rb
148
+ - lib/swineherd/filesystem/README_filesystem.textile
149
+ - lib/swineherd/filesystem/basefilesystem.rb
150
+ - lib/swineherd/filesystem/filesystems.rb
151
+ - lib/swineherd/filesystem/hadoopfilesystem.rb
152
+ - lib/swineherd/filesystem/localfilesystem.rb
153
+ - lib/swineherd/filesystem/localfs.rb
154
+ - lib/swineherd/filesystem/s3filesystem.rb
155
+ - lib/swineherd/script.rb
156
+ - lib/swineherd/script/hadoop_script.rb
157
+ - lib/swineherd/script/pig_script.rb
158
+ - lib/swineherd/script/r_script.rb
159
+ - lib/swineherd/script/wukong_script.rb
160
+ - lib/swineherd/template.rb
161
+ - lib/swineherd/workflow.rb
162
+ - lib/swineherd/workflow/job.rb
163
+ - notes.txt
164
+ - swineherd.gemspec
165
+ - tests/test_filesystem.rb
166
+ - tests/test_s3_filesystem.rb
167
+ - tests/testcfg.yaml
168
+ has_rdoc: true
169
+ homepage: http://github.com/Ganglion/swineherd
170
+ licenses:
171
+ - MIT
172
+ post_install_message:
173
+ rdoc_options: []
174
+
175
+ require_paths:
176
+ - lib
177
+ required_ruby_version: !ruby/object:Gem::Requirement
178
+ none: false
179
+ requirements:
180
+ - - ">="
181
+ - !ruby/object:Gem::Version
182
+ hash: 3
183
+ segments:
184
+ - 0
185
+ version: "0"
186
+ required_rubygems_version: !ruby/object:Gem::Requirement
187
+ none: false
188
+ requirements:
189
+ - - ">="
190
+ - !ruby/object:Gem::Version
191
+ hash: 3
192
+ segments:
193
+ - 0
194
+ version: "0"
195
+ requirements: []
196
+
197
+ rubyforge_project:
198
+ rubygems_version: 1.3.7
199
+ signing_key:
200
+ specification_version: 3
201
+ summary: Flexible data workflow glue.
202
+ test_files:
203
+ - examples/pagerank/pagerank.rb
204
+ - examples/pagerank/scripts/cut_off_list.rb