swineherd 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION +1 -1
 - data/lib/swineherd.rb +3 -0
 - data/lib/swineherd/filesystem/basefilesystem.rb +6 -0
 - data/lib/swineherd/filesystem/hadoopfilesystem.rb +15 -0
 - data/lib/swineherd/filesystem/localfilesystem.rb +7 -0
 - data/lib/swineherd/filesystem/s3filesystem.rb +18 -0
 - data/lib/swineherd/foo +1 -0
 - data/lib/swineherd/script.rb +1 -0
 - data/lib/swineherd/workflow.rb +1 -1
 - data/swineherd.gemspec +4 -4
 - data/tests/test_filesystem.rb +13 -2
 - data/tests/test_s3_filesystem.rb +13 -2
 - metadata +7 -6
 
    
        data/VERSION
    CHANGED
    
    | 
         @@ -1 +1 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
            0.0.1
     | 
| 
      
 1 
     | 
    
         
            +
            0.0.2
         
     | 
    
        data/lib/swineherd.rb
    CHANGED
    
    
| 
         @@ -32,7 +32,22 @@ module Swineherd 
     | 
|
| 
       32 
32 
     | 
    
         
             
                  HadoopFile.new(path,mode,self,&blk)
         
     | 
| 
       33 
33 
     | 
    
         
             
                end
         
     | 
| 
       34 
34 
     | 
    
         | 
| 
      
 35 
     | 
    
         
            +
                def size path
         
     | 
| 
      
 36 
     | 
    
         
            +
                  lr(path).inject(0){|sz, f| sz += @hdfs.get_file_status(Path.new(f)).get_len}
         
     | 
| 
      
 37 
     | 
    
         
            +
                end
         
     | 
| 
       35 
38 
     | 
    
         | 
| 
      
 39 
     | 
    
         
            +
                #
         
     | 
| 
      
 40 
     | 
    
         
            +
                # Recursively list paths
         
     | 
| 
      
 41 
     | 
    
         
            +
                #
         
     | 
| 
      
 42 
     | 
    
         
            +
                def lr path
         
     | 
| 
      
 43 
     | 
    
         
            +
                  paths = entries(path)
         
     | 
| 
      
 44 
     | 
    
         
            +
                  if (paths && !paths.empty?)
         
     | 
| 
      
 45 
     | 
    
         
            +
                    paths.map{|e| lr(e)}.flatten
         
     | 
| 
      
 46 
     | 
    
         
            +
                  else
         
     | 
| 
      
 47 
     | 
    
         
            +
                    path
         
     | 
| 
      
 48 
     | 
    
         
            +
                  end
         
     | 
| 
      
 49 
     | 
    
         
            +
                end
         
     | 
| 
      
 50 
     | 
    
         
            +
                
         
     | 
| 
       36 
51 
     | 
    
         
             
                def rm path
         
     | 
| 
       37 
52 
     | 
    
         
             
                  @hdfs.delete(Path.new(path), true)
         
     | 
| 
       38 
53 
     | 
    
         
             
                  [path]
         
     | 
| 
         @@ -1,4 +1,5 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            require 'fileutils'
         
     | 
| 
      
 2 
     | 
    
         
            +
            require 'find'
         
     | 
| 
       2 
3 
     | 
    
         
             
            module Swineherd
         
     | 
| 
       3 
4 
     | 
    
         | 
| 
       4 
5 
     | 
    
         
             
              class LocalFileSystem
         
     | 
| 
         @@ -12,6 +13,12 @@ module Swineherd 
     | 
|
| 
       12 
13 
     | 
    
         
             
                  return LocalFile.new path, mode, &blk
         
     | 
| 
       13 
14 
     | 
    
         
             
                end
         
     | 
| 
       14 
15 
     | 
    
         | 
| 
      
 16 
     | 
    
         
            +
                def size path
         
     | 
| 
      
 17 
     | 
    
         
            +
                  sz = 0
         
     | 
| 
      
 18 
     | 
    
         
            +
                  Find.find(path){|f| sz += File.size(f)}
         
     | 
| 
      
 19 
     | 
    
         
            +
                  sz
         
     | 
| 
      
 20 
     | 
    
         
            +
                end
         
     | 
| 
      
 21 
     | 
    
         
            +
                
         
     | 
| 
       15 
22 
     | 
    
         
             
                def rm path
         
     | 
| 
       16 
23 
     | 
    
         
             
                  FileUtils.rm_r path
         
     | 
| 
       17 
24 
     | 
    
         
             
                end
         
     | 
| 
         @@ -22,6 +22,24 @@ module Swineherd 
     | 
|
| 
       22 
22 
     | 
    
         
             
                  S3File.new(path,mode,self,&blk)
         
     | 
| 
       23 
23 
     | 
    
         
             
                end
         
     | 
| 
       24 
24 
     | 
    
         | 
| 
      
 25 
     | 
    
         
            +
                def size path
         
     | 
| 
      
 26 
     | 
    
         
            +
                  sz = 0
         
     | 
| 
      
 27 
     | 
    
         
            +
                  if type(path) == "directory"
         
     | 
| 
      
 28 
     | 
    
         
            +
                    lr(path).each do |f|
         
     | 
| 
      
 29 
     | 
    
         
            +
                      sz += file_size(f)
         
     | 
| 
      
 30 
     | 
    
         
            +
                    end        
         
     | 
| 
      
 31 
     | 
    
         
            +
                  else
         
     | 
| 
      
 32 
     | 
    
         
            +
                    sz += file_size(path)
         
     | 
| 
      
 33 
     | 
    
         
            +
                  end
         
     | 
| 
      
 34 
     | 
    
         
            +
                  sz
         
     | 
| 
      
 35 
     | 
    
         
            +
                end    
         
     | 
| 
      
 36 
     | 
    
         
            +
             
     | 
| 
      
 37 
     | 
    
         
            +
                def file_size path
         
     | 
| 
      
 38 
     | 
    
         
            +
                  containing_bucket = bucket(path)
         
     | 
| 
      
 39 
     | 
    
         
            +
                  header            = @s3.interface.head(containing_bucket, key_path(path))
         
     | 
| 
      
 40 
     | 
    
         
            +
                  header['content-length'].to_i
         
     | 
| 
      
 41 
     | 
    
         
            +
                end
         
     | 
| 
      
 42 
     | 
    
         
            +
                
         
     | 
| 
       25 
43 
     | 
    
         
             
                def rm path
         
     | 
| 
       26 
44 
     | 
    
         
             
                  bkt = bucket(path)
         
     | 
| 
       27 
45 
     | 
    
         
             
                  key = key_path(path)
         
     | 
    
        data/lib/swineherd/foo
    ADDED
    
    | 
         @@ -0,0 +1 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            @('_')@
         
     | 
    
        data/lib/swineherd/script.rb
    CHANGED
    
    
    
        data/lib/swineherd/workflow.rb
    CHANGED
    
    
    
        data/swineherd.gemspec
    CHANGED
    
    | 
         @@ -5,11 +5,11 @@ 
     | 
|
| 
       5 
5 
     | 
    
         | 
| 
       6 
6 
     | 
    
         
             
            Gem::Specification.new do |s|
         
     | 
| 
       7 
7 
     | 
    
         
             
              s.name = %q{swineherd}
         
     | 
| 
       8 
     | 
    
         
            -
              s.version = "0.0.1"
     | 
| 
      
 8 
     | 
    
         
            +
              s.version = "0.0.2"
         
     | 
| 
       9 
9 
     | 
    
         | 
| 
       10 
10 
     | 
    
         
             
              s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
         
     | 
| 
       11 
11 
     | 
    
         
             
              s.authors = ["Jacob Perkins"]
         
     | 
| 
       12 
     | 
    
         
            -
              s.date = %q{2011- 
     | 
| 
      
 12 
     | 
    
         
            +
              s.date = %q{2011-06-01}
         
     | 
| 
       13 
13 
     | 
    
         
             
              s.description = %q{Swineherd is for running scripts and workflows on filesystems.}
         
     | 
| 
       14 
14 
     | 
    
         
             
              s.email = %q{jacob.a.perkins@gmail.com}
         
     | 
| 
       15 
15 
     | 
    
         
             
              s.executables = ["hdp-tree", "hadoop-stream"]
         
     | 
| 
         @@ -39,6 +39,7 @@ Gem::Specification.new do |s| 
     | 
|
| 
       39 
39 
     | 
    
         
             
                "lib/swineherd/filesystem/localfilesystem.rb",
         
     | 
| 
       40 
40 
     | 
    
         
             
                "lib/swineherd/filesystem/localfs.rb",
         
     | 
| 
       41 
41 
     | 
    
         
             
                "lib/swineherd/filesystem/s3filesystem.rb",
         
     | 
| 
      
 42 
     | 
    
         
            +
                "lib/swineherd/foo",
         
     | 
| 
       42 
43 
     | 
    
         
             
                "lib/swineherd/script.rb",
         
     | 
| 
       43 
44 
     | 
    
         
             
                "lib/swineherd/script/hadoop_script.rb",
         
     | 
| 
       44 
45 
     | 
    
         
             
                "lib/swineherd/script/pig_script.rb",
         
     | 
| 
         @@ -56,7 +57,7 @@ Gem::Specification.new do |s| 
     | 
|
| 
       56 
57 
     | 
    
         
             
              s.homepage = %q{http://github.com/Ganglion/swineherd}
         
     | 
| 
       57 
58 
     | 
    
         
             
              s.licenses = ["MIT"]
         
     | 
| 
       58 
59 
     | 
    
         
             
              s.require_paths = ["lib"]
         
     | 
| 
       59 
     | 
    
         
            -
              s.rubygems_version = %q{1. 
     | 
| 
      
 60 
     | 
    
         
            +
              s.rubygems_version = %q{1.4.2}
         
     | 
| 
       60 
61 
     | 
    
         
             
              s.summary = %q{Flexible data workflow glue.}
         
     | 
| 
       61 
62 
     | 
    
         
             
              s.test_files = [
         
     | 
| 
       62 
63 
     | 
    
         
             
                "examples/pagerank/pagerank.rb",
         
     | 
| 
         @@ -64,7 +65,6 @@ Gem::Specification.new do |s| 
     | 
|
| 
       64 
65 
     | 
    
         
             
              ]
         
     | 
| 
       65 
66 
     | 
    
         | 
| 
       66 
67 
     | 
    
         
             
              if s.respond_to? :specification_version then
         
     | 
| 
       67 
     | 
    
         
            -
                current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
         
     | 
| 
       68 
68 
     | 
    
         
             
                s.specification_version = 3
         
     | 
| 
       69 
69 
     | 
    
         | 
| 
       70 
70 
     | 
    
         
             
                if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
         
     | 
    
        data/tests/test_filesystem.rb
    CHANGED
    
    | 
         @@ -11,8 +11,9 @@ current_test = options['filesystem_to_test'] 
     | 
|
| 
       11 
11 
     | 
    
         
             
            describe "A new filesystem" do
         
     | 
| 
       12 
12 
     | 
    
         | 
| 
       13 
13 
     | 
    
         
             
              before do
         
     | 
| 
       14 
     | 
    
         
            -
                @test_path 
     | 
| 
       15 
     | 
    
         
            -
                @test_path2 
     | 
| 
      
 14 
     | 
    
         
            +
                @test_path   = "/tmp/rspec/test_path"
         
     | 
| 
      
 15 
     | 
    
         
            +
                @test_path2  = "/tmp/rspec/test_path2"
         
     | 
| 
      
 16 
     | 
    
         
            +
                @test_string = "@('_')@" 
         
     | 
| 
       16 
17 
     | 
    
         
             
                @fs = Swineherd::FileSystem.get(current_test)
         
     | 
| 
       17 
18 
     | 
    
         
             
              end
         
     | 
| 
       18 
19 
     | 
    
         | 
| 
         @@ -31,6 +32,16 @@ describe "A new filesystem" do 
     | 
|
| 
       31 
32 
     | 
    
         
             
                @fs.exists?(@test_path).should eql(false)
         
     | 
| 
       32 
33 
     | 
    
         
             
              end
         
     | 
| 
       33 
34 
     | 
    
         | 
| 
      
 35 
     | 
    
         
            +
              it "should implement size" do
         
     | 
| 
      
 36 
     | 
    
         
            +
                @fs.mkpath(File.dirname(@test_path))
         
     | 
| 
      
 37 
     | 
    
         
            +
                fileobj = @fs.open(@test_path, 'w')
         
     | 
| 
      
 38 
     | 
    
         
            +
                fileobj.write(@test_string)
         
     | 
| 
      
 39 
     | 
    
         
            +
                fileobj.close
         
     | 
| 
      
 40 
     | 
    
         
            +
                7.should eql(@fs.size(@test_path))
         
     | 
| 
      
 41 
     | 
    
         
            +
                @fs.rm(@test_path)
         
     | 
| 
      
 42 
     | 
    
         
            +
                @fs.rm(File.dirname(@test_path))
         
     | 
| 
      
 43 
     | 
    
         
            +
              end
         
     | 
| 
      
 44 
     | 
    
         
            +
             
     | 
| 
       34 
45 
     | 
    
         
             
              it "should be able to copy paths" do
         
     | 
| 
       35 
46 
     | 
    
         
             
                @fs.mkpath(@test_path)
         
     | 
| 
       36 
47 
     | 
    
         
             
                @fs.cp(@test_path, @test_path2)
         
     | 
    
        data/tests/test_s3_filesystem.rb
    CHANGED
    
    | 
         @@ -38,8 +38,9 @@ current_test = 's3' 
     | 
|
| 
       38 
38 
     | 
    
         
             
            describe "A new filesystem" do
         
     | 
| 
       39 
39 
     | 
    
         | 
| 
       40 
40 
     | 
    
         
             
              before do
         
     | 
| 
       41 
     | 
    
         
            -
                @test_path 
     | 
| 
       42 
     | 
    
         
            -
                @test_path2 
     | 
| 
      
 41 
     | 
    
         
            +
                @test_path   = "#{options['s3_test_bucket']}/tmp/rspec/test_path"
         
     | 
| 
      
 42 
     | 
    
         
            +
                @test_path2  = "#{options['s3_test_bucket']}/tmp/rspec/test_path2"
         
     | 
| 
      
 43 
     | 
    
         
            +
                @test_string = "@('_')@"     
         
     | 
| 
       43 
44 
     | 
    
         
             
                @fs = Swineherd::FileSystem.get(current_test, options['aws_access_key_id'], options['aws_secret_access_key'])
         
     | 
| 
       44 
45 
     | 
    
         
             
              end
         
     | 
| 
       45 
46 
     | 
    
         | 
| 
         @@ -58,6 +59,16 @@ describe "A new filesystem" do 
     | 
|
| 
       58 
59 
     | 
    
         
             
                @fs.exists?(@test_path).should eql(false)
         
     | 
| 
       59 
60 
     | 
    
         
             
              end
         
     | 
| 
       60 
61 
     | 
    
         | 
| 
      
 62 
     | 
    
         
            +
              it "should implement size" do
         
     | 
| 
      
 63 
     | 
    
         
            +
                @fs.mkpath(File.dirname(@test_path))
         
     | 
| 
      
 64 
     | 
    
         
            +
                fileobj = @fs.open(@test_path, 'w')
         
     | 
| 
      
 65 
     | 
    
         
            +
                fileobj.write(@test_string)
         
     | 
| 
      
 66 
     | 
    
         
            +
                fileobj.close
         
     | 
| 
      
 67 
     | 
    
         
            +
                7.should eql(@fs.size(@test_path))
         
     | 
| 
      
 68 
     | 
    
         
            +
                @fs.rm(@test_path)
         
     | 
| 
      
 69 
     | 
    
         
            +
                @fs.rm(File.dirname(@test_path))
         
     | 
| 
      
 70 
     | 
    
         
            +
              end
         
     | 
| 
      
 71 
     | 
    
         
            +
             
     | 
| 
       61 
72 
     | 
    
         
             
              it "should be able to copy paths" do
         
     | 
| 
       62 
73 
     | 
    
         
             
                @fs.mkpath(@test_path)
         
     | 
| 
       63 
74 
     | 
    
         
             
                @fs.cp(@test_path, @test_path2)
         
     | 
    
        metadata
    CHANGED
    
    | 
         @@ -1,13 +1,13 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            --- !ruby/object:Gem::Specification 
         
     | 
| 
       2 
2 
     | 
    
         
             
            name: swineherd
         
     | 
| 
       3 
3 
     | 
    
         
             
            version: !ruby/object:Gem::Version 
         
     | 
| 
       4 
     | 
    
         
            -
              hash:  
     | 
| 
       5 
     | 
    
         
            -
              prerelease:  
     | 
| 
      
 4 
     | 
    
         
            +
              hash: 27
         
     | 
| 
      
 5 
     | 
    
         
            +
              prerelease: 
         
     | 
| 
       6 
6 
     | 
    
         
             
              segments: 
         
     | 
| 
       7 
7 
     | 
    
         
             
              - 0
         
     | 
| 
       8 
8 
     | 
    
         
             
              - 0
         
     | 
| 
       9 
     | 
    
         
            -
              - 1
     | 
| 
       10 
     | 
    
         
            -
              version: 0.0.1
     | 
| 
      
 9 
     | 
    
         
            +
              - 2
         
     | 
| 
      
 10 
     | 
    
         
            +
              version: 0.0.2
         
     | 
| 
       11 
11 
     | 
    
         
             
            platform: ruby
         
     | 
| 
       12 
12 
     | 
    
         
             
            authors: 
         
     | 
| 
       13 
13 
     | 
    
         
             
            - Jacob Perkins
         
     | 
| 
         @@ -15,7 +15,7 @@ autorequire: 
     | 
|
| 
       15 
15 
     | 
    
         
             
            bindir: bin
         
     | 
| 
       16 
16 
     | 
    
         
             
            cert_chain: []
         
     | 
| 
       17 
17 
     | 
    
         | 
| 
       18 
     | 
    
         
            -
            date: 2011- 
     | 
| 
      
 18 
     | 
    
         
            +
            date: 2011-06-01 00:00:00 +00:00
         
     | 
| 
       19 
19 
     | 
    
         
             
            default_executable: 
         
     | 
| 
       20 
20 
     | 
    
         
             
            dependencies: 
         
     | 
| 
       21 
21 
     | 
    
         
             
            - !ruby/object:Gem::Dependency 
         
     | 
| 
         @@ -152,6 +152,7 @@ files: 
     | 
|
| 
       152 
152 
     | 
    
         
             
            - lib/swineherd/filesystem/localfilesystem.rb
         
     | 
| 
       153 
153 
     | 
    
         
             
            - lib/swineherd/filesystem/localfs.rb
         
     | 
| 
       154 
154 
     | 
    
         
             
            - lib/swineherd/filesystem/s3filesystem.rb
         
     | 
| 
      
 155 
     | 
    
         
            +
            - lib/swineherd/foo
         
     | 
| 
       155 
156 
     | 
    
         
             
            - lib/swineherd/script.rb
         
     | 
| 
       156 
157 
     | 
    
         
             
            - lib/swineherd/script/hadoop_script.rb
         
     | 
| 
       157 
158 
     | 
    
         
             
            - lib/swineherd/script/pig_script.rb
         
     | 
| 
         @@ -195,7 +196,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement 
     | 
|
| 
       195 
196 
     | 
    
         
             
            requirements: []
         
     | 
| 
       196 
197 
     | 
    
         | 
| 
       197 
198 
     | 
    
         
             
            rubyforge_project: 
         
     | 
| 
       198 
     | 
    
         
            -
            rubygems_version: 1. 
     | 
| 
      
 199 
     | 
    
         
            +
            rubygems_version: 1.4.2
         
     | 
| 
       199 
200 
     | 
    
         
             
            signing_key: 
         
     | 
| 
       200 
201 
     | 
    
         
             
            specification_version: 3
         
     | 
| 
       201 
202 
     | 
    
         
             
            summary: Flexible data workflow glue.
         
     |