swineherd 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.1
1
+ 0.0.2
@@ -8,4 +8,7 @@ module Swineherd
8
8
  autoload :FileSystem, 'swineherd/filesystem'
9
9
  autoload :Script, 'swineherd/script'
10
10
  autoload :Workflow, 'swineherd/workflow'
11
+
12
+ # For rake 0.9 compatibility
13
+ include Rake::DSL if defined?(Rake::DSL)
11
14
  end
@@ -21,6 +21,12 @@ module Swineherd
21
21
  def open path, mode="r", &blk
22
22
  end
23
23
 
24
#
# Report the size of +path+ in bytes, measured recursively.
# The body here is empty, so this definition itself returns nil.
#
def size(path)
end
29
+
24
30
  #
25
31
  # Recursively delete the path and all paths below it.
26
32
  #
@@ -32,7 +32,22 @@ module Swineherd
32
32
  HadoopFile.new(path,mode,self,&blk)
33
33
  end
34
34
 
35
#
# Total size, in bytes, of everything stored under +path+.
#
# Adds up the length reported by the HDFS file status of every path that
# +lr+ yields. The result of +lr+ is wrapped and flattened first because
# +lr+ can hand back a bare path string for a leaf, and String is not
# Enumerable on Ruby 1.9+, so calling +inject+ on it directly would raise
# NoMethodError.
#
def size path
  [lr(path)].flatten.inject(0) do |total, f|
    total + @hdfs.get_file_status(Path.new(f)).get_len
  end
end
35
38
 
39
#
# Recursively list paths
#
# Returns a flat array of every leaf path found below +path+. A path for
# which +entries+ returns nothing (nil/false) or an empty list is treated
# as a leaf and comes back as a one-element array, so the return value can
# always be enumerated regardless of whether +path+ is a file or directory.
#
def lr path
  children = entries(path)
  return [path] if !children || children.empty?

  children.map { |child| lr(child) }.flatten
end
50
+
36
51
  def rm path
37
52
  @hdfs.delete(Path.new(path), true)
38
53
  [path]
@@ -1,4 +1,5 @@
1
1
  require 'fileutils'
2
+ require 'find'
2
3
  module Swineherd
3
4
 
4
5
  class LocalFileSystem
@@ -12,6 +13,12 @@ module Swineherd
12
13
  return LocalFile.new path, mode, &blk
13
14
  end
14
15
 
16
#
# Recursively measure the size of +path+. Result is in bytes.
#
# Walks +path+ with Find.find and sums File.size over regular files only.
# Directories are skipped: File.size on a directory reports the size of
# the directory node itself (filesystem dependent), not of stored data,
# which would inflate the total.
#
def size path
  total = 0
  Find.find(path) { |f| total += File.size(f) if File.file?(f) }
  total
end
21
+
15
22
  def rm path
16
23
  FileUtils.rm_r path
17
24
  end
@@ -22,6 +22,24 @@ module Swineherd
22
22
  S3File.new(path,mode,self,&blk)
23
23
  end
24
24
 
25
#
# Size of +path+ in bytes. When +type+ reports a "directory" the sizes of
# all paths returned by +lr+ are summed; otherwise +path+ itself is
# measured with +file_size+.
#
def size path
  targets = (type(path) == "directory") ? lr(path) : [path]
  targets.inject(0) { |total, f| total + file_size(f) }
end
36
+
37
#
# Size of a single object, read from the 'content-length' entry of the
# response to a head request for the bucket and key derived from +path+.
#
def file_size path
  response = @s3.interface.head(bucket(path), key_path(path))
  response['content-length'].to_i
end
42
+
25
43
  def rm path
26
44
  bkt = bucket(path)
27
45
  key = key_path(path)
@@ -0,0 +1 @@
1
+ @('_')@
@@ -6,6 +6,7 @@ module Swineherd
6
6
  autoload :RScript, 'swineherd/script/r_script'
7
7
 
8
8
  module Common
9
+
9
10
  attr_accessor :input, :output, :options, :attributes
10
11
  def initialize(source, input = [], output = [], options = {}, attributes ={})
11
12
  @source = source
@@ -1,7 +1,7 @@
1
1
  module Swineherd
2
2
  class Workflow
3
3
  attr_accessor :workdir, :outputs, :output_counts
4
-
4
+
5
5
  #
6
6
  # Create a new workflow and new namespace for this workflow
7
7
  #
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{swineherd}
8
- s.version = "0.0.1"
8
+ s.version = "0.0.2"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Jacob Perkins"]
12
- s.date = %q{2011-04-20}
12
+ s.date = %q{2011-06-01}
13
13
  s.description = %q{Swineherd is for running scripts and workflows on filesystems.}
14
14
  s.email = %q{jacob.a.perkins@gmail.com}
15
15
  s.executables = ["hdp-tree", "hadoop-stream"]
@@ -39,6 +39,7 @@ Gem::Specification.new do |s|
39
39
  "lib/swineherd/filesystem/localfilesystem.rb",
40
40
  "lib/swineherd/filesystem/localfs.rb",
41
41
  "lib/swineherd/filesystem/s3filesystem.rb",
42
+ "lib/swineherd/foo",
42
43
  "lib/swineherd/script.rb",
43
44
  "lib/swineherd/script/hadoop_script.rb",
44
45
  "lib/swineherd/script/pig_script.rb",
@@ -56,7 +57,7 @@ Gem::Specification.new do |s|
56
57
  s.homepage = %q{http://github.com/Ganglion/swineherd}
57
58
  s.licenses = ["MIT"]
58
59
  s.require_paths = ["lib"]
59
- s.rubygems_version = %q{1.3.7}
60
+ s.rubygems_version = %q{1.4.2}
60
61
  s.summary = %q{Flexible data workflow glue.}
61
62
  s.test_files = [
62
63
  "examples/pagerank/pagerank.rb",
@@ -64,7 +65,6 @@ Gem::Specification.new do |s|
64
65
  ]
65
66
 
66
67
  if s.respond_to? :specification_version then
67
- current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
68
68
  s.specification_version = 3
69
69
 
70
70
  if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
@@ -11,8 +11,9 @@ current_test = options['filesystem_to_test']
11
11
  describe "A new filesystem" do
12
12
 
13
13
  before do
14
- @test_path = "/tmp/rspec/test_path"
15
- @test_path2 = "/tmp/rspec/test_path2"
14
+ @test_path = "/tmp/rspec/test_path"
15
+ @test_path2 = "/tmp/rspec/test_path2"
16
+ @test_string = "@('_')@"
16
17
  @fs = Swineherd::FileSystem.get(current_test)
17
18
  end
18
19
 
@@ -31,6 +32,16 @@ describe "A new filesystem" do
31
32
  @fs.exists?(@test_path).should eql(false)
32
33
  end
33
34
 
35
# Writes @test_string to @test_path and checks that the filesystem reports
# its size. The actual value is the receiver of +should+ so a failure
# message reads "expected <string length>, got <reported size>".
it "should implement size" do
  @fs.mkpath(File.dirname(@test_path))
  fileobj = @fs.open(@test_path, 'w')
  fileobj.write(@test_string)
  fileobj.close
  @fs.size(@test_path).should eql(@test_string.length)
  @fs.rm(@test_path)
  @fs.rm(File.dirname(@test_path))
end
44
+
34
45
  it "should be able to copy paths" do
35
46
  @fs.mkpath(@test_path)
36
47
  @fs.cp(@test_path, @test_path2)
@@ -38,8 +38,9 @@ current_test = 's3'
38
38
  describe "A new filesystem" do
39
39
 
40
40
  before do
41
- @test_path = "#{options['s3_test_bucket']}/tmp/rspec/test_path"
42
- @test_path2 = "#{options['s3_test_bucket']}/tmp/rspec/test_path2"
41
+ @test_path = "#{options['s3_test_bucket']}/tmp/rspec/test_path"
42
+ @test_path2 = "#{options['s3_test_bucket']}/tmp/rspec/test_path2"
43
+ @test_string = "@('_')@"
43
44
  @fs = Swineherd::FileSystem.get(current_test, options['aws_access_key_id'], options['aws_secret_access_key'])
44
45
  end
45
46
 
@@ -58,6 +59,16 @@ describe "A new filesystem" do
58
59
  @fs.exists?(@test_path).should eql(false)
59
60
  end
60
61
 
62
# Writes @test_string to @test_path and checks that the filesystem reports
# its size. The actual value is the receiver of +should+ so a failure
# message reads "expected <string length>, got <reported size>".
it "should implement size" do
  @fs.mkpath(File.dirname(@test_path))
  fileobj = @fs.open(@test_path, 'w')
  fileobj.write(@test_string)
  fileobj.close
  @fs.size(@test_path).should eql(@test_string.length)
  @fs.rm(@test_path)
  @fs.rm(File.dirname(@test_path))
end
71
+
61
72
  it "should be able to copy paths" do
62
73
  @fs.mkpath(@test_path)
63
74
  @fs.cp(@test_path, @test_path2)
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: swineherd
3
3
  version: !ruby/object:Gem::Version
4
- hash: 29
5
- prerelease: false
4
+ hash: 27
5
+ prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 0
9
- - 1
10
- version: 0.0.1
9
+ - 2
10
+ version: 0.0.2
11
11
  platform: ruby
12
12
  authors:
13
13
  - Jacob Perkins
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-04-20 00:00:00 -05:00
18
+ date: 2011-06-01 00:00:00 +00:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
@@ -152,6 +152,7 @@ files:
152
152
  - lib/swineherd/filesystem/localfilesystem.rb
153
153
  - lib/swineherd/filesystem/localfs.rb
154
154
  - lib/swineherd/filesystem/s3filesystem.rb
155
+ - lib/swineherd/foo
155
156
  - lib/swineherd/script.rb
156
157
  - lib/swineherd/script/hadoop_script.rb
157
158
  - lib/swineherd/script/pig_script.rb
@@ -195,7 +196,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
195
196
  requirements: []
196
197
 
197
198
  rubyforge_project:
198
- rubygems_version: 1.3.7
199
+ rubygems_version: 1.4.2
199
200
  signing_key:
200
201
  specification_version: 3
201
202
  summary: Flexible data workflow glue.