swineherd 0.0.1 → 0.0.2

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.1
1
+ 0.0.2
@@ -8,4 +8,7 @@ module Swineherd
8
8
  autoload :FileSystem, 'swineherd/filesystem'
9
9
  autoload :Script, 'swineherd/script'
10
10
  autoload :Workflow, 'swineherd/workflow'
11
+
12
+ # For rake 0.9 compatibility
13
+ include Rake::DSL if defined?(Rake::DSL)
11
14
  end
@@ -21,6 +21,12 @@ module Swineherd
21
21
  def open path, mode="r", &blk
22
22
  end
23
23
 
24
+ #
25
+ # Recursively measure the size of path. Results in bytes.
26
+ #
27
+ def size path
28
+ end
29
+
24
30
  #
25
31
  # Recursively delete the path and all paths below it.
26
32
  #
@@ -32,7 +32,22 @@ module Swineherd
32
32
  HadoopFile.new(path,mode,self,&blk)
33
33
  end
34
34
 
35
+ def size path
36
+ lr(path).inject(0){|sz, f| sz += @hdfs.get_file_status(Path.new(f)).get_len}
37
+ end
35
38
 
39
+ #
40
+ # Recursively list paths
41
+ #
42
+ def lr path
43
+ paths = entries(path)
44
+ if (paths && !paths.empty?)
45
+ paths.map{|e| lr(e)}.flatten
46
+ else
47
+ path
48
+ end
49
+ end
50
+
36
51
  def rm path
37
52
  @hdfs.delete(Path.new(path), true)
38
53
  [path]
@@ -1,4 +1,5 @@
1
1
  require 'fileutils'
2
+ require 'find'
2
3
  module Swineherd
3
4
 
4
5
  class LocalFileSystem
@@ -12,6 +13,12 @@ module Swineherd
12
13
  return LocalFile.new path, mode, &blk
13
14
  end
14
15
 
16
+ def size path
17
+ sz = 0
18
+ Find.find(path){|f| sz += File.size(f)}
19
+ sz
20
+ end
21
+
15
22
  def rm path
16
23
  FileUtils.rm_r path
17
24
  end
@@ -22,6 +22,24 @@ module Swineherd
22
22
  S3File.new(path,mode,self,&blk)
23
23
  end
24
24
 
25
+ def size path
26
+ sz = 0
27
+ if type(path) == "directory"
28
+ lr(path).each do |f|
29
+ sz += file_size(f)
30
+ end
31
+ else
32
+ sz += file_size(path)
33
+ end
34
+ sz
35
+ end
36
+
37
+ def file_size path
38
+ containing_bucket = bucket(path)
39
+ header = @s3.interface.head(containing_bucket, key_path(path))
40
+ header['content-length'].to_i
41
+ end
42
+
25
43
  def rm path
26
44
  bkt = bucket(path)
27
45
  key = key_path(path)
@@ -0,0 +1 @@
1
+ @('_')@
@@ -6,6 +6,7 @@ module Swineherd
6
6
  autoload :RScript, 'swineherd/script/r_script'
7
7
 
8
8
  module Common
9
+
9
10
  attr_accessor :input, :output, :options, :attributes
10
11
  def initialize(source, input = [], output = [], options = {}, attributes ={})
11
12
  @source = source
@@ -1,7 +1,7 @@
1
1
  module Swineherd
2
2
  class Workflow
3
3
  attr_accessor :workdir, :outputs, :output_counts
4
-
4
+
5
5
  #
6
6
  # Create a new workflow and new namespace for this workflow
7
7
  #
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{swineherd}
8
- s.version = "0.0.1"
8
+ s.version = "0.0.2"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Jacob Perkins"]
12
- s.date = %q{2011-04-20}
12
+ s.date = %q{2011-06-01}
13
13
  s.description = %q{Swineherd is for running scripts and workflows on filesystems.}
14
14
  s.email = %q{jacob.a.perkins@gmail.com}
15
15
  s.executables = ["hdp-tree", "hadoop-stream"]
@@ -39,6 +39,7 @@ Gem::Specification.new do |s|
39
39
  "lib/swineherd/filesystem/localfilesystem.rb",
40
40
  "lib/swineherd/filesystem/localfs.rb",
41
41
  "lib/swineherd/filesystem/s3filesystem.rb",
42
+ "lib/swineherd/foo",
42
43
  "lib/swineherd/script.rb",
43
44
  "lib/swineherd/script/hadoop_script.rb",
44
45
  "lib/swineherd/script/pig_script.rb",
@@ -56,7 +57,7 @@ Gem::Specification.new do |s|
56
57
  s.homepage = %q{http://github.com/Ganglion/swineherd}
57
58
  s.licenses = ["MIT"]
58
59
  s.require_paths = ["lib"]
59
- s.rubygems_version = %q{1.3.7}
60
+ s.rubygems_version = %q{1.4.2}
60
61
  s.summary = %q{Flexible data workflow glue.}
61
62
  s.test_files = [
62
63
  "examples/pagerank/pagerank.rb",
@@ -64,7 +65,6 @@ Gem::Specification.new do |s|
64
65
  ]
65
66
 
66
67
  if s.respond_to? :specification_version then
67
- current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
68
68
  s.specification_version = 3
69
69
 
70
70
  if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
@@ -11,8 +11,9 @@ current_test = options['filesystem_to_test']
11
11
  describe "A new filesystem" do
12
12
 
13
13
  before do
14
- @test_path = "/tmp/rspec/test_path"
15
- @test_path2 = "/tmp/rspec/test_path2"
14
+ @test_path = "/tmp/rspec/test_path"
15
+ @test_path2 = "/tmp/rspec/test_path2"
16
+ @test_string = "@('_')@"
16
17
  @fs = Swineherd::FileSystem.get(current_test)
17
18
  end
18
19
 
@@ -31,6 +32,16 @@ describe "A new filesystem" do
31
32
  @fs.exists?(@test_path).should eql(false)
32
33
  end
33
34
 
35
+ it "should implement size" do
36
+ @fs.mkpath(File.dirname(@test_path))
37
+ fileobj = @fs.open(@test_path, 'w')
38
+ fileobj.write(@test_string)
39
+ fileobj.close
40
+ 7.should eql(@fs.size(@test_path))
41
+ @fs.rm(@test_path)
42
+ @fs.rm(File.dirname(@test_path))
43
+ end
44
+
34
45
  it "should be able to copy paths" do
35
46
  @fs.mkpath(@test_path)
36
47
  @fs.cp(@test_path, @test_path2)
@@ -38,8 +38,9 @@ current_test = 's3'
38
38
  describe "A new filesystem" do
39
39
 
40
40
  before do
41
- @test_path = "#{options['s3_test_bucket']}/tmp/rspec/test_path"
42
- @test_path2 = "#{options['s3_test_bucket']}/tmp/rspec/test_path2"
41
+ @test_path = "#{options['s3_test_bucket']}/tmp/rspec/test_path"
42
+ @test_path2 = "#{options['s3_test_bucket']}/tmp/rspec/test_path2"
43
+ @test_string = "@('_')@"
43
44
  @fs = Swineherd::FileSystem.get(current_test, options['aws_access_key_id'], options['aws_secret_access_key'])
44
45
  end
45
46
 
@@ -58,6 +59,16 @@ describe "A new filesystem" do
58
59
  @fs.exists?(@test_path).should eql(false)
59
60
  end
60
61
 
62
+ it "should implement size" do
63
+ @fs.mkpath(File.dirname(@test_path))
64
+ fileobj = @fs.open(@test_path, 'w')
65
+ fileobj.write(@test_string)
66
+ fileobj.close
67
+ 7.should eql(@fs.size(@test_path))
68
+ @fs.rm(@test_path)
69
+ @fs.rm(File.dirname(@test_path))
70
+ end
71
+
61
72
  it "should be able to copy paths" do
62
73
  @fs.mkpath(@test_path)
63
74
  @fs.cp(@test_path, @test_path2)
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: swineherd
3
3
  version: !ruby/object:Gem::Version
4
- hash: 29
5
- prerelease: false
4
+ hash: 27
5
+ prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 0
9
- - 1
10
- version: 0.0.1
9
+ - 2
10
+ version: 0.0.2
11
11
  platform: ruby
12
12
  authors:
13
13
  - Jacob Perkins
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-04-20 00:00:00 -05:00
18
+ date: 2011-06-01 00:00:00 +00:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
@@ -152,6 +152,7 @@ files:
152
152
  - lib/swineherd/filesystem/localfilesystem.rb
153
153
  - lib/swineherd/filesystem/localfs.rb
154
154
  - lib/swineherd/filesystem/s3filesystem.rb
155
+ - lib/swineherd/foo
155
156
  - lib/swineherd/script.rb
156
157
  - lib/swineherd/script/hadoop_script.rb
157
158
  - lib/swineherd/script/pig_script.rb
@@ -195,7 +196,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
195
196
  requirements: []
196
197
 
197
198
  rubyforge_project:
198
- rubygems_version: 1.3.7
199
+ rubygems_version: 1.4.2
199
200
  signing_key:
200
201
  specification_version: 3
201
202
  summary: Flexible data workflow glue.