shovel 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,4 @@
1
+ == 0.0.1 2009-03-02
2
+
3
+ * 1 major enhancement:
4
+ * Initial release
@@ -0,0 +1,8 @@
1
+ History.txt
2
+ Manifest.txt
3
+ README.rdoc
4
+ lib/dir_ext.rb
5
+ lib/flow.rb
6
+ lib/log.rb
7
+ lib/shovel.rb
8
+ spec/example.rb
@@ -0,0 +1,52 @@
1
+ = shovel
2
+
3
+ * Shovel http://bitbucket.org/mediashelf/shovel
4
+
5
+ == DESCRIPTION:
6
+
7
+ A domain specific language for configuring ingestor processes
8
+
9
+ == FEATURES/PROBLEMS:
10
+
11
+ This module takes care of the drudgery of directory scanning, archive extraction and tempfile management, allowing the developer to implement the fiddly bits without thinking about the tedious bits.
12
+
13
+ More archive formats could be pretty easily supported. Some refactoring of the archive extraction code is probably a good idea too.
14
+
15
+ == SYNOPSIS:
16
+
17
+ include MediaShelf::Shovel
18
+
19
+ == REQUIREMENTS:
20
+
21
+ * directory_watcher
22
+ * rubyzip
23
+ * archive-tar-minitar
24
+
25
+ == INSTALL:
26
+
27
+ * sudo gem install shovel
28
+
29
+ == LICENSE:
30
+
31
+ (The MIT License)
32
+
33
+ Copyright (c) 2009 Mediashelf LLC
34
+
35
+ Permission is hereby granted, free of charge, to any person obtaining
36
+ a copy of this software and associated documentation files (the
37
+ 'Software'), to deal in the Software without restriction, including
38
+ without limitation the rights to use, copy, modify, merge, publish,
39
+ distribute, sublicense, and/or sell copies of the Software, and to
40
+ permit persons to whom the Software is furnished to do so, subject to
41
+ the following conditions:
42
+
43
+ The above copyright notice and this permission notice shall be
44
+ included in all copies or substantial portions of the Software.
45
+
46
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
47
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
48
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
49
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
50
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
51
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
52
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,19 @@
1
+ require 'tmpdir'
2
require 'fileutils'

class Dir
  # A handy method to create a temporary directory which cleans up after itself.
  #
  #   Dir.mktmp do |dir|
  #     puts "i'm doing neat stuff"
  #   end
  #
  # Even if your block raises an exception, the temp directory is deleted.
  #
  # prefix:: leading part of the directory name; the directory itself is
  #          created underneath Dir.tmpdir
  #
  # Returns the path of the directory (which no longer exists by the time
  # the method returns).
  def self.mktmp(prefix = "tmpdir", &block)
    # Dir.mktmpdir chooses a unique name and creates the directory in one
    # step. Naming the directory after the current epoch second let two
    # calls made within the same second share a single directory, and the
    # first one to finish then deleted the other's files.
    dirname = Dir.mktmpdir("#{prefix}.")
    begin
      yield dirname
    ensure
      FileUtils.rm_rf dirname
    end
    dirname
  end
end
@@ -0,0 +1,62 @@
1
+ require 'log'
2
+ require 'forwardable'
3
module MediaShelf
  module Shovel
    # This class represents a "workflow" for a file contained in an ingest
    # file being processed by Shovel.
    # See +Shovel+ for more information and an example.
    class Flow
      extend Forwardable

      # Forwardable expects the *name* of an accessor, not an object, so the
      # logging helpers are delegated through a private method that looks up
      # the shared Log instance at call time.
      def_delegators :shared_log, :d, :e, :w, :i
      attr_reader :reaction_map, :exception_map

      def initialize #:nodoc:
        @reaction_map = { :stable => [], :added => [], :removed => [], :modified => [] }
        @exception_map = {}
      end

      # Add a process to the flow, to be executed by the Shovel when the
      # supplied event occurs.
      #
      # event:: the event type, one of :stable, :added, :removed or :modified
      # block:: your process. The process will be passed the path of the
      #         file the event refers to.
      #
      # Raises ArgumentError if the event type is unknown or no block is given.
      #
      # This method (and on_exception) returns self, so calls
      # to add_reaction can be chained.
      def add_reaction(event, &block)
        reactions = @reaction_map[event]
        if reactions.nil?
          raise ArgumentError, "unknown event #{event.inspect}, expected one of #{@reaction_map.keys.inspect}"
        end
        raise ArgumentError, "add_reaction requires a block" if block.nil?
        reactions << block
        self
      end

      # Add a process to the flow, to be executed by the Shovel when the
      # supplied Exception occurs.
      #
      # klazz:: a class which might be raised at some point in the ingest process
      # block:: what you want to do about it. The block is passed the file
      #         and the exception instance.
      #
      # This method (and add_reaction) returns self, so calls
      # to on_exception can be chained.
      def on_exception(klazz, &block)
        @exception_map[klazz.name.to_sym] = block
        self
      end

      # The exception handler. It looks for a process mapped to the class of
      # the given exception, or failing that to the nearest ancestor that has
      # one, and invokes it with the file and the exception. If no handler is
      # found the exception is logged instead.
      #
      # f:: the file being processed when the exception was raised
      # x:: the exception instance
      def handle_exception(f, x)
        handler = handler_for(x.class)
        if handler
          handler.call(f, x)
        else
          log_exception(f, x)
        end
      end

      def log_exception(f, x) #:nodoc:
        e("Error : #{f} #{x.message}. Stack trace follows:")
        # backtrace is nil for an exception that was built but never raised
        e(Array(x.backtrace).join("\n"))
      end

      private

      # The shared logger that the d/e/w/i helpers forward to.
      def shared_log
        Log.instance
      end

      # Walks the ancestors of klass, most specific first, and returns the
      # first registered handler, or nil when there is none.
      def handler_for(klass)
        klass.ancestors.each do |ancestor|
          name = ancestor.name
          next if name.nil? || name.empty? # anonymous classes have no usable name
          handler = @exception_map[name.to_sym]
          return handler if handler
        end
        nil
      end
    end
  end
end
@@ -0,0 +1,34 @@
1
+ require 'singleton'
2
+ require 'logger'
3
module MediaShelf
  # A singleton logger. That way we can use the same instance all over the place.
  # Singleton claims to be threadsafe.
  class Log
    include Singleton

    # Timestamp layout applied to every logger this class builds.
    DATETIME_FORMAT = "%y%m%d %H:%M:%S".freeze

    def initialize
      @logger = build_logger(STDOUT)
    end

    # Discards all further log output. A nil log device makes Logger drop
    # messages, which avoids opening a platform-specific null file on
    # every call. The current level and timestamp format are kept.
    def silence!
      @logger = build_logger(nil, @logger.level)
    end

    # Sends log output to STDOUT again, keeping the current level and
    # timestamp format.
    def unsilence!
      @logger = build_logger(STDOUT, @logger.level)
    end

    # Logs msg at debug level (only when the logger is at debug level).
    def d(msg)
      @logger.debug msg if @logger.level == Logger::DEBUG
    end

    # Logs msg at error level.
    def e(msg)
      @logger.error msg
    end

    # Logs msg at warn level.
    def w(msg)
      @logger.warn msg
    end

    # Logs msg at info level.
    def i(msg)
      @logger.info msg
    end

    # Logs msg at fatal level.
    def f(msg)
      @logger.fatal msg
    end

    private

    # Builds a Logger writing to device with the shared timestamp format.
    # level:: optional level to carry over from a previous logger
    def build_logger(device, level = nil)
      logger = Logger.new(device)
      logger.datetime_format = DATETIME_FORMAT
      logger.level = level unless level.nil?
      logger
    end
  end
end
@@ -0,0 +1,137 @@
1
+ $:.unshift(File.dirname(__FILE__)) unless
2
+ $:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
3
+ require 'rubygems'
4
+ gem 'directory_watcher'
5
+ gem 'rubyzip'
6
+ gem 'archive-tar-minitar'
7
+ require 'zip/zip'
8
+ require 'directory_watcher'
9
+ require 'log'
10
+ require 'flow'
11
+ require 'dir_ext'
12
+ require 'zlib'
13
+ require 'archive/tar/minitar'
14
+
15
# Top-level namespace that only carries the gem's version number; the
# mixin itself lives in MediaShelf::Shovel.
module Shovel
  # Frozen so the version string cannot be mutated by accident at runtime.
  VERSION = '0.0.1'.freeze
end
18
+
19
module MediaShelf

  # = Shovel
  # This module can be mixed into a class that would like to react to
  # file events in a particular directory. For a complete
  # example see spec/example.rb
  # :include:../spec/example.rb
  module Shovel
    include Archive::Tar::Minitar

    def d(s) #:nodoc:
      Log.instance.d(s)
    end

    # Observer callback registered with the DirectoryWatcher in
    # configure_process. Runs every reaction registered for each event's
    # type, in registration order, passing it the event's path. An exception
    # raised by a reaction is handed to the flow's exception handling and
    # the remaining reactions still run.
    def update(*events) #:nodoc:
      d("events received: #{events.inspect}")
      events.each do |event|
        reactions = @flow.reaction_map[event.type]
        d "found : " + reactions.inspect
        reactions.each do |reaction|
          begin
            d("executing: #{reaction}")
            reaction.call(event.path)
          rescue SystemExit, SignalException
            # never swallow a request to stop the process (exit, Ctrl-C, kill)
            raise
          rescue Exception => error
            @flow.handle_exception(event.path, error)
          end
        end
      end
    end

    # This method is used to configure the directory scanner. A +Flow+ object
    # is returned, so constructs like
    #
    #   configure_process(...).add_reaction(...).add_reaction(...)
    #
    # can be created. This method raises ConfigurationError if it's invoked
    # more than once on the same instance, and ArgumentError if glob is a
    # Regexp.
    #
    # dir:: directory to scan (the original notes say it will be created if
    #       it doesn't exist; nothing in this method creates it, so that is
    #       presumably done by DirectoryWatcher - confirm)
    # interval:: how often to scan
    # stable_cycles:: how many intervals should the file remain unchanged to fire a stable event
    # glob:: array of globs to scan for
    # pre_load:: whether to suppress file events for files that already exist when the scanner is started. Defaults to true
    def configure_process(dir, interval, stable_cycles, glob, pre_load = true)
      raise ArgumentError, "cannot pass a regex in here, globs only please" if glob.is_a?(Regexp)
      raise ConfigurationError unless @dw.nil?
      Log.instance.d("configured to scan #{dir} every #{interval}s for #{glob.inspect}. pre loading existing files? #{pre_load}")
      @dw = DirectoryWatcher.new(dir, :pre_load => pre_load, :glob => glob, :stable => stable_cycles, :interval => interval)
      @flow = Flow.new
      @dw.add_observer(self)
      @flow
    end

    # Starts the directory scanner process and joins it.
    # Raises RuntimeError if configure_process has not been called yet.
    def start
      raise "configure_process must be called before start" if @dw.nil?
      @dw.start
      @dw.join
    end

    # This method knows how to extract files from a standard .zip file. The
    # files themselves are extracted into an opaque tmp directory, which is
    # cleaned up after all entries have been yielded.
    #
    # filename:: the name of the zip archive
    # process:: a block which will be passed the path of each extracted entry for further processing
    #
    # Raises RuntimeError if an entry's name would place it outside the tmp
    # directory.
    #
    # Example:
    #   unzip('foo.zip'){|file| puts "i'm processing #{file}"}
    def unzip(filename, &process)
      d("unzipping #{filename}")
      Dir.mktmp do |tmpdir|
        root = File.expand_path(tmpdir)
        Zip::ZipFile.foreach(filename) do |entry|
          d "processing #{entry}"
          target = File.join(tmpdir, entry.name)
          # Archives come from outside; refuse entry names such as
          # "../../etc/passwd" that would land outside the tmp directory.
          resolved = File.expand_path(target)
          unless resolved == root || resolved.index(root + File::SEPARATOR) == 0
            raise "refusing to extract #{entry.name} outside of #{root}"
          end
          FileUtils.mkdir_p(File.dirname(target))
          # The block runs when the destination already exists. Its value
          # (the result of the log call) is handed back to rubyzip, which
          # presumably uses it to decide whether to overwrite - confirm.
          entry.extract(target) { |existing| Log.instance.w "warn: #{existing} exists!" }
          yield target
        end
      end
    end

    # See +unzip+. Same thing, except for tar archives.
    def untar(filename, &process)
      d("untarring #{filename}")
      untar_gz(filename, false, &process)
    end

    # See +unzip+. Same thing, except for gzipped tar archives.
    #
    # gzipped:: pass false to read filename as a plain tar archive
    def untar_gz(filename, gzipped = true, &process)
      d("targunzipping #{filename}")
      file = gzipped ? File.open(filename, 'rb') : nil
      begin
        source = file ? Zlib::GzipReader.new(file) : filename
        Dir.mktmp do |tmpdir|
          Archive::Tar::Minitar.unpack(source, tmpdir) do |event, name, stats|
            yield File.join(tmpdir, name) if event == :file_done
          end
        end
      ensure
        # Release the handles even when unpacking (or a reaction) raised;
        # the closed? guards cover the case where they were closed already.
        source.close if source.respond_to?(:closed?) && !source.closed?
        file.close unless file.nil? || file.closed?
      end
    end

    # See +unzip+. This method will try to figure out what kind of archive
    # you passed it from the end of its name (.tar.gz/.tgz, .zip or .tar,
    # in any letter case), then call the appropriate method.
    # Raises RuntimeError when the name matches none of them.
    def extract(filename, &process)
      case filename
      when /\.(tar\.gz|tgz)\z/i
        untar_gz(filename, &process)
      when /\.zip\z/i
        unzip(filename, &process)
      when /\.tar\z/i
        untar(filename, &process)
      else
        raise "Couldn't figure out what kind of archive #{filename} was..."
      end
    end

    # Raised if configure_process is called more than once.
    # Derives from StandardError so that a plain +rescue+ catches it.
    class ConfigurationError < StandardError
      def initialize(msg = "The shovel can only be configured once.")
        super(msg)
      end
    end
  end

end
@@ -0,0 +1,53 @@
1
+ require 'rubygems'
2
+ gem 'shovel'
3
+ require 'ruby-debug'
4
+ require 'shovel'
5
+
6
# A small ingestor that demonstrates the Shovel mixin: it watches an
# "ingest" directory next to this file, unpacks each archive that shows up,
# deletes the archive afterwards and moves archives that caused a
# RuntimeError into a "failboat" directory.
class Example
  #include the shovel mixin
  include MediaShelf::Shovel

  def initialize
    super
    #work out where the ingest and failure directories live, and make the failure directory.
    root = File.dirname(__FILE__)
    indir = File.join(root, 'ingest')
    faildir = File.join(root, 'failboat')
    FileUtils.mkdir_p faildir unless File.exist?(faildir)

    #configure an ingest process to scan our ingest directory for compressed archives every 4s, consider an archive
    #stable if it doesn't change for 1 scan interval, and don't suppress file events for files that are already in there(!)
    flow = configure_process(indir, 4, 1, %w(**/*.zip **/*.tgz **/*.tar.gz **/*.tar), false)

    #let's react to stable events by...
    flow.add_reaction(:stable) do |file|
      #extracting the file...
      extract(file) do |inner|
        #and doing some stuff to it
        puts "i'm doing stuff to #{inner} of size #{File.size(inner)}"
        #and possibly raising an exception, so we can demonstrate exception handling
        raise "This file is Exceptional! !" if file =~ /.*\.gz/
      end
    end

    flow.add_reaction(:stable) do |file|
      #now let's delete it. these reactions are executed _in_order!
      #the exception handler below may already have moved the file away, so only remove what is still there.
      FileUtils.rm(file, :verbose => true) if File.exist?(file)
    end

    flow.add_reaction(:removed) do |file|
      d("so long #{file}!")
    end

    flow.on_exception(RuntimeError) do |f, e|
      d "URF!, an exception was thrown on #{f}! Welcome to the FAILboat!"
      d e.message
      #suffix the name with the epoch second so repeated failures of one archive don't overwrite each other
      FileUtils.mv(f, File.join(faildir, "%s.%d" % [File.basename(f), Time.now.to_i]), :verbose => true)
    end
  end
end
49
+
50
#create our example class and start it right away. start joins the
#directory watcher, so this call presumably does not return while the
#scanner is running.
Example.new.start
metadata ADDED
@@ -0,0 +1,133 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: shovel
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - McClain Looney
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-03-04 00:00:00 -06:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: directory_watcher
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: 1.1.2
24
+ version:
25
+ - !ruby/object:Gem::Dependency
26
+ name: rubyzip
27
+ type: :runtime
28
+ version_requirement:
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: 0.9.1
34
+ version:
35
+ - !ruby/object:Gem::Dependency
36
+ name: archive-tar-minitar
37
+ type: :runtime
38
+ version_requirement:
39
+ version_requirements: !ruby/object:Gem::Requirement
40
+ requirements:
41
+ - - ">="
42
+ - !ruby/object:Gem::Version
43
+ version: 0.5.2
44
+ version:
45
+ - !ruby/object:Gem::Dependency
46
+ name: newgem
47
+ type: :development
48
+ version_requirement:
49
+ version_requirements: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - ">="
52
+ - !ruby/object:Gem::Version
53
+ version: 1.2.3
54
+ version:
55
+ - !ruby/object:Gem::Dependency
56
+ name: mocha
57
+ type: :development
58
+ version_requirement:
59
+ version_requirements: !ruby/object:Gem::Requirement
60
+ requirements:
61
+ - - ">="
62
+ - !ruby/object:Gem::Version
63
+ version: 0.9.5
64
+ version:
65
+ - !ruby/object:Gem::Dependency
66
+ name: thoughtbot-shoulda
67
+ type: :development
68
+ version_requirement:
69
+ version_requirements: !ruby/object:Gem::Requirement
70
+ requirements:
71
+ - - ">="
72
+ - !ruby/object:Gem::Version
73
+ version: 2.9.1
74
+ version:
75
+ - !ruby/object:Gem::Dependency
76
+ name: hoe
77
+ type: :development
78
+ version_requirement:
79
+ version_requirements: !ruby/object:Gem::Requirement
80
+ requirements:
81
+ - - ">="
82
+ - !ruby/object:Gem::Version
83
+ version: 1.8.0
84
+ version:
85
+ description: A domain specific language for configuring ingestor processes
86
+ email:
87
+ - mcclain.looney@yourmediashelf.com
88
+ executables: []
89
+
90
+ extensions: []
91
+
92
+ extra_rdoc_files:
93
+ - History.txt
94
+ - Manifest.txt
95
+ - README.rdoc
96
+ files:
97
+ - History.txt
98
+ - Manifest.txt
99
+ - README.rdoc
100
+ - lib/dir_ext.rb
101
+ - lib/flow.rb
102
+ - lib/log.rb
103
+ - lib/shovel.rb
104
+ - spec/example.rb
105
+ has_rdoc: true
106
+ homepage: Shovel http://bitbucket.org/mediashelf/shovel
107
+ post_install_message:
108
+ rdoc_options:
109
+ - --main
110
+ - README.rdoc
111
+ require_paths:
112
+ - lib
113
+ required_ruby_version: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ">="
116
+ - !ruby/object:Gem::Version
117
+ version: "0"
118
+ version:
119
+ required_rubygems_version: !ruby/object:Gem::Requirement
120
+ requirements:
121
+ - - ">="
122
+ - !ruby/object:Gem::Version
123
+ version: "0"
124
+ version:
125
+ requirements: []
126
+
127
+ rubyforge_project: rubyfedora
128
+ rubygems_version: 1.3.1
129
+ signing_key:
130
+ specification_version: 2
131
+ summary: A domain specific language for configuring ingestor processes
132
+ test_files: []
133
+