shovel 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,4 @@
1
+ == 0.0.1 2009-03-02
2
+
3
+ * 1 major enhancement:
4
+ * Initial release
@@ -0,0 +1,8 @@
1
+ History.txt
2
+ Manifest.txt
3
+ README.rdoc
4
+ lib/dir_ext.rb
5
+ lib/flow.rb
6
+ lib/log.rb
7
+ lib/shovel.rb
8
+ spec/example.rb
@@ -0,0 +1,52 @@
1
+ = shovel
2
+
3
+ * Shovel http://bitbucket.org/mediashelf/shovel
4
+
5
+ == DESCRIPTION:
6
+
7
+ A domain specific language for configuring ingestor processes
8
+
9
+ == FEATURES/PROBLEMS:
10
+
11
+ This module takes care of the drudgery of directory scanning, archive extraction and tempfile management, allowing the developer to implement the fiddly bits without thinking about the tedious bits.
12
+
13
+ More archive formats could be pretty easily supported. Some refactoring of the archive extraction code is probably a good idea too.
14
+
15
+ == SYNOPSIS:
16
+
17
+ include MediaShelf::Shovel
18
+
19
+ == REQUIREMENTS:
20
+
21
+ * directory_watcher
22
+ * rubyzip
23
+ * archive-tar-minitar
24
+
25
+ == INSTALL:
26
+
27
+ * sudo gem install shovel
28
+
29
+ == LICENSE:
30
+
31
+ (The MIT License)
32
+
33
+ Copyright (c) 2009 Mediashelf LLC
34
+
35
+ Permission is hereby granted, free of charge, to any person obtaining
36
+ a copy of this software and associated documentation files (the
37
+ 'Software'), to deal in the Software without restriction, including
38
+ without limitation the rights to use, copy, modify, merge, publish,
39
+ distribute, sublicense, and/or sell copies of the Software, and to
40
+ permit persons to whom the Software is furnished to do so, subject to
41
+ the following conditions:
42
+
43
+ The above copyright notice and this permission notice shall be
44
+ included in all copies or substantial portions of the Software.
45
+
46
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
47
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
48
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
49
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
50
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
51
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
52
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,19 @@
1
+ require 'tmpdir'
2
require 'fileutils'

class Dir
  #A handy method to create a temporary directory which cleans up after itself.
  #  Dir.mktmp do |dir|
  #    puts "i'm doing neat stuff"
  #  end
  #
  #prefix:: leading part of the directory name (defaults to "tmpdir")
  #block:: receives the path of the freshly created directory
  #
  #Returns the path that was handed to the block. The directory has already
  #been removed by the time this method returns.
  #
  #Even if your block throws an exception, the temp directory will be deleted.
  def self.mktmp(prefix="tmpdir", &block)
    # Dir.mktmpdir picks a unique name; a name built from the current second
    # alone would be shared by two calls made within the same second, and the
    # first one to finish would delete the other's files.
    dirname = Dir.mktmpdir("#{prefix}.")
    begin
      yield dirname
    ensure
      FileUtils.rm_rf dirname
    end
    dirname
  end
end
@@ -0,0 +1,62 @@
1
+ require 'log'
2
+ require 'forwardable'
3
module MediaShelf
  module Shovel
    #This class represents a "workflow" for a file contained in an ingest file being
    #processed by Shovel.
    #See +Shovel+ for more information and an example.
    class Flow
      attr_reader :reaction_map, :exception_map

      def initialize #:nodoc:
        @reaction_map = {:stable=>[], :added=>[], :removed=>[], :modified=>[]}
        @exception_map = {}
      end

      # Logging shortcuts. They are spelled out instead of using
      # Forwardable#def_delegators because that macro expects the *name* of an
      # accessor (or an expression string); it does not accept an object.
      def d(msg) #:nodoc:
        Log.instance.d(msg)
      end

      def e(msg) #:nodoc:
        Log.instance.e(msg)
      end

      def w(msg) #:nodoc:
        Log.instance.w(msg)
      end

      def i(msg) #:nodoc:
        Log.instance.i(msg)
      end

      #Add a process to the flow, to be executed by the Shovel when the
      #supplied event occurs.
      #
      #event:: the event type, one of :stable, :added, :removed or :modified
      #block:: your process. It is called with the path carried by the event.
      #
      #Raises ArgumentError for an unknown event type or a missing block.
      #
      #This method (and on_exception) returns self, so calls
      #to add_reaction can be chained.
      def add_reaction(event, &block)
        reactions = @reaction_map[event]
        if reactions.nil?
          raise ArgumentError, "unknown event #{event.inspect}, expected one of #{@reaction_map.keys.inspect}"
        end
        raise ArgumentError, "add_reaction requires a block" if block.nil?
        reactions << block
        self
      end

      #Add a process to the flow, to be executed by the Shovel when the
      #supplied Exception occurs.
      #
      #klazz:: a class which might be raised at some point in the ingest process
      #block:: what you want to do about it. The block is passed the file
      #        and the exception instance.
      #
      #Registering a second block for the same class replaces the first one.
      #
      #This method (and add_reaction) returns self, so calls
      #to on_exception can be chained.
      def on_exception(klazz, &block)
        @exception_map[klazz.name.to_sym] = block
        self
      end

      #The exception handler. It looks for a process registered for the
      #exception's class, then for each of its ancestors in turn, so a handler
      #registered for StandardError also covers RuntimeError. If no handler is
      #found the exception is logged and nothing else happens.
      #
      #f:: the file being processed when the exception was raised
      #x:: the exception instance
      def handle_exception(f, x)
        handler = handler_for(x.class)
        if handler
          handler.call(f, x)
        else
          log_exception(f, x)
        end
      end

      def log_exception(f, x) #:nodoc:
        e("Error : #{f} #{x.message}. Stack trace follows:")
        # backtrace is nil for an exception object that was never raised
        e((x.backtrace || []).join("\n"))
      end

      private

      # Returns the handler registered for klass or its closest registered
      # ancestor, or nil when there is none.
      def handler_for(klass)
        klass.ancestors.each do |ancestor|
          name = ancestor.name
          # anonymous classes and modules have no usable name
          next if name.nil? || name.empty?
          handler = @exception_map[name.to_sym]
          return handler if handler
        end
        nil
      end
    end
  end
end
@@ -0,0 +1,34 @@
1
+ require 'singleton'
2
+ require 'logger'
3
module MediaShelf
  # A singleton logger. That way we can use the same instance all over the place.
  # Singleton claims to be threadsafe.
  class Log
    include Singleton

    # Timestamp layout applied to every logger this class builds.
    DATETIME_FORMAT = "%y%m%d %H:%M:%S"

    def initialize
      unsilence!
    end

    # Discards everything logged from now on. Returns the new logger.
    def silence!
      # A nil log device makes Logger drop messages without opening a file,
      # so nothing platform specific is needed and no handle is left open.
      @logger = build_logger(nil)
    end

    # Sends log output to STDOUT again. Returns the new logger.
    def unsilence!
      @logger = build_logger(STDOUT)
    end

    # Logs msg at debug level.
    def d(msg)
      @logger.debug msg if @logger.debug?
    end

    # Logs msg at error level.
    def e(msg)
      @logger.error msg
    end

    # Logs msg at warn level.
    def w(msg)
      @logger.warn msg
    end

    # Logs msg at info level.
    def i(msg)
      @logger.info msg
    end

    # Logs msg at fatal level.
    def f(msg)
      @logger.fatal msg
    end

    private

    # Builds a logger on the given device with the shared timestamp format, so
    # that toggling silence does not lose the formatting.
    def build_logger(device)
      logger = Logger.new(device)
      logger.datetime_format = DATETIME_FORMAT
      logger
    end
  end
end
@@ -0,0 +1,137 @@
1
+ $:.unshift(File.dirname(__FILE__)) unless
2
+ $:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
3
+ require 'rubygems'
4
+ gem 'directory_watcher'
5
+ gem 'rubyzip'
6
+ gem 'archive-tar-minitar'
7
+ require 'zip/zip'
8
+ require 'directory_watcher'
9
+ require 'log'
10
+ require 'flow'
11
+ require 'dir_ext'
12
+ require 'zlib'
13
+ require 'archive/tar/minitar'
14
+
15
# Top-level namespace that carries the gem version. The mixin itself is
# MediaShelf::Shovel.
module Shovel
  # Frozen so the version string cannot be modified in place.
  VERSION = '0.0.1'.freeze
end
18
+
19
require 'fileutils'

module MediaShelf

  # = Shovel
  # This module can be mixed into a class that would like to react to
  # file events in a particular directory. For a complete
  # example see spec/example.rb
  # :include:../spec/example.rb
  module Shovel
    include Archive::Tar::Minitar

    def d(s) #:nodoc:
      Log.instance.d(s)
    end

    # Observer callback, registered with the DirectoryWatcher in
    # configure_process. Runs every reaction registered for each event's type,
    # in registration order, and hands anything a reaction raises to the flow's
    # exception handler so that one failing reaction does not stop the others.
    def update(*events)#:nodoc:
      d("events received: #{events.inspect}")
      events.each do |event|
        reactions = @flow.reaction_map[event.type] || []
        d "found : " + reactions.inspect
        reactions.each do |reaction|
          begin
            d("executing: #{reaction}")
            reaction.call(event.path)
          rescue SystemExit, SignalException, NoMemoryError
            # never swallow a request to stop the process
            raise
          rescue Exception => err
            @flow.handle_exception(event.path, err)
          end
        end
      end
    end

    #This method is used to configure the directory scanner. A +Flow+ object is returned,
    #so constructs like
    #
    # configure_process.add_reaction.add_reaction
    #
    #can be created. This method will raise a ConfigurationError if it's invoked
    #more than once on the same instance, and an ArgumentError if a Regexp is
    #passed where a glob is expected.
    #
    #dir:: directory to scan, will be created if it doesn't exist
    #interval:: how often to scan
    #stable_cycles:: how many intervals should the file remain unchanged to fire a stable event
    #glob:: array of globs to scan for
    #pre_load:: whether to suppress file events for files that already exist when the scanner is started. Defaults to true
    def configure_process(dir, interval, stable_cycles, glob, pre_load=true )
      if glob.is_a?(Regexp) || (glob.is_a?(Array) && glob.any? { |g| g.is_a?(Regexp) })
        raise ArgumentError, "cannot pass a regex in here, globs only please"
      end
      raise ConfigurationError unless @dw.nil?
      Log.instance.d("configured to scan #{dir} every #{interval}s for #{glob.inspect}. pre loading existing files? #{pre_load}")
      @dw = DirectoryWatcher.new(dir, :pre_load=>pre_load, :glob=>glob, :stable=>stable_cycles, :interval=>interval)
      @flow = Flow.new
      @dw.add_observer(self)
      @flow
    end

    #Starts the directory scanner process and waits on it.
    #Raises a ConfigurationError if configure_process has not been called yet.
    def start
      raise ConfigurationError, "configure_process must be called before start" if @dw.nil?
      @dw.start
      @dw.join
    end

    #this method knows how to extract files from a standard .zip file. The files
    #themselves are extracted into an opaque tmp directory, which is cleaned up
    #after the block is yielded.
    #
    #filename:: the name of the zip archive
    #process:: A block which will be passed each entry in the zipfile for further processing
    #
    #Example:
    # unzip('foo.zip'){|file| puts "i'm processing #{file}"}
    def unzip(filename, &process)
      d("unzipping #{filename}")
      # the block variable must not be called +d+, that is the log helper
      Dir.mktmp do |tmpdir|
        Zip::ZipFile.foreach(filename) do |entry|
          d "processing #{entry}"
          # NOTE(review): entry.name comes from the archive; a name containing
          # "../" could resolve outside tmpdir - confirm whether the rubyzip
          # version in use guards against that.
          target = File.join(tmpdir, entry.name)
          FileUtils.mkdir_p(File.dirname(target))
          # The block runs when the target already exists. Logger#warn returns
          # true; presumably rubyzip reads a truthy result as "overwrite" -
          # TODO confirm against the rubyzip version in use.
          entry.extract(target) { |x| Log.instance.w "warn: #{x} exists!" }
          yield target
        end
      end
    end

    #see +unzip+. Same thing, except for tar archives
    def untar(filename, &process)
      d("untarring #{filename}")
      unpack_tar(filename, &process)
    end

    #see +unzip+. Same thing, except for gzipped tar archives.
    #
    #gzipped:: pass false to treat +filename+ as a plain tar archive
    def untar_gz(filename, gzipped=true, &process)
      return untar(filename, &process) unless gzipped
      d("targunzipping #{filename}")
      file = File.open(filename, 'rb')
      begin
        unpack_tar(Zlib::GzipReader.new(file), &process)
      ensure
        # Covers the case where the unpacker did not close its input, e.g.
        # because the file turned out not to be gzip data.
        file.close unless file.closed?
      end
    end

    #see +unzip+. This method will try to figure out what kind of archive you passed it,
    #then call the appropriate method. Only the end of the file name is
    #considered, ignoring case. Raises a RuntimeError for anything else.
    def extract(filename, &process)
      case filename
      when /\.(tar\.gz|tgz)\z/i
        untar_gz(filename, &process)
      when /\.zip\z/i
        unzip(filename, &process)
      when /\.tar\z/i
        untar(filename, &process)
      else
        raise "Couldn't figure out what kind of archive #{filename} was..."
      end
    end

    # Unpacks a tar source (a file name or a readable stream) into a fresh tmp
    # directory and yields the path of each file once it is completely written.
    def unpack_tar(source) #:nodoc:
      Dir.mktmp do |tmpdir|
        Archive::Tar::Minitar.unpack(source, tmpdir) do |event, name, stats|
          yield File.join(tmpdir, name) if event == :file_done
        end
      end
    end
    private :unpack_tar

    #Raised if configure_process is called more than once, or if the scanner
    #is started before it has been configured.
    class ConfigurationError < StandardError
      def initialize(msg = "The shovel can only be configured once.")
        super
      end
    end
  end

end
@@ -0,0 +1,53 @@
1
+ require 'rubygems'
2
+ gem 'shovel'
3
+ require 'ruby-debug'
4
+ require 'shovel'
5
+
6
class Example
  # pull in the shovel mixin
  include MediaShelf::Shovel

  def initialize
    super
    here = File.dirname(__FILE__)
    ingest_dir = File.join(here, 'ingest')
    fail_dir = File.join(here, 'failboat')
    # somewhere to park the archives we could not process
    FileUtils.mkdir_p(fail_dir) unless File.exist?(fail_dir)

    # Scan the ingest directory for compressed archives with an interval of 4,
    # treat an archive as stable once it is unchanged for 1 interval, and do
    # NOT suppress events for archives that are already sitting in there.
    workflow = configure_process(ingest_dir, 4, 1, %w(**/*.zip **/*.tgz **/*.tar.gz **/*.tar), false)
    wire_reactions(workflow)
    wire_failure_handler(workflow, fail_dir)
  end

  private

  # Registers the file event reactions. Reactions for the same event are
  # executed in the order they are added.
  def wire_reactions(workflow)
    # first reaction to a stable archive: unpack it and look at every file inside
    workflow.add_reaction(:stable) do |archive|
      extract(archive) do |inner|
        puts "i'm doing stuff to #{inner} of size #{File.size(inner)}"
        # blow up on purpose for some archives, to show off exception handling
        raise "This file is Exceptional! !" if archive =~ /.*\.gz/
      end
    end
    # second reaction to a stable archive: delete it
    workflow.add_reaction(:stable) do |archive|
      FileUtils.rm(archive, :verbose=>true)
    end
    workflow.add_reaction(:removed) do |archive|
      d("so long #{archive}!")
    end
  end

  # Registers the RuntimeError handler, which moves the offending archive
  # into fail_dir under a name suffixed with the current time.
  def wire_failure_handler(workflow, fail_dir)
    workflow.on_exception(RuntimeError) do |archive, error|
      d "URF!, an exception was thrown on #{archive}! Welcome to the FAILboat!"
      d error.message
      parked = File.join(fail_dir, format("%s.%d", File.basename(archive), Time.now))
      FileUtils.mv(archive, parked, :verbose=>true)
    end
  end
end

# build the example and start scanning
Example.new.start
metadata ADDED
@@ -0,0 +1,133 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: shovel
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - McClain Looney
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-03-04 00:00:00 -06:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: directory_watcher
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: 1.1.2
24
+ version:
25
+ - !ruby/object:Gem::Dependency
26
+ name: rubyzip
27
+ type: :runtime
28
+ version_requirement:
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: 0.9.1
34
+ version:
35
+ - !ruby/object:Gem::Dependency
36
+ name: archive-tar-minitar
37
+ type: :runtime
38
+ version_requirement:
39
+ version_requirements: !ruby/object:Gem::Requirement
40
+ requirements:
41
+ - - ">="
42
+ - !ruby/object:Gem::Version
43
+ version: 0.5.2
44
+ version:
45
+ - !ruby/object:Gem::Dependency
46
+ name: newgem
47
+ type: :development
48
+ version_requirement:
49
+ version_requirements: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - ">="
52
+ - !ruby/object:Gem::Version
53
+ version: 1.2.3
54
+ version:
55
+ - !ruby/object:Gem::Dependency
56
+ name: mocha
57
+ type: :development
58
+ version_requirement:
59
+ version_requirements: !ruby/object:Gem::Requirement
60
+ requirements:
61
+ - - ">="
62
+ - !ruby/object:Gem::Version
63
+ version: 0.9.5
64
+ version:
65
+ - !ruby/object:Gem::Dependency
66
+ name: thoughtbot-shoulda
67
+ type: :development
68
+ version_requirement:
69
+ version_requirements: !ruby/object:Gem::Requirement
70
+ requirements:
71
+ - - ">="
72
+ - !ruby/object:Gem::Version
73
+ version: 2.9.1
74
+ version:
75
+ - !ruby/object:Gem::Dependency
76
+ name: hoe
77
+ type: :development
78
+ version_requirement:
79
+ version_requirements: !ruby/object:Gem::Requirement
80
+ requirements:
81
+ - - ">="
82
+ - !ruby/object:Gem::Version
83
+ version: 1.8.0
84
+ version:
85
+ description: A domain specific language for configuring ingestor processes
86
+ email:
87
+ - mcclain.looney@yourmediashelf.com
88
+ executables: []
89
+
90
+ extensions: []
91
+
92
+ extra_rdoc_files:
93
+ - History.txt
94
+ - Manifest.txt
95
+ - README.rdoc
96
+ files:
97
+ - History.txt
98
+ - Manifest.txt
99
+ - README.rdoc
100
+ - lib/dir_ext.rb
101
+ - lib/flow.rb
102
+ - lib/log.rb
103
+ - lib/shovel.rb
104
+ - spec/example.rb
105
+ has_rdoc: true
106
+ homepage: Shovel http://bitbucket.org/mediashelf/shovel
107
+ post_install_message:
108
+ rdoc_options:
109
+ - --main
110
+ - README.rdoc
111
+ require_paths:
112
+ - lib
113
+ required_ruby_version: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ">="
116
+ - !ruby/object:Gem::Version
117
+ version: "0"
118
+ version:
119
+ required_rubygems_version: !ruby/object:Gem::Requirement
120
+ requirements:
121
+ - - ">="
122
+ - !ruby/object:Gem::Version
123
+ version: "0"
124
+ version:
125
+ requirements: []
126
+
127
+ rubyforge_project: rubyfedora
128
+ rubygems_version: 1.3.1
129
+ signing_key:
130
+ specification_version: 2
131
+ summary: A domain specific language for configuring ingestor processes
132
+ test_files: []
133
+