shovel 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +4 -0
- data/Manifest.txt +8 -0
- data/README.rdoc +52 -0
- data/lib/dir_ext.rb +19 -0
- data/lib/flow.rb +62 -0
- data/lib/log.rb +34 -0
- data/lib/shovel.rb +137 -0
- data/spec/example.rb +53 -0
- metadata +133 -0
data/History.txt
ADDED
data/Manifest.txt
ADDED
data/README.rdoc
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
= shovel
|
2
|
+
|
3
|
+
* Shovel http://bitbucket.org/mediashelf/shovel
|
4
|
+
|
5
|
+
== DESCRIPTION:
|
6
|
+
|
7
|
+
A domain specific language for configuring ingestor processes
|
8
|
+
|
9
|
+
== FEATURES/PROBLEMS:
|
10
|
+
|
11
|
+
This module takes care of the drudgery of directory scanning, archive extraction and tempfile management, allowing the developer to implement the fiddly bits without thinking about the tedious bits.
|
12
|
+
|
13
|
+
More archive formats could be pretty easily supported. Some refactoring of the archive extraction code is probably a good idea too.
|
14
|
+
|
15
|
+
== SYNOPSIS:
|
16
|
+
|
17
|
+
include MediaShelf::Shovel
|
18
|
+
|
19
|
+
== REQUIREMENTS:
|
20
|
+
|
21
|
+
* directory_watcher
|
22
|
+
* rubyzip
|
23
|
+
* archive-tar-minitar
|
24
|
+
|
25
|
+
== INSTALL:
|
26
|
+
|
27
|
+
* sudo gem install shovel
|
28
|
+
|
29
|
+
== LICENSE:
|
30
|
+
|
31
|
+
(The MIT License)
|
32
|
+
|
33
|
+
Copyright (c) 2009 Mediashelf LLC
|
34
|
+
|
35
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
36
|
+
a copy of this software and associated documentation files (the
|
37
|
+
'Software'), to deal in the Software without restriction, including
|
38
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
39
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
40
|
+
permit persons to whom the Software is furnished to do so, subject to
|
41
|
+
the following conditions:
|
42
|
+
|
43
|
+
The above copyright notice and this permission notice shall be
|
44
|
+
included in all copies or substantial portions of the Software.
|
45
|
+
|
46
|
+
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
47
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
48
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
49
|
+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
50
|
+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
51
|
+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
52
|
+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/lib/dir_ext.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
require 'tmpdir'
|
2
|
+
class Dir
  # A handy method to create a temporary directory which cleans up after itself.
  #
  #   Dir.mktmp do |dir|
  #     puts "i'm doing neat stuff"
  #   end
  #
  # Even if your block raises an exception, the temp directory will be deleted.
  #
  # prefix:: leading part of the directory name (defaults to "tmpdir")
  #
  # Returns the path of the directory, which no longer exists by the time
  # this method returns.
  def self.mktmp(prefix="tmpdir")
    # Dir.mktmpdir picks a name that is not in use yet. A name built only
    # from the current epoch second is shared by every caller within that
    # second, and the first caller to finish would delete the others' files.
    dirname = Dir.mktmpdir("#{prefix}.")
    begin
      yield dirname
    ensure
      FileUtils.rm_rf dirname
    end
    dirname
  end
end
|
data/lib/flow.rb
ADDED
@@ -0,0 +1,62 @@
|
|
1
|
+
require 'log'
|
2
|
+
require 'forwardable'
|
3
|
+
module MediaShelf
|
4
|
+
module Shovel
|
5
|
+
#This class represents a "workflow" for a file contained in an ingest file being
|
6
|
+
#processed by Shovel
|
7
|
+
#See +Shovel+ for more information and an example
|
8
|
+
class Flow
  # Kept so that code reopening or subclassing Flow can still declare delegators.
  extend Forwardable
  attr_reader :reaction_map, :exception_map

  def initialize #:nodoc:
    @reaction_map = {:stable=>[], :added=>[], :removed=>[], :modified=>[]}
    @exception_map = {}
  end

  # Logging shortcuts, forwarded to the shared Log singleton.
  #
  # These are written out as plain methods because def_delegators expects the
  # *name* of an accessor (a symbol or string); handing it the Log object
  # itself does not yield working forwarders.
  def d(msg) #:nodoc:
    Log.instance.d(msg)
  end

  def e(msg) #:nodoc:
    Log.instance.e(msg)
  end

  def w(msg) #:nodoc:
    Log.instance.w(msg)
  end

  def i(msg) #:nodoc:
    Log.instance.i(msg)
  end

  # Add a process to the flow, to be executed by the Shovel when the
  # supplied event occurs.
  #
  # event:: the event type, one of :stable, :added, :removed or :modified
  # block:: your process. The process will be passed the file that triggered
  #         the event.
  #
  # Raises ArgumentError if +event+ is not one of the known event types.
  #
  # This method (and on_exception) returns self, so calls
  # to add_reaction can be chained.
  def add_reaction(event, &block)
    reactions = @reaction_map[event]
    if reactions.nil?
      raise ArgumentError, "unknown event #{event.inspect}, expected one of #{@reaction_map.keys.inspect}"
    end
    reactions << block
    self
  end

  # Add a process to the flow, to be executed by the Shovel when the
  # supplied Exception occurs.
  #
  # klazz:: a class which might be raised at some point in the ingest process
  # block:: what you want to do about it. The block is passed the file
  #         and the exception instance.
  #
  # Handlers are stored in exception_map under the class name as a symbol.
  #
  # This method (and add_reaction) returns self, so calls
  # to on_exception can be chained.
  def on_exception(klazz, &block)
    @exception_map[klazz.name.to_sym] = block
    self
  end

  # The exception handler. It looks for a process mapped to the class of the
  # given exception, or failing that to the nearest ancestor class that has
  # one, and invokes it with the file and the exception. If no handler is
  # found it just logs the exception and bails.
  def handle_exception(f, x)
    handler = handler_for(x)
    if handler
      handler.call(f, x)
    else
      log_exception(f, x)
    end
  end

  def log_exception(f, x) #:nodoc:
    e("Error : #{f} #{x.message}. Stack trace follows:")
    # backtrace is nil for an exception object that was never raised
    e(Array(x.backtrace).join("\n"))
  end

  private

  # Finds the handler registered for the most specific ancestor of x's class.
  def handler_for(x) #:nodoc:
    x.class.ancestors.each do |klass|
      name = klass.name
      # anonymous classes and modules have no usable name to look up
      next if name.nil? || name.empty?
      handler = @exception_map[name.to_sym]
      return handler if handler
    end
    nil
  end
end
|
61
|
+
end
|
62
|
+
end
|
data/lib/log.rb
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
require 'singleton'
|
2
|
+
require 'logger'
|
3
|
+
module MediaShelf
|
4
|
+
# A singleton logger. That way we can use the same instance all over the place.
|
5
|
+
# Singleton claims to be threadsafe.
|
6
|
+
class Log
  include Singleton

  # Timestamp layout applied to every logger this class builds.
  DATETIME_FORMAT = "%y%m%d %H:%M:%S".freeze

  def initialize
    unsilence!
  end

  # Discards everything logged from now on. Returns the new Logger.
  def silence!
    # A Logger built without a device writes nowhere, so there is no need to
    # open '/dev/null' (which does not exist on every platform and left a
    # file handle open on each call).
    @logger = build_logger(nil)
  end

  # Sends log output to STDOUT again. Returns the new Logger.
  def unsilence!
    @logger = build_logger(STDOUT)
  end

  # Logs +msg+ at debug level.
  def d(msg)
    @logger.debug msg
  end

  # Logs +msg+ at error level.
  def e(msg)
    @logger.error msg
  end

  # Logs +msg+ at warn level.
  def w(msg)
    @logger.warn msg
  end

  # Logs +msg+ at info level.
  def i(msg)
    @logger.info msg
  end

  # Logs +msg+ at fatal level.
  def f(msg)
    @logger.fatal msg
  end

  private

  # Builds a Logger for +device+ with the shared timestamp format, so that
  # toggling silence does not lose the formatting set up at start.
  def build_logger(device) #:nodoc:
    logger = Logger.new(device)
    logger.datetime_format = DATETIME_FORMAT
    logger
  end
end
|
34
|
+
end
|
data/lib/shovel.rb
ADDED
@@ -0,0 +1,137 @@
|
|
1
|
+
$:.unshift(File.dirname(__FILE__)) unless
|
2
|
+
$:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
|
3
|
+
require 'rubygems'
|
4
|
+
gem 'directory_watcher'
|
5
|
+
gem 'rubyzip'
|
6
|
+
gem 'archive-tar-minitar'
|
7
|
+
require 'zip/zip'
|
8
|
+
require 'directory_watcher'
|
9
|
+
require 'log'
|
10
|
+
require 'flow'
|
11
|
+
require 'dir_ext'
|
12
|
+
require 'zlib'
|
13
|
+
require 'archive/tar/minitar'
|
14
|
+
|
15
|
+
# Version information for the shovel gem.
module Shovel
  # Current release; frozen so the shared string cannot be modified in place.
  VERSION = '0.0.1'.freeze
end
|
18
|
+
|
19
|
+
module MediaShelf
|
20
|
+
|
21
|
+
# = Shovel
|
22
|
+
# This module can be mixed into a class that would like to react to
|
23
|
+
# file events in a particular directory. For a complete
|
24
|
+
# example see spec/example.rb
|
25
|
+
# :include:../spec/example.rb
|
26
|
+
module Shovel
  include Archive::Tar::Minitar

  def d(s) #:nodoc:
    Log.instance.d(s)
  end

  # Observer callback registered with the DirectoryWatcher in
  # configure_process. Runs, in registration order, every reaction the flow
  # holds for each event's type, passing the event's path. An exception raised
  # by a reaction is handed to the flow's exception handling and the remaining
  # reactions still run.
  def update(*events) #:nodoc:
    d("events received: #{events.inspect}")
    events.each do |event|
      # an event type without a reaction list is treated as "nothing to do"
      reactions = @flow.reaction_map[event.type] || []
      d "found : " + reactions.inspect
      reactions.each do |reaction|
        begin
          d("executing: #{reaction}")
          reaction.call(event.path)
        rescue SystemExit, SignalException, NoMemoryError
          # never swallow a request to stop the process
          raise
        rescue Exception => error
          # Exception (not just StandardError) because Flow#on_exception
          # accepts any exception class
          @flow.handle_exception(event.path, error)
        end
      end
    end
  end

  # This method is used to configure the directory scanner. A +Flow+ object is returned,
  # so constructs like
  #
  #   configure_process.add_reaction.add_reaction
  #
  # can be created. This method raises ConfigurationError if it's invoked
  # more than once on the same instance.
  #
  # dir:: directory to scan, will be created if it doesn't exist
  # interval:: how often to scan
  # stable_cycles:: how many intervals should the file remain unchanged to fire a stable event
  # glob:: array of globs to scan for
  # pre_load:: whether to suppress file events for files that already exist when the scanner is started. Defaults to true
  def configure_process(dir, interval, stable_cycles, glob, pre_load=true )
    raise ArgumentError.new("cannot pass a regex in here, globs only please") if glob.is_a?(Regexp)
    raise ConfigurationError unless @dw.nil?
    Log.instance.d("configured to scan #{dir} every #{interval}s for #{glob.inspect}. pre loading existing files? #{pre_load}")
    @dw = DirectoryWatcher.new(dir, :pre_load=>pre_load, :glob=>glob, :stable=>stable_cycles, :interval=>interval)
    @flow = Flow.new
    @dw.add_observer(self)
    @flow
  end

  # Starts the directory scanner process and waits on it (join).
  # configure_process must have been called first.
  def start
    raise "configure_process must be called before start" if @dw.nil?
    @dw.start
    @dw.join
  end

  # This method knows how to extract files from a standard .zip file. The files
  # themselves are extracted into an opaque tmp directory, which is cleaned up
  # after the last entry has been yielded.
  #
  # filename:: the name of the zip archive
  # process:: A block which will be passed the path of each extracted entry for further processing
  #
  # Raises ArgumentError if an entry name would resolve outside the tmp directory.
  #
  # Example:
  #   unzip('foo.zip'){|file| puts "i'm processing #{file}"}
  def unzip(filename, &process)
    d("unzipping #{filename}")
    Dir.mktmp do |tmpdir|
      Zip::ZipFile.foreach(filename) do |entry|
        d "processing #{entry}"
        target = archive_entry_path(tmpdir, entry.name)
        FileUtils.mkdir_p(File.dirname(target))
        entry.extract(target) do |*existing|
          Log.instance.w("warn: #{existing.last} exists!")
          # NOTE(review): rubyzip is understood to overwrite the existing file
          # when this block returns true - confirm against the bundled version
          true
        end
        yield target
      end
    end
  end

  # See +unzip+. Same thing, except for tar archives.
  def untar(filename, &process)
    d("untarring #{filename}")
    untar_gz(filename, false, &process )
  end

  # See +unzip+. Same thing, except for gzipped tar archives.
  #
  # gzipped:: pass false to read +filename+ as a plain, uncompressed tar
  def untar_gz(filename, gzipped=true, &process)
    d("targunzipping #{filename}")
    tgz = gzipped ? Zlib::GzipReader.new(File.open(filename, 'rb')) : filename
    Dir.mktmp do |tmpdir|
      Archive::Tar::Minitar.unpack(tgz, tmpdir) do |event, name, stats|
        # only completely written files are handed to the caller
        yield File.join(tmpdir, name) if event == :file_done
      end
    end
  end

  # See +unzip+. This method will try to figure out what kind of archive you passed it
  # from the end of its name (.tar.gz, .tgz, .zip or .tar), then call the appropriate method.
  #
  # Raises a RuntimeError when the name ends in none of those extensions.
  def extract(filename, &process)
    case filename
    when /\.(tar\.gz|tgz)\z/
      untar_gz(filename, &process)
    when /\.zip\z/
      unzip( filename, &process)
    when /\.tar\z/
      untar(filename, &process)
    else
      raise "Couldn't figure out what kind of archive #{filename} was..."
    end
  end

  # Joins an archive entry name onto +base+, refusing names (such as
  # "../../etc/passwd") that would resolve to a location outside of +base+.
  def archive_entry_path(base, name) #:nodoc:
    root = File.expand_path(base)
    resolved = File.expand_path(File.join(root, name))
    inside = resolved == root || resolved[0, root.length + 1] == root + File::SEPARATOR
    raise ArgumentError, "archive entry #{name.inspect} would be extracted outside of #{base}" unless inside
    File.join(base, name)
  end
  private :archive_entry_path

  # Raised if configure_process is called more than once.
  class ConfigurationError < StandardError
    def initialize
      super("The shovel can only be configured once.")
    end
  end
end
|
136
|
+
|
137
|
+
end
|
data/spec/example.rb
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
gem 'shovel'
|
3
|
+
require 'ruby-debug'
|
4
|
+
require 'shovel'
|
5
|
+
|
6
|
+
class Example
  # pull in the shovel mixin
  include MediaShelf::Shovel

  def initialize
    super
    here = File.dirname(__FILE__)
    ingest_dir = File.join(here, 'ingest')
    fail_dir = File.join(here, 'failboat')
    # somewhere to park archives that blew up
    FileUtils.mkdir_p(fail_dir) if !File.exist?(fail_dir)

    # scan the ingest directory for compressed archives with an interval of 4,
    # treat an archive as stable after 1 unchanged scan, and do not suppress
    # events for files that are already sitting in there(!)
    globs = ['**/*.zip', '**/*.tgz', '**/*.tar.gz', '**/*.tar']
    flow = configure_process(ingest_dir, 4, 1, globs, false)
    register_reactions(flow)
    register_failure_handler(flow, fail_dir)
  end

  private

  # Reactions run in the order they are added: first unpack and process the
  # archive, then delete it.
  def register_reactions(flow)
    flow.add_reaction(:stable) { |archive|
      extract(archive) { |member|
        # do some stuff to every file inside the archive
        puts "i'm doing stuff to #{member} of size #{File.size(member)}"
        # and possibly blow up, to demonstrate exception handling
        raise "This file is Exceptional! !" if archive =~ /.*\.gz/
      }
    }
    flow.add_reaction(:stable) { |archive|
      FileUtils.rm(archive, :verbose=>true)
    }
    flow.add_reaction(:removed) { |archive|
      d("so long #{archive}!")
    }
  end

  # Archives whose processing raised a RuntimeError are moved into fail_dir
  # under a name suffixed with the current time.
  def register_failure_handler(flow, fail_dir)
    flow.on_exception(RuntimeError) { |path, error|
      d "URF!, an exception was thrown on #{path}! Welcome to the FAILboat!"
      d error.message
      parked = File.join(fail_dir, format("%s.%d", File.basename(path), Time.now))
      FileUtils.mv(path, parked, :verbose=>true)
    }
  end
end

# build the example and start scanning
Example.new.start
|
metadata
ADDED
@@ -0,0 +1,133 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: shovel
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- McClain Looney
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-03-04 00:00:00 -06:00
|
13
|
+
default_executable:
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: directory_watcher
|
17
|
+
type: :runtime
|
18
|
+
version_requirement:
|
19
|
+
version_requirements: !ruby/object:Gem::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">="
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 1.1.2
|
24
|
+
version:
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: rubyzip
|
27
|
+
type: :runtime
|
28
|
+
version_requirement:
|
29
|
+
version_requirements: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 0.9.1
|
34
|
+
version:
|
35
|
+
- !ruby/object:Gem::Dependency
|
36
|
+
name: archive-tar-minitar
|
37
|
+
type: :runtime
|
38
|
+
version_requirement:
|
39
|
+
version_requirements: !ruby/object:Gem::Requirement
|
40
|
+
requirements:
|
41
|
+
- - ">="
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: 0.5.2
|
44
|
+
version:
|
45
|
+
- !ruby/object:Gem::Dependency
|
46
|
+
name: newgem
|
47
|
+
type: :development
|
48
|
+
version_requirement:
|
49
|
+
version_requirements: !ruby/object:Gem::Requirement
|
50
|
+
requirements:
|
51
|
+
- - ">="
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: 1.2.3
|
54
|
+
version:
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: mocha
|
57
|
+
type: :development
|
58
|
+
version_requirement:
|
59
|
+
version_requirements: !ruby/object:Gem::Requirement
|
60
|
+
requirements:
|
61
|
+
- - ">="
|
62
|
+
- !ruby/object:Gem::Version
|
63
|
+
version: 0.9.5
|
64
|
+
version:
|
65
|
+
- !ruby/object:Gem::Dependency
|
66
|
+
name: thoughtbot-shoulda
|
67
|
+
type: :development
|
68
|
+
version_requirement:
|
69
|
+
version_requirements: !ruby/object:Gem::Requirement
|
70
|
+
requirements:
|
71
|
+
- - ">="
|
72
|
+
- !ruby/object:Gem::Version
|
73
|
+
version: 2.9.1
|
74
|
+
version:
|
75
|
+
- !ruby/object:Gem::Dependency
|
76
|
+
name: hoe
|
77
|
+
type: :development
|
78
|
+
version_requirement:
|
79
|
+
version_requirements: !ruby/object:Gem::Requirement
|
80
|
+
requirements:
|
81
|
+
- - ">="
|
82
|
+
- !ruby/object:Gem::Version
|
83
|
+
version: 1.8.0
|
84
|
+
version:
|
85
|
+
description: A domain specific language for configuring ingestor processes
|
86
|
+
email:
|
87
|
+
- mcclain.looney@yourmediashelf.com
|
88
|
+
executables: []
|
89
|
+
|
90
|
+
extensions: []
|
91
|
+
|
92
|
+
extra_rdoc_files:
|
93
|
+
- History.txt
|
94
|
+
- Manifest.txt
|
95
|
+
- README.rdoc
|
96
|
+
files:
|
97
|
+
- History.txt
|
98
|
+
- Manifest.txt
|
99
|
+
- README.rdoc
|
100
|
+
- lib/dir_ext.rb
|
101
|
+
- lib/flow.rb
|
102
|
+
- lib/log.rb
|
103
|
+
- lib/shovel.rb
|
104
|
+
- spec/example.rb
|
105
|
+
has_rdoc: true
|
106
|
+
homepage: Shovel http://bitbucket.org/mediashelf/shovel
|
107
|
+
post_install_message:
|
108
|
+
rdoc_options:
|
109
|
+
- --main
|
110
|
+
- README.rdoc
|
111
|
+
require_paths:
|
112
|
+
- lib
|
113
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - ">="
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: "0"
|
118
|
+
version:
|
119
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
120
|
+
requirements:
|
121
|
+
- - ">="
|
122
|
+
- !ruby/object:Gem::Version
|
123
|
+
version: "0"
|
124
|
+
version:
|
125
|
+
requirements: []
|
126
|
+
|
127
|
+
rubyforge_project: rubyfedora
|
128
|
+
rubygems_version: 1.3.1
|
129
|
+
signing_key:
|
130
|
+
specification_version: 2
|
131
|
+
summary: A domain specific language for configuring ingestor processes
|
132
|
+
test_files: []
|
133
|
+
|