shovel 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +4 -0
- data/Manifest.txt +8 -0
- data/README.rdoc +52 -0
- data/lib/dir_ext.rb +19 -0
- data/lib/flow.rb +62 -0
- data/lib/log.rb +34 -0
- data/lib/shovel.rb +137 -0
- data/spec/example.rb +53 -0
- metadata +133 -0
data/History.txt
ADDED
data/Manifest.txt
ADDED
data/README.rdoc
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
= shovel
|
2
|
+
|
3
|
+
* Shovel http://bitbucket.org/mediashelf/shovel
|
4
|
+
|
5
|
+
== DESCRIPTION:
|
6
|
+
|
7
|
+
A domain specific language for configuring ingestor processes
|
8
|
+
|
9
|
+
== FEATURES/PROBLEMS:
|
10
|
+
|
11
|
+
This module takes care of the drudgery of directory scanning, archive extraction and tempfile management, allowing the developer to implement the fiddly bits without thinking about the tedious bits.
|
12
|
+
|
13
|
+
More archive formats could be pretty easily supported. Some refactoring of the archive extraction code is probably a good idea too.
|
14
|
+
|
15
|
+
== SYNOPSIS:
|
16
|
+
|
17
|
+
include MediaShelf::Shovel
|
18
|
+
|
19
|
+
== REQUIREMENTS:
|
20
|
+
|
21
|
+
* directory_watcher
|
22
|
+
* rubyzip
|
23
|
+
* archive-tar-minitar
|
24
|
+
|
25
|
+
== INSTALL:
|
26
|
+
|
27
|
+
* sudo gem install shovel
|
28
|
+
|
29
|
+
== LICENSE:
|
30
|
+
|
31
|
+
(The MIT License)
|
32
|
+
|
33
|
+
Copyright (c) 2009 Mediashelf LLC
|
34
|
+
|
35
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
36
|
+
a copy of this software and associated documentation files (the
|
37
|
+
'Software'), to deal in the Software without restriction, including
|
38
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
39
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
40
|
+
permit persons to whom the Software is furnished to do so, subject to
|
41
|
+
the following conditions:
|
42
|
+
|
43
|
+
The above copyright notice and this permission notice shall be
|
44
|
+
included in all copies or substantial portions of the Software.
|
45
|
+
|
46
|
+
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
47
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
48
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
49
|
+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
50
|
+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
51
|
+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
52
|
+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/lib/dir_ext.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
require 'tmpdir'
|
2
|
+
class Dir
  #A handy method to create a temporary directory which cleans up after itself.
  #
  #  Dir.mktmp do |dir|
  #    puts "i'm doing neat stuff"
  #  end
  #
  #The directory is created below Dir.tmpdir by Dir.mktmpdir, so every call
  #gets its own uniquely named directory beginning with "<prefix>.". (Naming
  #the directory after the current second, as this method used to, let two
  #calls made within the same second share - and delete - one directory.)
  #
  #prefix:: leading part of the directory name, defaults to "tmpdir"
  #block::  receives the path of the freshly created directory
  #
  #Returns the path of the directory, which no longer exists by the time the
  #method returns. Even if your block throws an exception, the temp directory
  #will be deleted.
  def self.mktmp(prefix="tmpdir", &block)
    dirname = Dir.mktmpdir("#{prefix}.")
    begin
      yield dirname
    ensure
      FileUtils.rm_rf dirname
    end
    dirname
  end
end
|
data/lib/flow.rb
ADDED
@@ -0,0 +1,62 @@
|
|
1
|
+
require 'log'
|
2
|
+
require 'forwardable'
|
3
|
+
module MediaShelf
  module Shovel
    #This class represents a "workflow" for a file contained in an ingest file being
    #processed by Shovel
    #See +Shovel+ for more information and an example
    class Flow
      extend Forwardable

      #Logging shortcuts. They are delegated through the private #logger method:
      #Forwardable expects the *name* of an accessor, so handing it the Log
      #object itself (as was done before) produced delegators that never
      #reached the logger.
      def_delegators :logger, :d, :e, :w, :i
      attr_reader :reaction_map, :exception_map

      def initialize #:nodoc:
        @reaction_map = {:stable=>[], :added=>[], :removed=>[], :modified=>[]}
        @exception_map = {}
      end

      #add a process to the flow, to be executed by the Shovel when the
      #supplied event occurs.
      #
      #event:: the event type, one of :stable, :added, :removed or :modified
      #block:: your process. the process will be passed the file the event
      #        refers to (most likely an archive found by the scanner).
      #
      #Raises ArgumentError if +event+ is not one of the known event types.
      #
      #This method (and on_exception) returns self, so calls
      #to add_reaction can be chained.
      def add_reaction(event, &block)
        unless @reaction_map.key?(event)
          raise ArgumentError, "unknown event #{event.inspect}, expected one of :stable, :added, :removed or :modified"
        end
        @reaction_map[event] << block
        self
      end

      #add a process to the flow, to be executed by the Shovel when the
      #supplied Exception occurs.
      #
      #klazz:: a class which might be raised at some point in the ingest process.
      #        Handlers also apply to subclasses of +klazz+ (see handle_exception).
      #block:: what you want to do about it. The block is passed the file,
      #        and the exception instance.
      #
      #Registering a second handler for the same class replaces the first one.
      #
      #This method (and add_reaction) returns self, so calls
      #to on_exception can be chained.
      def on_exception(klazz, &block)
        @exception_map[klazz.name.to_sym] = block
        self
      end

      #The exception handler. It looks for a process mapped to the class of
      #the given exception, then to each of its ancestors in turn (nearest
      #first), and invokes the first one it finds with the file and the
      #exception. If it can't find a handler it just logs the exception
      #and bails.
      #
      #f:: the file that was being processed
      #x:: the exception instance
      def handle_exception(f, x)
        handler = handler_for(x)
        if handler
          handler.call(f, x)
        else
          log_exception(f, x)
        end
      end

      def log_exception(f, x) #:nodoc:
        e("Error : #{f} #{x.message}. Stack trace follows:")
        #backtrace is nil for an exception object that was never raised
        e((x.backtrace || []).join("\n"))
      end

      private

      #The shared logger all d/e/w/i calls are forwarded to.
      def logger
        Log.instance
      end

      #Finds the handler registered for the closest ancestor of x's class,
      #or nil when none is registered.
      def handler_for(x)
        x.class.ancestors.each do |klass|
          name = klass.name
          #anonymous classes and modules have no usable name
          next if name.nil? || name.empty?
          handler = @exception_map[name.to_sym]
          return handler if handler
        end
        nil
      end
    end
  end
end
|
data/lib/log.rb
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
require 'singleton'
|
2
|
+
require 'logger'
|
3
|
+
module MediaShelf
  # A singleton logger. That way we can use the same instance all over the place.
  # Singleton claims to be threadsafe.
  class Log
    include Singleton

    # Timestamp layout applied to every logger this class builds.
    DATETIME_FORMAT = "%y%m%d %H:%M:%S"

    def initialize
      unsilence!
    end

    # Discards all further log output. A Logger built on a nil device writes
    # nothing, so no file handle on /dev/null has to be opened (and left open).
    # Returns the new underlying Logger.
    def silence!
      @logger = build_logger(nil)
    end

    # Sends log output to STDOUT again. Returns the new underlying Logger.
    def unsilence!
      @logger = build_logger(STDOUT)
    end

    # Logs +msg+ at debug level.
    def d(msg)
      @logger.debug msg
    end

    # Logs +msg+ at error level.
    def e(msg)
      @logger.error msg
    end

    # Logs +msg+ at warn level.
    def w(msg)
      @logger.warn msg
    end

    # Logs +msg+ at info level.
    def i(msg)
      @logger.info msg
    end

    # Logs +msg+ at fatal level.
    def f(msg)
      @logger.fatal msg
    end

    private

    # Builds a Logger on +device+ with the shared timestamp format, so that
    # silencing and unsilencing do not lose the format set at start-up.
    def build_logger(device)
      logger = Logger.new(device)
      logger.datetime_format = DATETIME_FORMAT
      logger
    end
  end
end
|
data/lib/shovel.rb
ADDED
@@ -0,0 +1,137 @@
|
|
1
|
+
$:.unshift(File.dirname(__FILE__)) unless
|
2
|
+
$:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
|
3
|
+
require 'rubygems'
|
4
|
+
gem 'directory_watcher'
|
5
|
+
gem 'rubyzip'
|
6
|
+
gem 'archive-tar-minitar'
|
7
|
+
require 'zip/zip'
|
8
|
+
require 'directory_watcher'
|
9
|
+
require 'log'
|
10
|
+
require 'flow'
|
11
|
+
require 'dir_ext'
|
12
|
+
require 'zlib'
|
13
|
+
require 'archive/tar/minitar'
|
14
|
+
|
15
|
+
# Top-level namespace that only carries the gem's version string.
# The mixin with the actual behaviour is MediaShelf::Shovel, defined below.
module Shovel
  # Version of the shovel gem.
  VERSION = '0.0.1'
end
|
18
|
+
|
19
|
+
module MediaShelf

  # = Shovel
  # This module can be mixed into a class that would like to react to
  # file events in a particular directory. For a complete
  # example see spec/example.rb
  # :include:../spec/example.rb
  module Shovel
    include Archive::Tar::Minitar

    def d(s) #:nodoc:
      Log.instance.d(s)
    end

    #Observer callback: configure_process registers this object with the
    #DirectoryWatcher, which then reports file events here. Every reaction
    #registered on the flow for an event's type is called, in order, with the
    #event's path. An exception raised by one reaction is passed to the flow's
    #exception handling and does not prevent the remaining reactions from running.
    def update(*events) #:nodoc:
      d("events received: #{events.inspect}")
      events.each do |event|
        reactions = @flow.reaction_map[event.type] || []
        d "found : " + reactions.inspect
        reactions.each do |reaction|
          begin
            d("executing: #{reaction}")
            reaction.call(event.path)
          rescue SystemExit, SignalException, NoMemoryError
            #never swallow requests to stop the process or fatal VM conditions
            raise
          rescue Exception => error
            #deliberately broad: flows may register handlers for any exception class
            @flow.handle_exception(event.path, error)
          end
        end
      end
    end

    #This method is used to configure the directory scanner. A +Flow+ object is returned,
    #so constructs like
    #
    # configure_process.add_reaction.add_reaction
    #
    #can be created. This method will raise a ConfigurationError if it's invoked
    #more than once on the same instance, and an ArgumentError if +glob+ is a Regexp.
    #
    #dir:: directory to scan, will be created if it doesn't exist
    #interval:: how often to scan
    #stable_cycles:: how many intervals should the file remain unchanged to fire a stable event
    #glob:: array of globs to scan for
    #pre_load:: whether to suppress file events for files that already exist when the scanner is started. Defaults to true
    def configure_process(dir, interval, stable_cycles, glob, pre_load=true)
      raise ArgumentError, "cannot pass a regex in here, globs only please" if glob.is_a?(Regexp)
      raise ConfigurationError unless @dw.nil?
      Log.instance.d("configured to scan #{dir} every #{interval}s for #{glob.inspect}. pre loading existing files? #{pre_load}")
      @dw = DirectoryWatcher.new(dir, :pre_load=>pre_load, :glob=>glob, :stable=>stable_cycles, :interval=>interval)
      @flow = Flow.new
      @dw.add_observer(self)
      @flow
    end

    #Starts the directory scanner process and then joins it, so this call
    #does not return while the scanner is running. configure_process must
    #have been called first.
    def start
      @dw.start
      @dw.join
    end

    #this method knows how to extract files from a standard .zip file. The files
    #themselves are extracted into an opaque tmp directory, which is cleaned up
    #after the block is yielded.
    #
    #filename:: the name of the zip archive
    #process:: A block which will be passed each entry in the zipfile for further processing
    #
    #Raises ArgumentError if an entry's name would place it outside of the
    #tmp directory (e.g. "../../etc/passwd").
    #
    #Example:
    # unzip('foo.zip'){|file| puts "i'm processing #{file}"}
    def unzip(filename, &process)
      d("unzipping #{filename}")
      Dir.mktmp do |tmpdir|
        Zip::ZipFile.foreach(filename) do |entry|
          d "processing #{entry}"
          target = path_inside(tmpdir, entry.name)
          FileUtils.mkdir_p(File.dirname(target))
          entry.extract(target) do |existing|
            #go through the logger directly, this mixin only defines #d
            Log.instance.w("warn: #{existing} exists!")
            true #a truthy result tells rubyzip to overwrite the existing file
          end
          yield target
        end
      end
    end

    #see +unzip+. Same thing, except for tar archives
    def untar(filename, &process)
      d("untarring #{filename}")
      untar_gz(filename, false, &process)
    end

    #see +unzip+. Same thing, except for gzipped tar archives.
    #
    #gzipped:: pass false to read +filename+ as a plain, uncompressed tar archive
    def untar_gz(filename, gzipped=true, &process)
      d("targunzipping #{filename}") if gzipped
      tgz = gzipped ? Zlib::GzipReader.new(File.open(filename, 'rb')) : filename
      begin
        Dir.mktmp do |tmpdir|
          Archive::Tar::Minitar.unpack(tgz, tmpdir) do |event, name, _stats|
            yield File.join(tmpdir, name) if event == :file_done
          end
        end
      ensure
        #make sure the gzip stream (and the file below it) is closed even when
        #unpacking never started; closing an already closed stream would raise
        tgz.close if gzipped && !tgz.closed?
      end
    end

    #see +unzip+. This method will try to figure out what kind of archive you passed it,
    #then call the appropriate method. The decision is based on the end of the
    #file name only (.tar.gz, .tgz, .zip or .tar, in any letter case), so a name like
    #"backup.zip.d/files.tar" is treated as a tar archive.
    #
    #Raises a RuntimeError if the extension is not recognised.
    def extract(filename, &process)
      case filename
      when /\.(tar\.gz|tgz)\z/i
        untar_gz(filename, &process)
      when /\.zip\z/i
        unzip(filename, &process)
      when /\.tar\z/i
        untar(filename, &process)
      else
        raise "Couldn't figure out what kind of archive #{filename} was..."
      end
    end

    private

    #Joins an archive entry name onto the extraction directory and refuses
    #names that would resolve to a location outside of that directory.
    def path_inside(dir, name)
      target = File.join(dir, name)
      root = File.expand_path(dir)
      resolved = File.expand_path(target)
      unless resolved == root || resolved.start_with?(root + File::SEPARATOR)
        raise ArgumentError, "archive entry #{name.inspect} would be extracted outside of #{dir}"
      end
      target
    end

    #Raised if configure_process is called more than once.
    class ConfigurationError < StandardError
      def initialize
        super("The shovel can only be configured once.")
      end
    end
  end

end
|
data/spec/example.rb
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
gem 'shovel'
|
3
|
+
require 'ruby-debug'
|
4
|
+
require 'shovel'
|
5
|
+
|
6
|
+
class Example
  #include the shovel mixin
  include MediaShelf::Shovel

  def initialize
    super
    root = File.dirname(__FILE__)
    indir = File.join(root, 'ingest')
    faildir = File.join(root, 'failboat')
    #make the directory that failed archives are moved to.
    FileUtils.mkdir_p faildir unless File.exist?(faildir)

    #configure an ingest process to scan our ingest directory for compressed archives every 4s, consider an archive
    #stable if it doesn't change for 1 scan, and don't suppress file events for files that are already in there(!)
    flow = configure_process(indir, 4, 1, %w(**/*.zip **/*.tgz **/*.tar.gz **/*.tar), false)
    #let's react to stable events by...
    flow.add_reaction(:stable) do |file|
      #extracting the file...
      extract(file) do |inner|
        #and doing some stuff to it
        puts "i'm doing stuff to #{inner} of size #{File.size(inner)}"
        #and possibly raising an exception, so we can demonstrate exception handling
        raise "This file is Exceptional! !" if file =~ /.*\.gz/
      end
    end
    flow.add_reaction(:stable) do |file|
      #now let's delete it. these reactions are executed _in_order!
      #the archive is already gone if the exception handler below moved it
      #to the failboat, so only remove what is still there.
      FileUtils.rm(file, :verbose=>true) if File.exist?(file)
    end
    flow.add_reaction(:removed) do |file|
      d("so long #{file}!")
    end

    #move archives whose processing raised a RuntimeError out of the way,
    #tagging the name with the current time in epoch seconds.
    flow.on_exception(RuntimeError) do |f, e|
      d "URF!, an exception was thrown on #{f}! Welcome to the FAILboat!"
      d e.message
      FileUtils.mv(f, File.join(faildir, "%s.%d" % [File.basename(f), Time.now.to_i]), :verbose=>true)
    end
  end

end
|
49
|
+
|
50
|
+
#build the example ingestor and run it right away.
Example.new.start
|
metadata
ADDED
@@ -0,0 +1,133 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: shovel
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- McClain Looney
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-03-04 00:00:00 -06:00
|
13
|
+
default_executable:
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: directory_watcher
|
17
|
+
type: :runtime
|
18
|
+
version_requirement:
|
19
|
+
version_requirements: !ruby/object:Gem::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">="
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 1.1.2
|
24
|
+
version:
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: rubyzip
|
27
|
+
type: :runtime
|
28
|
+
version_requirement:
|
29
|
+
version_requirements: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 0.9.1
|
34
|
+
version:
|
35
|
+
- !ruby/object:Gem::Dependency
|
36
|
+
name: archive-tar-minitar
|
37
|
+
type: :runtime
|
38
|
+
version_requirement:
|
39
|
+
version_requirements: !ruby/object:Gem::Requirement
|
40
|
+
requirements:
|
41
|
+
- - ">="
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: 0.5.2
|
44
|
+
version:
|
45
|
+
- !ruby/object:Gem::Dependency
|
46
|
+
name: newgem
|
47
|
+
type: :development
|
48
|
+
version_requirement:
|
49
|
+
version_requirements: !ruby/object:Gem::Requirement
|
50
|
+
requirements:
|
51
|
+
- - ">="
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: 1.2.3
|
54
|
+
version:
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: mocha
|
57
|
+
type: :development
|
58
|
+
version_requirement:
|
59
|
+
version_requirements: !ruby/object:Gem::Requirement
|
60
|
+
requirements:
|
61
|
+
- - ">="
|
62
|
+
- !ruby/object:Gem::Version
|
63
|
+
version: 0.9.5
|
64
|
+
version:
|
65
|
+
- !ruby/object:Gem::Dependency
|
66
|
+
name: thoughtbot-shoulda
|
67
|
+
type: :development
|
68
|
+
version_requirement:
|
69
|
+
version_requirements: !ruby/object:Gem::Requirement
|
70
|
+
requirements:
|
71
|
+
- - ">="
|
72
|
+
- !ruby/object:Gem::Version
|
73
|
+
version: 2.9.1
|
74
|
+
version:
|
75
|
+
- !ruby/object:Gem::Dependency
|
76
|
+
name: hoe
|
77
|
+
type: :development
|
78
|
+
version_requirement:
|
79
|
+
version_requirements: !ruby/object:Gem::Requirement
|
80
|
+
requirements:
|
81
|
+
- - ">="
|
82
|
+
- !ruby/object:Gem::Version
|
83
|
+
version: 1.8.0
|
84
|
+
version:
|
85
|
+
description: A domain specific language for configuring ingestor processes
|
86
|
+
email:
|
87
|
+
- mcclain.looney@yourmediashelf.com
|
88
|
+
executables: []
|
89
|
+
|
90
|
+
extensions: []
|
91
|
+
|
92
|
+
extra_rdoc_files:
|
93
|
+
- History.txt
|
94
|
+
- Manifest.txt
|
95
|
+
- README.rdoc
|
96
|
+
files:
|
97
|
+
- History.txt
|
98
|
+
- Manifest.txt
|
99
|
+
- README.rdoc
|
100
|
+
- lib/dir_ext.rb
|
101
|
+
- lib/flow.rb
|
102
|
+
- lib/log.rb
|
103
|
+
- lib/shovel.rb
|
104
|
+
- spec/example.rb
|
105
|
+
has_rdoc: true
|
106
|
+
homepage: Shovel http://bitbucket.org/mediashelf/shovel
|
107
|
+
post_install_message:
|
108
|
+
rdoc_options:
|
109
|
+
- --main
|
110
|
+
- README.rdoc
|
111
|
+
require_paths:
|
112
|
+
- lib
|
113
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - ">="
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: "0"
|
118
|
+
version:
|
119
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
120
|
+
requirements:
|
121
|
+
- - ">="
|
122
|
+
- !ruby/object:Gem::Version
|
123
|
+
version: "0"
|
124
|
+
version:
|
125
|
+
requirements: []
|
126
|
+
|
127
|
+
rubyforge_project: rubyfedora
|
128
|
+
rubygems_version: 1.3.1
|
129
|
+
signing_key:
|
130
|
+
specification_version: 2
|
131
|
+
summary: A domain specific language for configuring ingestor processes
|
132
|
+
test_files: []
|
133
|
+
|