unbreakable 0.0.3 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -6,10 +6,10 @@ require 'dragonfly'
6
6
  # You may enhance a datastore with {Decorators} and {Observers}: for example,
7
7
  # a {Decorators::Timeout Timeout} decorator to retry on timeout with exponential
8
8
  # backoff and a {Observers::Log Log} observer which logs retrieval progress.
9
- # Of course, you must also define a {Processors::Transform Processor} to turn
10
- # your raw data into machine-readable data.
9
+ # Of course, you must also define a {Processors Processor} to turn your raw data
10
+ # into machine-readable data.
11
11
  #
12
- # A skeleton scraper:
12
+ # A simple skeleton scraper:
13
13
  #
14
14
  # require 'unbreakable'
15
15
  #
@@ -40,7 +40,24 @@ require 'dragonfly'
40
40
  #
41
41
  # Every scraper script can run as a command-line script. Try it!
42
42
  #
43
- # ruby myscraper.rb
43
+ # $ ruby myscraper.rb
44
+ # usage: myscraper [options] <command> [<args>]
45
+ #
46
+ # The most commonly used commands are:
47
+ # retrieve Cache remote files to the datastore for later processing
48
+ # process Process cached files into machine-readable data
49
+ # config Print the current configuration
50
+ #
51
+ # Specific options:
52
+ # --root_path ARG default "/var/tmp/unbreakable"
53
+ # --[no-]store_meta default true
54
+ # --cache_duration ARG default 31536000
55
+ # --fallback_mime_type ARG default "application/octet-stream"
56
+ # --secret ARG default "secret yo"
57
+ # --[no-]trust_file_extensions default true
58
+ #
59
+ # General options:
60
+ # -h, --help Display this screen
44
61
  module Unbreakable
45
62
  autoload :Scraper, 'unbreakable/scraper'
46
63
 
@@ -1,6 +1,51 @@
1
1
  module Unbreakable
2
+ # Processors are {http://markevans.github.com/dragonfly/file.Processing.html
3
+ # Dragonfly} processors. For example:
4
+ #
5
+ # class MyProcessor
6
+ # def coolify(temp_object, opts = {})
7
+ # SomeLib.coolify(temp_object.data, opts)
8
+ # end
9
+ #
10
+ # def uglify(temp_object, ugliness)
11
+ # `uglify -i #{temp_object.path} -u #{ugliness}`
12
+ # end
13
+ #
14
+ # def conditional(temp_object, format, pages)
15
+ # throw :unable_to_handle unless format == :pdf
16
+ # # do stuff
17
+ # end
18
+ #
19
+ # private
20
+ #
21
+ # def my_helper_method
22
+ # # do stuff
23
+ # end
24
+ # end
25
+ # MyScraper.processor.register MyProcessor
26
+ #
27
+ # Public methods must return an object with which a +TempObject+ may be
28
+ # initialized (+String+, +File+, +Tempfile+, +Pathname+ or +TempObject+).
29
+ #
30
+ # You can raise +Dragonfly::Configurable::NotConfigured+ if a configurable
31
+ # variable is required but missing. If a variable is invalid, you can raise
32
+ # +Dragonfly::Configurable::BadConfigAttribute+.
33
+ #
34
+ # If a process has dependencies or conditions, then you can test for these
35
+ # conditions and throw +:unable_to_handle+ to skip processing.
36
+ #
37
+ # If multiple processors define a public method by the same name, the methods
38
+ # will be run in reverse order from the last processor to define the method
39
+ # until one fails to throw +:unable_to_handle+. If all of them throw it, then
40
+ # +Dragonfly::FunctionManager::UnableToHandle+ will be raised.
41
+ #
42
+ # As such, if you are writing a document-to-plain-text converter, you can
43
+ # write a pdftotext processor, a doctopdf processor, etc. which all define
44
+ # a +to_text+ public method, and use +:unable_to_handle+ to make sure the
45
+ # correct processor runs.
2
46
  module Processors
3
- # You may implement a transform process by subclassing this class:
47
+ # If you are writing a simple scraper and only need one processor, you may
48
+ # implement a single +transform+ processor method by subclassing this class:
4
49
  #
5
50
  # require 'nokogiri'
6
51
  # class MyProcessor < Unbreakable::Processors::Transform
@@ -21,8 +66,10 @@ module Unbreakable
21
66
  # * +perform+
22
67
  # * +persist+
23
68
  #
24
- # You may also override +transform+, which calls +perform+ and +persist+ in
25
- # the default implementation, but you probably won't have to.
69
+ # +transform+ calls +persist+ with the output of +perform+. This makes it
70
+ # easy for others to subclass your processor and just change the +persist+
71
+ # method to change the external database, for example, while still taking
72
+ # advantage of the hard work done by +perform+.
26
73
  class Transform
27
74
  include Dragonfly::Configurable
28
75
  include Dragonfly::Loggable
@@ -2,7 +2,7 @@ require 'forwardable'
2
2
  require 'optparse'
3
3
  require 'securerandom'
4
4
 
5
- require 'active_support/inflector/methods'
5
+ require 'active_support/core_ext/class/attribute_accessors'
6
6
 
7
7
  module Unbreakable
8
8
  # You may implement a scraper by subclassing this class:
@@ -44,6 +44,9 @@ module Unbreakable
44
44
  def_delegators :@app, :add_child_configurable, :configure, :datastore,
45
45
  :fetch, :log, :processor
46
46
 
47
+ cattr_accessor :commands
48
+ @@commands = []
49
+
47
50
  # Initializes a Dragonfly app for storage and processing.
48
51
  def initialize
49
52
  @app = Dragonfly[SecureRandom.hex.to_sym]
@@ -73,10 +76,12 @@ The most commonly used commands are:
73
76
 
74
77
  @opts.separator ''
75
78
  @opts.separator 'Specific options:'
79
+ specific_options
76
80
  extract_configuration @app
77
81
 
78
82
  @opts.separator ''
79
83
  @opts.separator 'General options:'
84
+ general_options
80
85
  @opts.on_tail('-h', '--help', 'Display this screen') do
81
86
  puts @opts
82
87
  exit
@@ -85,6 +90,24 @@ The most commonly used commands are:
85
90
  @opts
86
91
  end
87
92
 
93
+ # def specific_options
94
+ # @opts.on('--echo ARG', 'Write a string to standard output') do |x|
95
+ # puts x
96
+ # end
97
+ # end
98
+ #
99
+ # @abstract Override to add specific options to the option parser.
100
+ def specific_options; end
101
+
102
+ # def general_options
103
+ # @opts.on('--echo ARG', 'Write a string to standard output') do |x|
104
+ # puts x
105
+ # end
106
+ # end
107
+ #
108
+ # @abstract Override to add general options to the option parser.
109
+ def general_options; end
110
+
88
111
  # Runs the command. Most often run from a command-line script as:
89
112
  #
90
113
  # scraper.run(ARGV)
@@ -104,7 +127,12 @@ The most commonly used commands are:
104
127
  when nil
105
128
  puts opts
106
129
  else
107
- opts.abort "'#{command}' is not a #{opts.program_name} command. See '#{opts.program_name} --help'."
130
+ # Allow subclasses to add more commands.
131
+ if self.commands.include? command.to_sym
132
+ send command, args
133
+ else
134
+ opts.abort "'#{command}' is not a #{opts.program_name} command. See '#{opts.program_name} --help'."
135
+ end
108
136
  end
109
137
  end
110
138
 
@@ -171,17 +199,18 @@ The most commonly used commands are:
171
199
 
172
200
  # @param [#configuration] object
173
201
  def extract_configuration(object)
174
- object.default_configuration.merge(object.configuration).each do |key,value|
175
- if true === value or false === value
176
- @opts.on("--[no-]#{key}", "default #{value.inspect}") do |x|
177
- object.send "#{key}=", x
202
+ object.config_methods.each do |meth|
203
+ default = object.configuration[meth] || object.default_configuration[meth]
204
+ if true === default or false === default
205
+ @opts.on("--[no-]#{meth}", "default #{default.inspect}") do |x|
206
+ object.configure{|c| c.send "#{meth}=", x}
178
207
  end
179
- elsif String === value or Fixnum === value
180
- @opts.on("--#{key} ARG", "default #{value.inspect}") do |x|
181
- object.send "#{key}=", x
208
+ elsif String === default or Fixnum === default
209
+ @opts.on("--#{meth} ARG", "default #{default.inspect}") do |x|
210
+ object.configure{|c| c.send "#{meth}=", x}
182
211
  end
183
- elsif object != value and value.respond_to? :configuration
184
- extract_configuration value
212
+ elsif object != default and default.respond_to? :configuration
213
+ extract_configuration default
185
214
  end
186
215
  end
187
216
  end
@@ -190,11 +219,12 @@ The most commonly used commands are:
190
219
  def print_configuration(object, indent = 0)
191
220
  indentation = ' ' * indent
192
221
  puts "#{indentation}#{object.class.name}:"
193
- object.default_configuration.merge(object.configuration).each do |key,value|
194
- if true === value or false === value or String === value or Fixnum === value
195
- puts " #{indentation}#{key.to_s.ljust 25 - indent}#{value.inspect}"
196
- elsif object != value and value.respond_to? :configuration
197
- print_configuration value, indent + 2
222
+ object.config_methods.each do |meth|
223
+ default = object.configuration[meth] || object.default_configuration[meth]
224
+ if true === default or false === default or String === default or Fixnum === default
225
+ puts " #{indentation}#{meth.to_s.ljust 25 - indent}#{default.inspect}"
226
+ elsif object != default and default.respond_to? :configuration
227
+ print_configuration default, indent + 2
198
228
  end
199
229
  end
200
230
  end
@@ -1,3 +1,3 @@
1
1
  module Unbreakable
2
- VERSION = "0.0.3"
2
+ VERSION = "0.0.4"
3
3
  end
@@ -20,6 +20,6 @@ Gem::Specification.new do |s|
20
20
  s.require_paths = ["lib"]
21
21
 
22
22
  s.add_runtime_dependency('activesupport', '~> 3.1.0')
23
- s.add_runtime_dependency('dragonfly', '~> 0.9.5')
23
+ s.add_runtime_dependency('dragonfly', '~> 0.9.8')
24
24
  s.add_development_dependency('rspec', '~> 2.6.0')
25
25
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: unbreakable
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.4
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2011-09-08 00:00:00.000000000Z
12
+ date: 2011-09-09 00:00:00.000000000Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: activesupport
16
- requirement: &70251392277560 !ruby/object:Gem::Requirement
16
+ requirement: &70196241006160 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ~>
@@ -21,21 +21,21 @@ dependencies:
21
21
  version: 3.1.0
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *70251392277560
24
+ version_requirements: *70196241006160
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: dragonfly
27
- requirement: &70251392277060 !ruby/object:Gem::Requirement
27
+ requirement: &70196240974480 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ~>
31
31
  - !ruby/object:Gem::Version
32
- version: 0.9.5
32
+ version: 0.9.8
33
33
  type: :runtime
34
34
  prerelease: false
35
- version_requirements: *70251392277060
35
+ version_requirements: *70196240974480
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: rspec
38
- requirement: &70251392276600 !ruby/object:Gem::Requirement
38
+ requirement: &70196240973840 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ~>
@@ -43,7 +43,7 @@ dependencies:
43
43
  version: 2.6.0
44
44
  type: :development
45
45
  prerelease: false
46
- version_requirements: *70251392276600
46
+ version_requirements: *70196240973840
47
47
  description: Abstracts and bulletproofs common scraping tasks.
48
48
  email:
49
49
  - info@opennorth.ca