unbreakable 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,10 +6,10 @@ require 'dragonfly'
6
6
  # You may enhance a datastore with {Decorators} and {Observers}: for example,
7
7
  # a {Decorators::Timeout Timeout} decorator to retry on timeout with exponential
8
8
  # backoff and a {Observers::Log Log} observer which logs retrieval progress.
9
- # Of course, you must also define a {Processors::Transform Processor} to turn
10
- # your raw data into machine-readable data.
9
+ # Of course, you must also define a {Processors Processor} to turn your raw data
10
+ # into machine-readable data.
11
11
  #
12
- # A skeleton scraper:
12
+ # A simple skeleton scraper:
13
13
  #
14
14
  # require 'unbreakable'
15
15
  #
@@ -40,7 +40,24 @@ require 'dragonfly'
40
40
  #
41
41
  # Every scraper script can run as a command-line script. Try it!
42
42
  #
43
- # ruby myscraper.rb
43
+ # $ ruby myscraper.rb
44
+ # usage: myscraper [options] <command> [<args>]
45
+ #
46
+ # The most commonly used commands are:
47
+ # retrieve Cache remote files to the datastore for later processing
48
+ # process Process cached files into machine-readable data
49
+ # config Print the current configuration
50
+ #
51
+ # Specific options:
52
+ # --root_path ARG default "/var/tmp/unbreakable"
53
+ # --[no-]store_meta default true
54
+ # --cache_duration ARG default 31536000
55
+ # --fallback_mime_type ARG default "application/octet-stream"
56
+ # --secret ARG default "secret yo"
57
+ # --[no-]trust_file_extensions default true
58
+ #
59
+ # General options:
60
+ # -h, --help Display this screen
44
61
  module Unbreakable
45
62
  autoload :Scraper, 'unbreakable/scraper'
46
63
 
@@ -1,6 +1,51 @@
1
1
  module Unbreakable
2
+ # Processors are {http://markevans.github.com/dragonfly/file.Processing.html
3
+ # Dragonfly} processors. For example:
4
+ #
5
+ # class MyProcessor
6
+ # def coolify(temp_object, opts = {})
7
+ # SomeLib.coolify(temp_object.data, opts)
8
+ # end
9
+ #
10
+ # def uglify(temp_object, ugliness)
11
+ # `uglify -i #{temp_object.path} -u #{ugliness}`
12
+ # end
13
+ #
14
+ # def conditional(temp_object, format, pages)
15
+ # throw :unable_to_handle unless format == :pdf
16
+ # # do stuff
17
+ # end
18
+ #
19
+ # private
20
+ #
21
+ # def my_helper_method
22
+ # # do stuff
23
+ # end
24
+ # end
25
+ # MyScraper.processor.register MyProcessor
26
+ #
27
+ # Public methods must return an object with which a +TempObject+ may be
28
+ # initialized (+String+, +File+, +Tempfile+, +Pathname+ or +TempObject+).
29
+ #
30
+ # You can raise +Dragonfly::Configurable::NotConfigured+ if a configurable
31
+ # variable is required but missing. If a variable is invalid, you can raise
32
+ # +Dragonfly::Configurable::BadConfigAttribute+.
33
+ #
34
+ # If a process has dependencies or conditions, then you can test for these
35
+ # conditions and throw +:unable_to_handle+ to skip processing.
36
+ #
37
+ # If multiple processors define a public method by the same name, the methods
38
+ # will be run in reverse order from the last processor to define the method
39
+ # until one does not throw +:unable_to_handle+. If all of them throw it, then
40
+ # +Dragonfly::FunctionManager::UnableToHandle+ will be raised.
41
+ #
42
+ # As such, if you are writing a document-to-plain-text converter, you can
43
+ # write a pdftotext processor, a doctopdf processor, etc. which all define
44
+ # a +to_text+ public method, and use +:unable_to_handle+ to make sure the
45
+ # correct processor runs.
2
46
  module Processors
3
- # You may implement a transform process by subclassing this class:
47
+ # If you are writing a simple scraper and only need one processor, you may
48
+ # implement a single +transform+ processor method by subclassing this class:
4
49
  #
5
50
  # require 'nokogiri'
6
51
  # class MyProcessor < Unbreakable::Processors::Transform
@@ -21,8 +66,10 @@ module Unbreakable
21
66
  # * +perform+
22
67
  # * +persist+
23
68
  #
24
- # You may also override +transform+, which calls +perform+ and +persist+ in
25
- # the default implementation, but you probably won't have to.
69
+ # +transform+ calls +persist+ with the output of +perform+. This makes it
70
+ # easy for others to subclass your processor and just change the +persist+
71
+ # method to change the external database, for example, while still taking
72
+ # advantage of the hard work done by +perform+.
26
73
  class Transform
27
74
  include Dragonfly::Configurable
28
75
  include Dragonfly::Loggable
@@ -2,7 +2,7 @@ require 'forwardable'
2
2
  require 'optparse'
3
3
  require 'securerandom'
4
4
 
5
- require 'active_support/inflector/methods'
5
+ require 'active_support/core_ext/class/attribute_accessors'
6
6
 
7
7
  module Unbreakable
8
8
  # You may implement a scraper by subclassing this class:
@@ -44,6 +44,9 @@ module Unbreakable
44
44
  def_delegators :@app, :add_child_configurable, :configure, :datastore,
45
45
  :fetch, :log, :processor
46
46
 
47
+ cattr_accessor :commands
48
+ @@commands = []
49
+
47
50
  # Initializes a Dragonfly app for storage and processing.
48
51
  def initialize
49
52
  @app = Dragonfly[SecureRandom.hex.to_sym]
@@ -73,10 +76,12 @@ The most commonly used commands are:
73
76
 
74
77
  @opts.separator ''
75
78
  @opts.separator 'Specific options:'
79
+ specific_options
76
80
  extract_configuration @app
77
81
 
78
82
  @opts.separator ''
79
83
  @opts.separator 'General options:'
84
+ general_options
80
85
  @opts.on_tail('-h', '--help', 'Display this screen') do
81
86
  puts @opts
82
87
  exit
@@ -85,6 +90,24 @@ The most commonly used commands are:
85
90
  @opts
86
91
  end
87
92
 
93
+ # def specific_options
94
+ # @opts.on('--echo ARG', 'Write a string to standard output') do |x|
95
+ # puts x
96
+ # end
97
+ # end
98
+ #
99
+ # @abstract Override to add specific options to the option parser.
100
+ def specific_options; end
101
+
102
+ # def general_options
103
+ # @opts.on('--echo ARG', 'Write a string to standard output') do |x|
104
+ # puts x
105
+ # end
106
+ # end
107
+ #
108
+ # @abstract Override to add general options to the option parser.
109
+ def general_options; end
110
+
88
111
  # Runs the command. Most often run from a command-line script as:
89
112
  #
90
113
  # scraper.run(ARGV)
@@ -104,7 +127,12 @@ The most commonly used commands are:
104
127
  when nil
105
128
  puts opts
106
129
  else
107
- opts.abort "'#{command}' is not a #{opts.program_name} command. See '#{opts.program_name} --help'."
130
+ # Allow subclasses to add more commands.
131
+ if self.commands.include? command.to_sym
132
+ send command, args
133
+ else
134
+ opts.abort "'#{command}' is not a #{opts.program_name} command. See '#{opts.program_name} --help'."
135
+ end
108
136
  end
109
137
  end
110
138
 
@@ -171,17 +199,18 @@ The most commonly used commands are:
171
199
 
172
200
  # @param [#configuration] object
173
201
  def extract_configuration(object)
174
- object.default_configuration.merge(object.configuration).each do |key,value|
175
- if true === value or false === value
176
- @opts.on("--[no-]#{key}", "default #{value.inspect}") do |x|
177
- object.send "#{key}=", x
202
+ object.config_methods.each do |meth|
203
+ default = object.configuration[meth] || object.default_configuration[meth]
204
+ if true === default or false === default
205
+ @opts.on("--[no-]#{meth}", "default #{default.inspect}") do |x|
206
+ object.configure{|c| c.send "#{meth}=", x}
178
207
  end
179
- elsif String === value or Fixnum === value
180
- @opts.on("--#{key} ARG", "default #{value.inspect}") do |x|
181
- object.send "#{key}=", x
208
+ elsif String === default or Fixnum === default
209
+ @opts.on("--#{meth} ARG", "default #{default.inspect}") do |x|
210
+ object.configure{|c| c.send "#{meth}=", x}
182
211
  end
183
- elsif object != value and value.respond_to? :configuration
184
- extract_configuration value
212
+ elsif object != default and default.respond_to? :configuration
213
+ extract_configuration default
185
214
  end
186
215
  end
187
216
  end
@@ -190,11 +219,12 @@ The most commonly used commands are:
190
219
  def print_configuration(object, indent = 0)
191
220
  indentation = ' ' * indent
192
221
  puts "#{indentation}#{object.class.name}:"
193
- object.default_configuration.merge(object.configuration).each do |key,value|
194
- if true === value or false === value or String === value or Fixnum === value
195
- puts " #{indentation}#{key.to_s.ljust 25 - indent}#{value.inspect}"
196
- elsif object != value and value.respond_to? :configuration
197
- print_configuration value, indent + 2
222
+ object.config_methods.each do |meth|
223
+ default = object.configuration[meth] || object.default_configuration[meth]
224
+ if true === default or false === default or String === default or Fixnum === default
225
+ puts " #{indentation}#{meth.to_s.ljust 25 - indent}#{default.inspect}"
226
+ elsif object != default and default.respond_to? :configuration
227
+ print_configuration default, indent + 2
198
228
  end
199
229
  end
200
230
  end
@@ -1,3 +1,3 @@
1
1
  module Unbreakable
2
- VERSION = "0.0.3"
2
+ VERSION = "0.0.4"
3
3
  end
@@ -20,6 +20,6 @@ Gem::Specification.new do |s|
20
20
  s.require_paths = ["lib"]
21
21
 
22
22
  s.add_runtime_dependency('activesupport', '~> 3.1.0')
23
- s.add_runtime_dependency('dragonfly', '~> 0.9.5')
23
+ s.add_runtime_dependency('dragonfly', '~> 0.9.8')
24
24
  s.add_development_dependency('rspec', '~> 2.6.0')
25
25
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: unbreakable
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.4
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2011-09-08 00:00:00.000000000Z
12
+ date: 2011-09-09 00:00:00.000000000Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: activesupport
16
- requirement: &70251392277560 !ruby/object:Gem::Requirement
16
+ requirement: &70196241006160 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ~>
@@ -21,21 +21,21 @@ dependencies:
21
21
  version: 3.1.0
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *70251392277560
24
+ version_requirements: *70196241006160
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: dragonfly
27
- requirement: &70251392277060 !ruby/object:Gem::Requirement
27
+ requirement: &70196240974480 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ~>
31
31
  - !ruby/object:Gem::Version
32
- version: 0.9.5
32
+ version: 0.9.8
33
33
  type: :runtime
34
34
  prerelease: false
35
- version_requirements: *70251392277060
35
+ version_requirements: *70196240974480
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: rspec
38
- requirement: &70251392276600 !ruby/object:Gem::Requirement
38
+ requirement: &70196240973840 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ~>
@@ -43,7 +43,7 @@ dependencies:
43
43
  version: 2.6.0
44
44
  type: :development
45
45
  prerelease: false
46
- version_requirements: *70251392276600
46
+ version_requirements: *70196240973840
47
47
  description: Abstracts and bulletproofs common scraping tasks.
48
48
  email:
49
49
  - info@opennorth.ca