unbreakable 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/unbreakable.rb +3 -3
- data/lib/unbreakable/processors/transform.rb +10 -8
- data/lib/unbreakable/scraper.rb +9 -7
- data/lib/unbreakable/version.rb +1 -1
- metadata +8 -8
data/lib/unbreakable.rb
CHANGED
@@ -14,7 +14,7 @@ require 'dragonfly'
|
|
14
14
|
# require 'unbreakable'
|
15
15
|
#
|
16
16
|
# class MyScraper < Unbreakable::Scraper
|
17
|
-
# def retrieve
|
17
|
+
# def retrieve(*args)
|
18
18
|
# # download all the documents
|
19
19
|
# end
|
20
20
|
# def processable
|
@@ -23,10 +23,10 @@ require 'dragonfly'
|
|
23
23
|
# end
|
24
24
|
#
|
25
25
|
# class MyProcessor < Unbreakable::Processors::Transform
|
26
|
-
# def perform
|
26
|
+
# def perform
|
27
27
|
# # return the transformed record as a hash, array, etc.
|
28
28
|
# end
|
29
|
-
# def persist(
|
29
|
+
# def persist(arg)
|
30
30
|
# # store the hash/array/etc. in Mongo, MySQL, YAML, etc.
|
31
31
|
# end
|
32
32
|
# end
|
data/lib/unbreakable/processors/transform.rb
CHANGED
@@ -5,12 +5,12 @@ module Unbreakable
|
|
5
5
|
# require 'nokogiri'
|
6
6
|
# class MyProcessor < Unbreakable::Processors::Transform
|
7
7
|
# # Extracts the page title from an HTML page.
|
8
|
-
# def perform
|
8
|
+
# def perform
|
9
9
|
# Nokogiri::HTML(temp_object.data).at_css('title')
|
10
10
|
# end
|
11
11
|
#
|
12
12
|
# # Saves the page title to an external database.
|
13
|
-
# def persist(
|
13
|
+
# def persist(arg)
|
14
14
|
# MyModel.create(:title => arg)
|
15
15
|
# end
|
16
16
|
# end
|
@@ -27,14 +27,18 @@ module Unbreakable
|
|
27
27
|
include Dragonfly::Configurable
|
28
28
|
include Dragonfly::Loggable
|
29
29
|
|
30
|
+
attr_reader :temp_object, :opts
|
31
|
+
|
30
32
|
# +#transform+ must be defined on the subclass for Dragonfly to see it.
|
31
33
|
# @param [Class] subclass a subclass
|
32
34
|
def self.inherited(subclass)
|
33
35
|
subclass.class_eval do
|
34
36
|
# @param [Dragonfly::TempObject] temp_object
|
37
|
+
# @param [Hash] opts
|
35
38
|
# @return [Dragonfly::TempObject] the same object
|
36
|
-
def transform(temp_object)
|
37
|
-
|
39
|
+
def transform(temp_object, opts = {})
|
40
|
+
@temp_object, @opts = temp_object, opts
|
41
|
+
persist perform
|
38
42
|
temp_object
|
39
43
|
end
|
40
44
|
end
|
@@ -43,16 +47,14 @@ module Unbreakable
|
|
43
47
|
private
|
44
48
|
|
45
49
|
# Transforms a record.
|
46
|
-
# @param [Dragonfly::TempObject] temp_object
|
47
50
|
# @return [Hash] the transformed record
|
48
|
-
def perform(temp_object)
|
51
|
+
def perform
|
49
52
|
raise NotImplementedError
|
50
53
|
end
|
51
54
|
|
52
55
|
# Persists a transformed record.
|
53
|
-
# @param [Dragonfly::TempObject] temp_object
|
54
56
|
# @param arg a transformed record
|
55
|
-
def persist(temp_object, arg)
|
57
|
+
def persist(arg)
|
56
58
|
raise NotImplementedError
|
57
59
|
end
|
58
60
|
end
|
data/lib/unbreakable/scraper.rb
CHANGED
@@ -10,12 +10,12 @@ module Unbreakable
|
|
10
10
|
# require 'open-uri'
|
11
11
|
# class MyScraper < Unbreakable::Scraper
|
12
12
|
# # Stores the contents of +http://www.example.com/+ in +index.html+.
|
13
|
-
# def retrieve
|
13
|
+
# def retrieve(*args)
|
14
14
|
# store(:path => 'index.html'){ open('http://www.example.com/').read }
|
15
15
|
# end
|
16
16
|
#
|
17
17
|
# # Processes +index.html+.
|
18
|
-
# def process
|
18
|
+
# def process(*args)
|
19
19
|
# fetch('index.html').process(:transform).apply
|
20
20
|
# end
|
21
21
|
#
|
@@ -96,9 +96,9 @@ The most commonly used commands are:
|
|
96
96
|
command = args.shift
|
97
97
|
case command
|
98
98
|
when 'retrieve'
|
99
|
-
retrieve
|
99
|
+
retrieve(*args)
|
100
100
|
when 'process'
|
101
|
-
process
|
101
|
+
process(*args)
|
102
102
|
when 'config'
|
103
103
|
print_configuration @app
|
104
104
|
when nil
|
@@ -148,14 +148,16 @@ The most commonly used commands are:
|
|
148
148
|
end
|
149
149
|
|
150
150
|
# Caches remote files to the datastore for later processing.
|
151
|
-
|
151
|
+
# @param [Array] args splat of command-line arguments
|
152
|
+
def retrieve(*args)
|
152
153
|
raise NotImplementedError
|
153
154
|
end
|
154
155
|
|
155
156
|
# Processes cached files into machine-readable data.
|
156
|
-
|
157
|
+
# @param [Array] args splat of command-line arguments
|
158
|
+
def process(*args)
|
157
159
|
processable.each do |record|
|
158
|
-
fetch(record).process(:transform).apply
|
160
|
+
fetch(record).process(:transform, :args => args).apply
|
159
161
|
end
|
160
162
|
end
|
161
163
|
|
data/lib/unbreakable/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: unbreakable
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2011-09-
|
12
|
+
date: 2011-09-08 00:00:00.000000000Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: activesupport
|
16
|
-
requirement: &
|
16
|
+
requirement: &70289593588560 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ~>
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: 3.1.0
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *70289593588560
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: dragonfly
|
27
|
-
requirement: &
|
27
|
+
requirement: &70289593588040 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ~>
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: 0.9.5
|
33
33
|
type: :runtime
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *70289593588040
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: rspec
|
38
|
-
requirement: &
|
38
|
+
requirement: &70289593587560 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ~>
|
@@ -43,7 +43,7 @@ dependencies:
|
|
43
43
|
version: 2.6.0
|
44
44
|
type: :development
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *70289593587560
|
47
47
|
description: Abstracts and bulletproofs common scraping tasks.
|
48
48
|
email:
|
49
49
|
- info@opennorth.ca
|