unbreakable 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/unbreakable.rb +3 -3
- data/lib/unbreakable/processors/transform.rb +10 -8
- data/lib/unbreakable/scraper.rb +9 -7
- data/lib/unbreakable/version.rb +1 -1
- metadata +8 -8
data/lib/unbreakable.rb
CHANGED
@@ -14,7 +14,7 @@ require 'dragonfly'
|
|
14
14
|
# require 'unbreakable'
|
15
15
|
#
|
16
16
|
# class MyScraper < Unbreakable::Scraper
|
17
|
-
# def retrieve
|
17
|
+
# def retrieve(*args)
|
18
18
|
# # download all the documents
|
19
19
|
# end
|
20
20
|
# def processable
|
@@ -23,10 +23,10 @@ require 'dragonfly'
|
|
23
23
|
# end
|
24
24
|
#
|
25
25
|
# class MyProcessor < Unbreakable::Processors::Transform
|
26
|
-
# def perform(temp_object)
|
26
|
+
# def perform
|
27
27
|
# # return the transformed record as a hash, array, etc.
|
28
28
|
# end
|
29
|
-
# def persist(temp_object, arg)
|
29
|
+
# def persist(arg)
|
30
30
|
# # store the hash/array/etc. in Mongo, MySQL, YAML, etc.
|
31
31
|
# end
|
32
32
|
# end
|
data/lib/unbreakable/processors/transform.rb
CHANGED
@@ -5,12 +5,12 @@ module Unbreakable
|
|
5
5
|
# require 'nokogiri'
|
6
6
|
# class MyProcessor < Unbreakable::Processors::Transform
|
7
7
|
# # Extracts the page title from an HTML page.
|
8
|
-
# def perform(temp_object)
|
8
|
+
# def perform
|
9
9
|
# Nokogiri::HTML(temp_object.data).at_css('title')
|
10
10
|
# end
|
11
11
|
#
|
12
12
|
# # Saves the page title to an external database.
|
13
|
-
# def persist(temp_object, arg)
|
13
|
+
# def persist(arg)
|
14
14
|
# MyModel.create(:title => arg)
|
15
15
|
# end
|
16
16
|
# end
|
@@ -27,14 +27,18 @@ module Unbreakable
|
|
27
27
|
include Dragonfly::Configurable
|
28
28
|
include Dragonfly::Loggable
|
29
29
|
|
30
|
+
attr_reader :temp_object, :opts
|
31
|
+
|
30
32
|
# +#transform+ must be defined on the subclass for Dragonfly to see it.
|
31
33
|
# @param [Class] subclass a subclass
|
32
34
|
def self.inherited(subclass)
|
33
35
|
subclass.class_eval do
|
34
36
|
# @param [Dragonfly::TempObject] temp_object
|
37
|
+
# @param [Hash] opts
|
35
38
|
# @return [Dragonfly::TempObject] the same object
|
36
|
-
def transform(temp_object)
|
37
|
-
|
39
|
+
def transform(temp_object, opts = {})
|
40
|
+
@temp_object, @opts = temp_object, opts
|
41
|
+
persist perform
|
38
42
|
temp_object
|
39
43
|
end
|
40
44
|
end
|
@@ -43,16 +47,14 @@ module Unbreakable
|
|
43
47
|
private
|
44
48
|
|
45
49
|
# Transforms a record.
|
46
|
-
# @param [Dragonfly::TempObject] temp_object
|
47
50
|
# @return [Hash] the transformed record
|
48
|
-
def perform(temp_object)
|
51
|
+
def perform
|
49
52
|
raise NotImplementedError
|
50
53
|
end
|
51
54
|
|
52
55
|
# Persists a transformed record.
|
53
|
-
# @param [Dragonfly::TempObject] temp_object
|
54
56
|
# @param arg a transformed record
|
55
|
-
def persist(temp_object, arg)
|
57
|
+
def persist(arg)
|
56
58
|
raise NotImplementedError
|
57
59
|
end
|
58
60
|
end
|
data/lib/unbreakable/scraper.rb
CHANGED
@@ -10,12 +10,12 @@ module Unbreakable
|
|
10
10
|
# require 'open-uri'
|
11
11
|
# class MyScraper < Unbreakable::Scraper
|
12
12
|
# # Stores the contents of +http://www.example.com/+ in +index.html+.
|
13
|
-
# def retrieve
|
13
|
+
# def retrieve(*args)
|
14
14
|
# store(:path => 'index.html'){ open('http://www.example.com/').read }
|
15
15
|
# end
|
16
16
|
#
|
17
17
|
# # Processes +index.html+.
|
18
|
-
# def process
|
18
|
+
# def process(*args)
|
19
19
|
# fetch('index.html').process(:transform).apply
|
20
20
|
# end
|
21
21
|
#
|
@@ -96,9 +96,9 @@ The most commonly used commands are:
|
|
96
96
|
command = args.shift
|
97
97
|
case command
|
98
98
|
when 'retrieve'
|
99
|
-
retrieve
|
99
|
+
retrieve(*args)
|
100
100
|
when 'process'
|
101
|
-
process
|
101
|
+
process(*args)
|
102
102
|
when 'config'
|
103
103
|
print_configuration @app
|
104
104
|
when nil
|
@@ -148,14 +148,16 @@ The most commonly used commands are:
|
|
148
148
|
end
|
149
149
|
|
150
150
|
# Caches remote files to the datastore for later processing.
|
151
|
-
def retrieve
|
151
|
+
# @param [Array] args splat of command-line arguments
|
152
|
+
def retrieve(*args)
|
152
153
|
raise NotImplementedError
|
153
154
|
end
|
154
155
|
|
155
156
|
# Processes cached files into machine-readable data.
|
156
|
-
def process
|
157
|
+
# @param [Array] args splat of command-line arguments
|
158
|
+
def process(*args)
|
157
159
|
processable.each do |record|
|
158
|
-
fetch(record).process(:transform).apply
|
160
|
+
fetch(record).process(:transform, :args => args).apply
|
159
161
|
end
|
160
162
|
end
|
161
163
|
|
data/lib/unbreakable/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: unbreakable
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2011-09-
|
12
|
+
date: 2011-09-08 00:00:00.000000000Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: activesupport
|
16
|
-
requirement: &
|
16
|
+
requirement: &70289593588560 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ~>
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: 3.1.0
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *70289593588560
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: dragonfly
|
27
|
-
requirement: &
|
27
|
+
requirement: &70289593588040 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ~>
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: 0.9.5
|
33
33
|
type: :runtime
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *70289593588040
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: rspec
|
38
|
-
requirement: &
|
38
|
+
requirement: &70289593587560 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ~>
|
@@ -43,7 +43,7 @@ dependencies:
|
|
43
43
|
version: 2.6.0
|
44
44
|
type: :development
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *70289593587560
|
47
47
|
description: Abstracts and bulletproofs common scraping tasks.
|
48
48
|
email:
|
49
49
|
- info@opennorth.ca
|