klepto 0.4.7 → 0.4.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +2 -1
- data/README.md +2 -1
- data/lib/klepto/browser.rb +2 -0
- data/lib/klepto/config.rb +3 -3
- data/lib/klepto/structure.rb +13 -5
- data/lib/klepto/version.rb +1 -1
- data/lib/klepto.rb +9 -3
- data/spec/lib/klepto/bot_spec.rb +4 -4
- data/spec/spec_helper.rb +3 -0
- metadata +12 -12
data/.gitignore
CHANGED
data/README.md
CHANGED
data/lib/klepto/browser.rb
CHANGED
@@ -3,6 +3,7 @@ module Klepto
|
|
3
3
|
include Capybara::DSL
|
4
4
|
|
5
5
|
def initialize(*args)
|
6
|
+
Klepto.logger.debug("===== Initializing new browser. =====")
|
6
7
|
super
|
7
8
|
end
|
8
9
|
|
@@ -35,6 +36,7 @@ module Klepto
|
|
35
36
|
end
|
36
37
|
|
37
38
|
def fetch!(url)
|
39
|
+
Klepto.logger.debug("Fetching #{url}")
|
38
40
|
#Capybara.using_driver use_driver do
|
39
41
|
visit url
|
40
42
|
page
|
data/lib/klepto/config.rb
CHANGED
@@ -9,9 +9,9 @@ module Klepto
|
|
9
9
|
@abort_on_redirect = false
|
10
10
|
@urls = []
|
11
11
|
@after_handlers = {
|
12
|
-
:each => [],
|
13
|
-
:get => [],
|
14
|
-
:abort=> []
|
12
|
+
:each => [], #after each call to
|
13
|
+
:get => [], #after GET, before structure
|
14
|
+
:abort=> [] #after abort
|
15
15
|
}
|
16
16
|
@before_handlers = {:each => []}
|
17
17
|
@status_handlers = {}
|
data/lib/klepto/structure.rb
CHANGED
@@ -11,6 +11,7 @@ module Klepto
|
|
11
11
|
attr_reader :_context
|
12
12
|
|
13
13
|
def initialize(_context=nil, _parent=nil)
|
14
|
+
Klepto.logger.debug("\tnew Structure (#{_parent}) -> (#{_context})")
|
14
15
|
@_context = _context
|
15
16
|
@_parent = _parent
|
16
17
|
@_hash = {}
|
@@ -30,37 +31,41 @@ module Klepto
|
|
30
31
|
options[:limit] ||= nil
|
31
32
|
selector = args.shift
|
32
33
|
|
34
|
+
Klepto.logger.debug("\t\tDefining attribute: #{meth} -> #{selector}")
|
35
|
+
|
33
36
|
if options[:as] == :collection
|
34
|
-
|
35
37
|
@_hash[meth] = []
|
36
38
|
result = _context.all( options[:syntax], selector )
|
39
|
+
|
40
|
+
Klepto.logger.debug("\t\t\tAs: collection, Result? #{!result.nil?}")
|
41
|
+
|
37
42
|
options[:limit] ||= result.length
|
38
43
|
result[0, options[:limit]].each do |ele|
|
39
44
|
@_hash[meth].push Structure.build(ele, self, &block)
|
40
45
|
end
|
41
46
|
|
42
47
|
elsif options[:as] == :resource
|
43
|
-
|
44
48
|
result = _context.first( options[:syntax], selector )
|
49
|
+
Klepto.logger.debug("\t\t\tAs: resource, Result? #{!result.nil?}")
|
45
50
|
@_hash[meth] = Structure.build(result, self, &block)
|
46
51
|
|
47
52
|
elsif block
|
48
|
-
|
49
53
|
result = selector ?
|
50
54
|
_context.send( options[:match], options[:syntax], selector ) : _context
|
51
55
|
|
52
56
|
if options[:match] == :all
|
53
|
-
|
57
|
+
Klepto.logger.debug("\t\t\tAs: block (match all), Result? #{!result.nil?}")
|
54
58
|
@_hash[meth] = []
|
55
59
|
options[:limit] ||= result.length
|
56
60
|
result[0, options[:limit]].each do |node|
|
57
61
|
@_hash[meth] << block.call( node )
|
58
62
|
end
|
59
|
-
|
60
63
|
else
|
61
64
|
if result
|
65
|
+
Klepto.logger.debug("\t\t\tAs: block (match one)")
|
62
66
|
@_hash[meth] = block.call( result )
|
63
67
|
else
|
68
|
+
Klepto.logger.debug("\t\t\tAs: block (no match, default: #{options[:default]})")
|
64
69
|
@_hash[meth] = options[:default]
|
65
70
|
end
|
66
71
|
end
|
@@ -69,14 +74,17 @@ module Klepto
|
|
69
74
|
result = _context.send( options[:match], options[:syntax], selector )
|
70
75
|
|
71
76
|
if options[:match] == :all
|
77
|
+
Klepto.logger.debug("\t\t\tAs: simple (match all), Result? #{!result.nil?}")
|
72
78
|
@_hash[meth] = []
|
73
79
|
options[:limit] ||= result.length
|
74
80
|
result[0, options[:limit]].each do |node|
|
75
81
|
@_hash[meth] << (node[options[:attr]] || node.try(:text))
|
76
82
|
end
|
77
83
|
elsif result
|
84
|
+
Klepto.logger.debug("\t\t\tAs: block (match one)")
|
78
85
|
@_hash[meth] = (result[options[:attr]] || result.try(:text))
|
79
86
|
else
|
87
|
+
Klepto.logger.debug("\t\t\tAs: block (no match, default: #{options[:default]})")
|
80
88
|
@_hash[meth] = options[:default]
|
81
89
|
end
|
82
90
|
end
|
data/lib/klepto/version.rb
CHANGED
data/lib/klepto.rb
CHANGED
@@ -14,10 +14,16 @@ end
|
|
14
14
|
Capybara.current_driver = :poltergeist
|
15
15
|
|
16
16
|
module Klepto
|
17
|
-
|
18
|
-
|
17
|
+
def self.logger
|
18
|
+
@@logger
|
19
|
+
end
|
20
|
+
def self.logger=(logger)
|
21
|
+
@@logger = logger
|
22
|
+
end
|
19
23
|
end
|
20
|
-
|
24
|
+
Klepto.logger = Logger.new(STDOUT)
|
25
|
+
Klepto.logger.level = Logger::INFO
|
26
|
+
|
21
27
|
require 'klepto/version'
|
22
28
|
require 'klepto/config'
|
23
29
|
require 'klepto/browser'
|
data/spec/lib/klepto/bot_spec.rb
CHANGED
@@ -55,7 +55,7 @@ describe Klepto::Bot do
|
|
55
55
|
|
56
56
|
describe 'aborting after a failure' do
|
57
57
|
before(:each) do
|
58
|
-
@bot = Klepto::Bot.new("
|
58
|
+
@bot = Klepto::Bot.new("http://coryodaniel.com/nowayjose"){
|
59
59
|
name 'h1.fullname'
|
60
60
|
config.abort_on_failure true
|
61
61
|
config.after(:abort) do |page|
|
@@ -72,8 +72,8 @@ describe Klepto::Bot do
|
|
72
72
|
|
73
73
|
describe 'structuring a 4xx or 5xx response' do
|
74
74
|
before(:each) do
|
75
|
-
@bot = Klepto::Bot.new("
|
76
|
-
|
75
|
+
@bot = Klepto::Bot.new("http://coryodaniel.com/nowayjose"){
|
76
|
+
title 'h2'
|
77
77
|
config.abort_on_failure false
|
78
78
|
config.after(:abort) do |page|
|
79
79
|
StatusLog.create message: 'Aborted.'
|
@@ -83,7 +83,7 @@ describe Klepto::Bot do
|
|
83
83
|
end
|
84
84
|
|
85
85
|
it 'should perform structuring' do
|
86
|
-
@structure.first[:
|
86
|
+
@structure.first[:title].should == 'Not Found'
|
87
87
|
end
|
88
88
|
|
89
89
|
it 'should not abort after a 4xx or 5xx' do
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: klepto
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.7
|
4
|
+
version: 0.4.8
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-05-
|
12
|
+
date: 2013-05-30 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: poltergeist
|
16
|
-
requirement: &
|
16
|
+
requirement: &70255695656940 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - =
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: 1.1.0
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *70255695656940
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: capybara
|
27
|
-
requirement: &
|
27
|
+
requirement: &70255695655600 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - =
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: 2.0.2
|
33
33
|
type: :runtime
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *70255695655600
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: nokogiri
|
38
|
-
requirement: &
|
38
|
+
requirement: &70255695653740 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ~>
|
@@ -43,10 +43,10 @@ dependencies:
|
|
43
43
|
version: 1.5.6
|
44
44
|
type: :runtime
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *70255695653740
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: activesupport
|
49
|
-
requirement: &
|
49
|
+
requirement: &70255695652860 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ! '>='
|
@@ -54,10 +54,10 @@ dependencies:
|
|
54
54
|
version: '0'
|
55
55
|
type: :runtime
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *70255695652860
|
58
58
|
- !ruby/object:Gem::Dependency
|
59
59
|
name: multi_json
|
60
|
-
requirement: &
|
60
|
+
requirement: &70255695375220 !ruby/object:Gem::Requirement
|
61
61
|
none: false
|
62
62
|
requirements:
|
63
63
|
- - ~>
|
@@ -65,7 +65,7 @@ dependencies:
|
|
65
65
|
version: '1.0'
|
66
66
|
type: :runtime
|
67
67
|
prerelease: false
|
68
|
-
version_requirements: *
|
68
|
+
version_requirements: *70255695375220
|
69
69
|
description: Tearing up web pages into ActiveRecord resources
|
70
70
|
email:
|
71
71
|
- github@coryodaniel.com
|