kabutops 0.0.5 → 0.0.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +3 -3
- data/lib/kabutops/adapters/database_adapter.rb +2 -0
- data/lib/kabutops/crawler.rb +23 -11
- data/lib/kabutops/crawler_extensions/pstore_storage.rb +2 -2
- data/lib/kabutops/extensions/callback_support.rb +4 -2
- data/lib/kabutops/extensions/parameterable.rb +4 -3
- data/lib/kabutops/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 321de7144e42cfc9cba6d6e6675eac975a2981a5
|
4
|
+
data.tar.gz: 39e39c701812ff208f6131e5caa31438c3268ed5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cbb53e66f5b2be03d080728ee7be1c4c9c40210796f7ab851d87e94af7519ef54c13e46c60e7a4770d5ec89a0c3c0043d60995ccc7508d43f07fd1918957161d
|
7
|
+
data.tar.gz: 0f3ae02d465fb1a056018bc7db0d1e47a01df19d4c6ce5ec54855fa4774edf497c1c359e5b8eb3dfc2d592e67c3b983ca06f0a21b740c8d97c6b2b38e9f8bbc2
|
data/README.md
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
Kabutops
|
1
|
+
Kabutops [![Code Climate](https://codeclimate.com/github/reneklacan/kabutops.png)](https://codeclimate.com/github/reneklacan/kabutops) [![Coverage](https://codeclimate.com/github/reneklacan/kabutops/coverage.png)](https://codeclimate.com/github/reneklacan/kabutops)
|
2
2
|
========
|
3
3
|
|
4
4
|
Installation
|
@@ -13,7 +13,7 @@ gem install kabutops
|
|
13
13
|
Or you can put it in your Gemfile
|
14
14
|
|
15
15
|
```ruby
|
16
|
-
gem 'kabutops', '~> 0.0.5'
|
16
|
+
gem 'kabutops', '~> 0.0.6'
|
17
17
|
```
|
18
18
|
|
19
19
|
Basic example
|
@@ -41,7 +41,7 @@ class GemListCrawler < Kabutops::Crawler
|
|
41
41
|
after_crawl do |resource, page|
|
42
42
|
links = page.xpath("//a[contains(@href, '/gems?letter=#{resource[:letter]}')]")
|
43
43
|
links.each do |link|
|
44
|
-
|
44
|
+
GemListCrawler << {
|
45
45
|
letter: resource[:letter],
|
46
46
|
url: "https://rubygems.org#{link['href']}",
|
47
47
|
}
|
data/lib/kabutops/crawler.rb
CHANGED
@@ -20,7 +20,7 @@ module Kabutops
|
|
20
20
|
end
|
21
21
|
|
22
22
|
def crawl! collection=nil
|
23
|
-
@map ||=
|
23
|
+
@map ||= Hashie::Mash.new
|
24
24
|
|
25
25
|
if storage[:status].nil?
|
26
26
|
(collection || params[:collection] || []).each do |resource|
|
@@ -37,6 +37,7 @@ module Kabutops
|
|
37
37
|
end
|
38
38
|
|
39
39
|
key = resource[:id] || resource[:url]
|
40
|
+
@map ||= Hashie::Mash.new
|
40
41
|
|
41
42
|
if key.nil?
|
42
43
|
raise "url must be specified for resource"
|
@@ -51,26 +52,37 @@ module Kabutops
|
|
51
52
|
|
52
53
|
def perform resource
|
53
54
|
resource = Hashie::Mash.new(resource)
|
55
|
+
page = crawl(resource)
|
54
56
|
|
57
|
+
self.class.adapters.each do |adapter|
|
58
|
+
adapter.process(resource, page)
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
62
|
+
def << resource
|
63
|
+
self.class << resource
|
64
|
+
end
|
65
|
+
|
66
|
+
protected
|
67
|
+
|
68
|
+
def crawl resource
|
55
69
|
content = Cachy.cache_if(self.class.params.cache, resource[:url]) do
|
56
|
-
|
57
|
-
agent.set_proxy(*self.class.params[:proxy]) if self.class.params[:proxy]
|
70
|
+
sleep self.class.params[:wait] || 0 # wait only if value is not from cache
|
58
71
|
agent.get(resource[:url]).body
|
59
72
|
end
|
60
73
|
|
61
74
|
page = Nokogiri::HTML(content)
|
62
|
-
|
63
75
|
self.class.notify(:after_crawl, resource, page)
|
76
|
+
page
|
77
|
+
end
|
64
78
|
|
65
|
-
|
66
|
-
|
79
|
+
def agent
|
80
|
+
unless @agent
|
81
|
+
@agent = Mechanize.new
|
82
|
+
@agent.set_proxy(*self.class.params[:proxy]) if self.class.params[:proxy]
|
67
83
|
end
|
68
84
|
|
69
|
-
|
70
|
-
end
|
71
|
-
|
72
|
-
def << resource
|
73
|
-
self.class << resource
|
85
|
+
@agent
|
74
86
|
end
|
75
87
|
end
|
76
88
|
|
data/lib/kabutops/crawler_extensions/pstore_storage.rb
CHANGED
@@ -6,7 +6,7 @@ module Kabutops
|
|
6
6
|
|
7
7
|
module PStoreStorage
|
8
8
|
class Storage
|
9
|
-
def initialize path
|
9
|
+
def initialize path='.kabutopus.config.pstore'
|
10
10
|
@storage ||= PStore.new(path)
|
11
11
|
end
|
12
12
|
|
@@ -28,7 +28,7 @@ module Kabutops
|
|
28
28
|
module ClassMethods
|
29
29
|
|
30
30
|
def storage
|
31
|
-
@storage ||= Storage.new
|
31
|
+
@storage ||= Storage.new
|
32
32
|
end
|
33
33
|
|
34
34
|
end
|
data/lib/kabutops/extensions/callback_support.rb
CHANGED
@@ -14,18 +14,20 @@ module Kabutops
|
|
14
14
|
end
|
15
15
|
|
16
16
|
class Manager
|
17
|
+
attr_reader :map, :allowed
|
18
|
+
|
17
19
|
def initialize allowed=nil
|
18
20
|
@allowed = allowed || []
|
21
|
+
@map ||= Hashie::Mash.new
|
19
22
|
end
|
20
23
|
|
21
24
|
def method_missing name, *args, &block
|
22
|
-
return unless block_given?
|
25
|
+
return super unless block_given?
|
23
26
|
|
24
27
|
unless @allowed.include?(name)
|
25
28
|
raise "Invalid callback name: #{name}"
|
26
29
|
end
|
27
30
|
|
28
|
-
@map ||= Hashie::Mash.new
|
29
31
|
@map[name] ||= []
|
30
32
|
@map[name] << block
|
31
33
|
end
|
data/lib/kabutops/extensions/parameterable.rb
CHANGED
@@ -8,9 +8,6 @@ module Kabutops
|
|
8
8
|
|
9
9
|
def self.included base
|
10
10
|
base.extend(ClassMethods)
|
11
|
-
base.class_eval do
|
12
|
-
attr_reader :params
|
13
|
-
end
|
14
11
|
end
|
15
12
|
|
16
13
|
module ClassMethods
|
@@ -26,6 +23,10 @@ module Kabutops
|
|
26
23
|
end
|
27
24
|
end
|
28
25
|
end
|
26
|
+
|
27
|
+
define_method :params do
|
28
|
+
@params ||= Hashie::Mash.new
|
29
|
+
end
|
29
30
|
end
|
30
31
|
|
31
32
|
end
|
data/lib/kabutops/version.rb
CHANGED