kabutops 0.0.5 → 0.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +3 -3
- data/lib/kabutops/adapters/database_adapter.rb +2 -0
- data/lib/kabutops/crawler.rb +23 -11
- data/lib/kabutops/crawler_extensions/pstore_storage.rb +2 -2
- data/lib/kabutops/extensions/callback_support.rb +4 -2
- data/lib/kabutops/extensions/parameterable.rb +4 -3
- data/lib/kabutops/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 321de7144e42cfc9cba6d6e6675eac975a2981a5
|
4
|
+
data.tar.gz: 39e39c701812ff208f6131e5caa31438c3268ed5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cbb53e66f5b2be03d080728ee7be1c4c9c40210796f7ab851d87e94af7519ef54c13e46c60e7a4770d5ec89a0c3c0043d60995ccc7508d43f07fd1918957161d
|
7
|
+
data.tar.gz: 0f3ae02d465fb1a056018bc7db0d1e47a01df19d4c6ce5ec54855fa4774edf497c1c359e5b8eb3dfc2d592e67c3b983ca06f0a21b740c8d97c6b2b38e9f8bbc2
|
data/README.md
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
Kabutops
|
1
|
+
Kabutops [](https://codeclimate.com/github/reneklacan/kabutops) [](https://codeclimate.com/github/reneklacan/kabutops)
|
2
2
|
========
|
3
3
|
|
4
4
|
Installation
|
@@ -13,7 +13,7 @@ gem install kabutops
|
|
13
13
|
Or you can put it in your Gemfile
|
14
14
|
|
15
15
|
```ruby
|
16
|
-
gem 'kabutops', '~> 0.0.
|
16
|
+
gem 'kabutops', '~> 0.0.6'
|
17
17
|
```
|
18
18
|
|
19
19
|
Basic example
|
@@ -41,7 +41,7 @@ class GemListCrawler < Kabutops::Crawler
|
|
41
41
|
after_crawl do |resource, page|
|
42
42
|
links = page.xpath("//a[contains(@href, '/gems?letter=#{resource[:letter]}')]")
|
43
43
|
links.each do |link|
|
44
|
-
|
44
|
+
GemListCrawler << {
|
45
45
|
letter: resource[:letter],
|
46
46
|
url: "https://rubygems.org#{link['href']}",
|
47
47
|
}
|
data/lib/kabutops/crawler.rb
CHANGED
@@ -20,7 +20,7 @@ module Kabutops
|
|
20
20
|
end
|
21
21
|
|
22
22
|
def crawl! collection=nil
|
23
|
-
@map ||=
|
23
|
+
@map ||= Hashie::Mash.new
|
24
24
|
|
25
25
|
if storage[:status].nil?
|
26
26
|
(collection || params[:collection] || []).each do |resource|
|
@@ -37,6 +37,7 @@ module Kabutops
|
|
37
37
|
end
|
38
38
|
|
39
39
|
key = resource[:id] || resource[:url]
|
40
|
+
@map ||= Hashie::Mash.new
|
40
41
|
|
41
42
|
if key.nil?
|
42
43
|
raise "url must be specified for resource"
|
@@ -51,26 +52,37 @@ module Kabutops
|
|
51
52
|
|
52
53
|
def perform resource
|
53
54
|
resource = Hashie::Mash.new(resource)
|
55
|
+
page = crawl(resource)
|
54
56
|
|
57
|
+
self.class.adapters.each do |adapter|
|
58
|
+
adapter.process(resource, page)
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
62
|
+
def << resource
|
63
|
+
self.class << resource
|
64
|
+
end
|
65
|
+
|
66
|
+
protected
|
67
|
+
|
68
|
+
def crawl resource
|
55
69
|
content = Cachy.cache_if(self.class.params.cache, resource[:url]) do
|
56
|
-
|
57
|
-
agent.set_proxy(*self.class.params[:proxy]) if self.class.params[:proxy]
|
70
|
+
sleep self.class.params[:wait] || 0 # wait only if value is not from cache
|
58
71
|
agent.get(resource[:url]).body
|
59
72
|
end
|
60
73
|
|
61
74
|
page = Nokogiri::HTML(content)
|
62
|
-
|
63
75
|
self.class.notify(:after_crawl, resource, page)
|
76
|
+
page
|
77
|
+
end
|
64
78
|
|
65
|
-
|
66
|
-
|
79
|
+
def agent
|
80
|
+
unless @agent
|
81
|
+
@agent = Mechanize.new
|
82
|
+
@agent.set_proxy(*self.class.params[:proxy]) if self.class.params[:proxy]
|
67
83
|
end
|
68
84
|
|
69
|
-
|
70
|
-
end
|
71
|
-
|
72
|
-
def << resource
|
73
|
-
self.class << resource
|
85
|
+
@agent
|
74
86
|
end
|
75
87
|
end
|
76
88
|
|
data/lib/kabutops/crawler_extensions/pstore_storage.rb
CHANGED
@@ -6,7 +6,7 @@ module Kabutops
|
|
6
6
|
|
7
7
|
module PStoreStorage
|
8
8
|
class Storage
|
9
|
-
def initialize path
|
9
|
+
def initialize path='.kabutopus.config.pstore'
|
10
10
|
@storage ||= PStore.new(path)
|
11
11
|
end
|
12
12
|
|
@@ -28,7 +28,7 @@ module Kabutops
|
|
28
28
|
module ClassMethods
|
29
29
|
|
30
30
|
def storage
|
31
|
-
@storage ||= Storage.new
|
31
|
+
@storage ||= Storage.new
|
32
32
|
end
|
33
33
|
|
34
34
|
end
|
data/lib/kabutops/extensions/callback_support.rb
CHANGED
@@ -14,18 +14,20 @@ module Kabutops
|
|
14
14
|
end
|
15
15
|
|
16
16
|
class Manager
|
17
|
+
attr_reader :map, :allowed
|
18
|
+
|
17
19
|
def initialize allowed=nil
|
18
20
|
@allowed = allowed || []
|
21
|
+
@map ||= Hashie::Mash.new
|
19
22
|
end
|
20
23
|
|
21
24
|
def method_missing name, *args, &block
|
22
|
-
return unless block_given?
|
25
|
+
return super unless block_given?
|
23
26
|
|
24
27
|
unless @allowed.include?(name)
|
25
28
|
raise "Invalid callback name: #{name}"
|
26
29
|
end
|
27
30
|
|
28
|
-
@map ||= Hashie::Mash.new
|
29
31
|
@map[name] ||= []
|
30
32
|
@map[name] << block
|
31
33
|
end
|
data/lib/kabutops/extensions/parameterable.rb
CHANGED
@@ -8,9 +8,6 @@ module Kabutops
|
|
8
8
|
|
9
9
|
def self.included base
|
10
10
|
base.extend(ClassMethods)
|
11
|
-
base.class_eval do
|
12
|
-
attr_reader :params
|
13
|
-
end
|
14
11
|
end
|
15
12
|
|
16
13
|
module ClassMethods
|
@@ -26,6 +23,10 @@ module Kabutops
|
|
26
23
|
end
|
27
24
|
end
|
28
25
|
end
|
26
|
+
|
27
|
+
define_method :params do
|
28
|
+
@params ||= Hashie::Mash.new
|
29
|
+
end
|
29
30
|
end
|
30
31
|
|
31
32
|
end
|
data/lib/kabutops/version.rb
CHANGED