kabutops 0.0.15 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 3fccc58091b11a4171dbf3ef78bf61e8a9399936
4
- data.tar.gz: 72e2f5edf655110a9af6ed7d806da7f04119cf76
3
+ metadata.gz: 05df1a21e10c723541c9cc4514174959b794dc27
4
+ data.tar.gz: 6d60b8c1b4acf024f50b8c363b905c8d167f503c
5
5
  SHA512:
6
- metadata.gz: 5098cd922353e30708f081e9b9f2467027cead27aba5d2045045ac413e4965703f6cb9adb454528e4c3a94de960a5542549af26f43b806773193edafa8e5dbd5
7
- data.tar.gz: 7376e74da6006fd4bc4da1023e21865aa861a17306d0a99c5b47b736a2cf207a34d8f2efe7a8cd47eb3e2e05e5eea68ac010ba5d9a0bdae3b721d87fbc17fcf4
6
+ metadata.gz: 5cf88297902defb328c85d39233af7a8f11fc20935c193a4a9cc568d38c844f3657973ea58425db4d719b6faa943df7edc71e179e06425ef39911cadf5599d21
7
+ data.tar.gz: 47e08167bb5129ef665310c2d332c57ae071d9755a2c5f133b03b158d1ae1520126ef1ab8ff58c2f70564bd93ac6ccac0a613e2eae49e8ca657d63bfe6024814
data/README.md CHANGED
@@ -24,7 +24,7 @@ gem install kabutops
24
24
  Or you can put it in your Gemfile
25
25
 
26
26
  ```ruby
27
- gem 'kabutops', '~> 0.0.15'
27
+ gem 'kabutops', '~> 0.1.0'
28
28
  ```
29
29
 
30
30
  You will also need a Redis database installed and running.
@@ -12,25 +12,25 @@ module Kabutops
12
12
 
13
13
  callbacks :after_save, :save_if
14
14
 
15
- def data &block
16
- @recipe = Recipe.new
15
+ def data params={}, &block
16
+ @recipe = Recipe.new(params)
17
17
  @recipe.instance_eval &block
18
18
  end
19
19
 
20
20
  def process resource, page
21
21
  raise 'data block not defined' unless @recipe
22
22
 
23
- result = @recipe.process(resource, page)
24
- result.update(updated_at: Time.now.to_i)
25
-
26
- save = (notify(:save_if, resource, page, result) || []).all?
27
-
28
- if debug
29
- logger.info("#{self.class.to_s} outputs:")
30
- logger.info(save ? result.to_hash : 'not valid for save')
31
- elsif save
32
- store(result)
33
- notify(:after_save, result)
23
+ [@recipe.process(resource, page)].flatten.each do |result|
24
+ result.update(updated_at: Time.now.to_i)
25
+ save = (notify(:save_if, resource, page, result) || []).all?
26
+
27
+ if debug
28
+ logger.info("#{self.class.to_s} outputs:")
29
+ logger.info(save ? result.to_hash : 'not valid for save')
30
+ elsif save
31
+ store(result)
32
+ notify(:after_save, result)
33
+ end
34
34
  end
35
35
  end
36
36
 
@@ -17,7 +17,7 @@ module Kabutops
17
17
 
18
18
  params :collection, :proxy, :cache, :wait,
19
19
  :skip_existing, :agent
20
- callbacks :after_crawl, :before_cache
20
+ callbacks :after_crawl, :before_cache, :store_if
21
21
 
22
22
  def adapters
23
23
  @adapters ||= []
@@ -77,6 +77,10 @@ module Kabutops
77
77
 
78
78
  return if page.nil?
79
79
 
80
+ save = (self.class.notify(:store_if, resource, page) || []).all?
81
+
82
+ return unless save
83
+
80
84
  adapters.each do |adapter|
81
85
  adapter.process(resource, page)
82
86
  end
@@ -5,7 +5,8 @@ module Kabutops
5
5
  class Recipe
6
6
  attr_reader :items
7
7
 
8
- def initialize
8
+ def initialize params={}
9
+ @params = Hashie::Mash.new(params)
9
10
  @items = Hashie::Mash.new
10
11
  @nested = false
11
12
  end
@@ -23,10 +24,20 @@ module Kabutops
23
24
  end
24
25
 
25
26
  def process resource, page
27
+ if @params[:each]
28
+ page.xpath(@params[:each]).map{ |n| process_one(resource, n) }
29
+ elsif @params[:each_css]
30
+ page.css(@params[:each_css]).map{ |n| process_one(resource, n) }
31
+ else
32
+ process_one(resource, page)
33
+ end
34
+ end
35
+
36
+ def process_one resource, node
26
37
  result = Hashie::Mash.new
27
38
 
28
39
  @items.each do |name, item|
29
- result[name] = item.process(resource, page)
40
+ result[name] = item.process(resource, node)
30
41
  end
31
42
 
32
43
  result
@@ -18,6 +18,11 @@ module Kabutops
18
18
  super(collection || [{ url: params.url, }])
19
19
  end
20
20
 
21
+ def reset!
22
+ super
23
+ redis.keys.each{ |k| redis.del(k) }
24
+ end
25
+
21
26
  def << resource
22
27
  if resource_status(resource).nil?
23
28
  resource_status(resource, 'new')
@@ -25,6 +30,12 @@ module Kabutops
25
30
  end
26
31
  end
27
32
 
33
+ def follow link
34
+ self << {
35
+ url: URI.join(params.url, URI.escape(link)).to_s
36
+ }
37
+ end
38
+
28
39
  def resource_status resource, status=nil
29
40
  url_status(resource[:url], status)
30
41
  end
@@ -73,12 +84,10 @@ module Kabutops
73
84
 
74
85
  def after_crawl resource, page
75
86
  page.css('a').each do |a|
87
+ next if a['href'].nil?
88
+
76
89
  follow = self.class.notify(:follow_if, a['href']).any?
77
- if follow
78
- self << {
79
- url: URI.join(params.url, URI.escape(a['href'])).to_s
80
- }
81
- end
90
+ self.class.follow(a['href']) if follow
82
91
  end
83
92
  end
84
93
  end
@@ -1,5 +1,5 @@
1
1
  # -*- encoding : utf-8 -*-
2
2
 
3
3
  module Kabutops
4
- VERSION = '0.0.15'
4
+ VERSION = '0.1.0'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: kabutops
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.15
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Rene Klacan
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-07-17 00:00:00.000000000 Z
11
+ date: 2014-07-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: mechanize