kabutops 0.0.15 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 3fccc58091b11a4171dbf3ef78bf61e8a9399936
4
- data.tar.gz: 72e2f5edf655110a9af6ed7d806da7f04119cf76
3
+ metadata.gz: 05df1a21e10c723541c9cc4514174959b794dc27
4
+ data.tar.gz: 6d60b8c1b4acf024f50b8c363b905c8d167f503c
5
5
  SHA512:
6
- metadata.gz: 5098cd922353e30708f081e9b9f2467027cead27aba5d2045045ac413e4965703f6cb9adb454528e4c3a94de960a5542549af26f43b806773193edafa8e5dbd5
7
- data.tar.gz: 7376e74da6006fd4bc4da1023e21865aa861a17306d0a99c5b47b736a2cf207a34d8f2efe7a8cd47eb3e2e05e5eea68ac010ba5d9a0bdae3b721d87fbc17fcf4
6
+ metadata.gz: 5cf88297902defb328c85d39233af7a8f11fc20935c193a4a9cc568d38c844f3657973ea58425db4d719b6faa943df7edc71e179e06425ef39911cadf5599d21
7
+ data.tar.gz: 47e08167bb5129ef665310c2d332c57ae071d9755a2c5f133b03b158d1ae1520126ef1ab8ff58c2f70564bd93ac6ccac0a613e2eae49e8ca657d63bfe6024814
data/README.md CHANGED
@@ -24,7 +24,7 @@ gem install kabutops
24
24
  Or you can put it in your Gemfile
25
25
 
26
26
  ```ruby
27
- gem 'kabutops', '~> 0.0.15'
27
+ gem 'kabutops', '~> 0.1.0'
28
28
  ```
29
29
 
30
30
  You will also need Redis database installed and running.
@@ -12,25 +12,25 @@ module Kabutops
12
12
 
13
13
  callbacks :after_save, :save_if
14
14
 
15
- def data &block
16
- @recipe = Recipe.new
15
+ def data params={}, &block
16
+ @recipe = Recipe.new(params)
17
17
  @recipe.instance_eval &block
18
18
  end
19
19
 
20
20
  def process resource, page
21
21
  raise 'data block not defined' unless @recipe
22
22
 
23
- result = @recipe.process(resource, page)
24
- result.update(updated_at: Time.now.to_i)
25
-
26
- save = (notify(:save_if, resource, page, result) || []).all?
27
-
28
- if debug
29
- logger.info("#{self.class.to_s} outputs:")
30
- logger.info(save ? result.to_hash : 'not valid for save')
31
- elsif save
32
- store(result)
33
- notify(:after_save, result)
23
+ [@recipe.process(resource, page)].flatten.each do |result|
24
+ result.update(updated_at: Time.now.to_i)
25
+ save = (notify(:save_if, resource, page, result) || []).all?
26
+
27
+ if debug
28
+ logger.info("#{self.class.to_s} outputs:")
29
+ logger.info(save ? result.to_hash : 'not valid for save')
30
+ elsif save
31
+ store(result)
32
+ notify(:after_save, result)
33
+ end
34
34
  end
35
35
  end
36
36
 
@@ -17,7 +17,7 @@ module Kabutops
17
17
 
18
18
  params :collection, :proxy, :cache, :wait,
19
19
  :skip_existing, :agent
20
- callbacks :after_crawl, :before_cache
20
+ callbacks :after_crawl, :before_cache, :store_if
21
21
 
22
22
  def adapters
23
23
  @adapters ||= []
@@ -77,6 +77,10 @@ module Kabutops
77
77
 
78
78
  return if page.nil?
79
79
 
80
+ save = (self.class.notify(:store_if, resource, page) || []).all?
81
+
82
+ return unless save
83
+
80
84
  adapters.each do |adapter|
81
85
  adapter.process(resource, page)
82
86
  end
@@ -5,7 +5,8 @@ module Kabutops
5
5
  class Recipe
6
6
  attr_reader :items
7
7
 
8
- def initialize
8
+ def initialize params={}
9
+ @params = Hashie::Mash.new(params)
9
10
  @items = Hashie::Mash.new
10
11
  @nested = false
11
12
  end
@@ -23,10 +24,20 @@ module Kabutops
23
24
  end
24
25
 
25
26
  def process resource, page
27
+ if @params[:each]
28
+ page.xpath(@params[:each]).map{ |n| process_one(resource, n) }
29
+ elsif @params[:each_css]
30
+ page.css(@params[:each_css]).map{ |n| process_one(resource, n) }
31
+ else
32
+ process_one(resource, page)
33
+ end
34
+ end
35
+
36
+ def process_one resource, node
26
37
  result = Hashie::Mash.new
27
38
 
28
39
  @items.each do |name, item|
29
- result[name] = item.process(resource, page)
40
+ result[name] = item.process(resource, node)
30
41
  end
31
42
 
32
43
  result
@@ -18,6 +18,11 @@ module Kabutops
18
18
  super(collection || [{ url: params.url, }])
19
19
  end
20
20
 
21
+ def reset!
22
+ super
23
+ redis.keys.each{ |k| redis.del(k) }
24
+ end
25
+
21
26
  def << resource
22
27
  if resource_status(resource).nil?
23
28
  resource_status(resource, 'new')
@@ -25,6 +30,12 @@ module Kabutops
25
30
  end
26
31
  end
27
32
 
33
+ def follow link
34
+ self << {
35
+ url: URI.join(params.url, URI.escape(link)).to_s
36
+ }
37
+ end
38
+
28
39
  def resource_status resource, status=nil
29
40
  url_status(resource[:url], status)
30
41
  end
@@ -73,12 +84,10 @@ module Kabutops
73
84
 
74
85
  def after_crawl resource, page
75
86
  page.css('a').each do |a|
87
+ next if a['href'].nil?
88
+
76
89
  follow = self.class.notify(:follow_if, a['href']).any?
77
- if follow
78
- self << {
79
- url: URI.join(params.url, URI.escape(a['href'])).to_s
80
- }
81
- end
90
+ self.class.follow(a['href']) if follow
82
91
  end
83
92
  end
84
93
  end
@@ -1,5 +1,5 @@
1
1
  # -*- encoding : utf-8 -*-
2
2
 
3
3
  module Kabutops
4
- VERSION = '0.0.15'
4
+ VERSION = '0.1.0'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: kabutops
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.15
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Rene Klacan
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-07-17 00:00:00.000000000 Z
11
+ date: 2014-07-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: mechanize