kabutops 0.0.4 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/lib/kabutops.rb +2 -0
- data/lib/kabutops/adapters/base.rb +3 -1
- data/lib/kabutops/adapters/database_adapter.rb +4 -0
- data/lib/kabutops/adapters/elastic_search.rb +18 -4
- data/lib/kabutops/crawler.rb +3 -0
- data/lib/kabutops/crawler_extensions/debugging.rb +2 -0
- data/lib/kabutops/crawler_extensions/elastic_search.rb +2 -0
- data/lib/kabutops/crawler_extensions/pstore_storage.rb +2 -0
- data/lib/kabutops/extensions/callback_support.rb +30 -2
- data/lib/kabutops/extensions/parameterable.rb +2 -2
- data/lib/kabutops/recipe.rb +7 -18
- data/lib/kabutops/recipe_item.rb +21 -3
- data/lib/kabutops/version.rb +3 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 30185d1be8fe797322576ca2ded85d792f26d6ce
|
4
|
+
data.tar.gz: fba6fe7cf866ad20f8c632f9fc5761c3e5920c6a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ed6ab102fb990e4e83d6efb7eedbe3a84136979452395f448827b529b772dd410ec3eed39ba4f51cd844a2b780ac6e1a7e84d94479fa30993048b20ba6cc1c0f
|
7
|
+
data.tar.gz: 4ec9f7872df1e79a8f4780bf50233d1ce761ee2b8702efb22058bd52900c551147dae3fe174d05fc75b655e44a3e340c8f5cb369758dcfc11ba1f65e34db1a0b
|
data/README.md
CHANGED
data/lib/kabutops.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# -*- encoding : utf-8 -*-
|
2
|
+
|
1
3
|
module Kabutops
|
2
4
|
|
3
5
|
module Adapters
|
@@ -5,6 +7,8 @@ module Kabutops
|
|
5
7
|
class DatabaseAdapter < Base
|
6
8
|
include Extensions::CallbackSupport
|
7
9
|
|
10
|
+
callbacks :after_save
|
11
|
+
|
8
12
|
def data &block
|
9
13
|
@recipe = Recipe.new
|
10
14
|
@recipe.instance_eval &block
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# -*- encoding : utf-8 -*-
|
2
|
+
|
1
3
|
module Kabutops
|
2
4
|
|
3
5
|
module Adapters
|
@@ -8,10 +10,9 @@ module Kabutops
|
|
8
10
|
params :host, :port, :index, :type
|
9
11
|
|
10
12
|
def store result
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
type: @params[:type] || 'default',
|
13
|
+
client.index(
|
14
|
+
index: params[:index] || 'default',
|
15
|
+
type: params[:type] || 'default',
|
15
16
|
id: result[:id],
|
16
17
|
body: result.to_hash,
|
17
18
|
)
|
@@ -20,6 +21,19 @@ module Kabutops
|
|
20
21
|
def nested?
|
21
22
|
true
|
22
23
|
end
|
24
|
+
|
25
|
+
protected
|
26
|
+
|
27
|
+
def client
|
28
|
+
@@client ||= Elasticsearch::Client.new(
|
29
|
+
hosts: [
|
30
|
+
{
|
31
|
+
host: params[:host] || 'localhost',
|
32
|
+
port: params[:port] || '9200',
|
33
|
+
},
|
34
|
+
],
|
35
|
+
)
|
36
|
+
end
|
23
37
|
end
|
24
38
|
|
25
39
|
end
|
data/lib/kabutops/crawler.rb
CHANGED
@@ -1,13 +1,30 @@
|
|
1
|
+
# -*- encoding : utf-8 -*-
|
2
|
+
|
1
3
|
module Kabutops
|
2
4
|
|
3
5
|
module Extensions
|
4
6
|
|
5
7
|
module CallbackSupport
|
6
8
|
|
9
|
+
def self.included base
|
10
|
+
base.extend(ClassMethods)
|
11
|
+
base.class_eval do
|
12
|
+
attr_reader :allowed_callbacks
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
7
16
|
class Manager
|
17
|
+
def initialize allowed=nil
|
18
|
+
@allowed = allowed || []
|
19
|
+
end
|
20
|
+
|
8
21
|
def method_missing name, *args, &block
|
9
22
|
return unless block_given?
|
10
23
|
|
24
|
+
unless @allowed.include?(name)
|
25
|
+
raise "Invalid callback name: #{name}"
|
26
|
+
end
|
27
|
+
|
11
28
|
@map ||= Hashie::Mash.new
|
12
29
|
@map[name] ||= []
|
13
30
|
@map[name] << block
|
@@ -23,14 +40,25 @@ module Kabutops
|
|
23
40
|
end
|
24
41
|
|
25
42
|
def callbacks &block
|
26
|
-
@manager ||= Manager.new
|
43
|
+
@manager ||= Manager.new(allowed_callbacks)
|
27
44
|
@manager.instance_eval &block
|
28
45
|
end
|
29
46
|
|
30
47
|
def notify name, *args
|
31
|
-
@manager ||= Manager.new
|
48
|
+
@manager ||= Manager.new(allowed_callbacks)
|
32
49
|
@manager.notify(name, *args)
|
33
50
|
end
|
51
|
+
|
52
|
+
module ClassMethods
|
53
|
+
|
54
|
+
def callbacks *args
|
55
|
+
define_method :allowed_callbacks do
|
56
|
+
args
|
57
|
+
end
|
58
|
+
end
|
59
|
+
|
60
|
+
end
|
61
|
+
|
34
62
|
end
|
35
63
|
|
36
64
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# -*- encoding : utf-8 -*-
|
2
|
+
|
1
3
|
module Kabutops
|
2
4
|
|
3
5
|
module Extensions
|
@@ -14,8 +16,6 @@ module Kabutops
|
|
14
16
|
module ClassMethods
|
15
17
|
|
16
18
|
def params *list
|
17
|
-
return @params if list.empty?
|
18
|
-
|
19
19
|
list.each do |name|
|
20
20
|
define_method name do |*args|
|
21
21
|
@params ||= Hashie::Mash.new
|
data/lib/kabutops/recipe.rb
CHANGED
@@ -1,6 +1,10 @@
|
|
1
|
+
# -*- encoding : utf-8 -*-
|
2
|
+
|
1
3
|
module Kabutops
|
2
4
|
|
3
5
|
class Recipe
|
6
|
+
attr_reader :items
|
7
|
+
|
4
8
|
def initialize
|
5
9
|
@items = Hashie::Mash.new
|
6
10
|
@nested = false
|
@@ -10,11 +14,11 @@ module Kabutops
|
|
10
14
|
if block_given?
|
11
15
|
recipe = Recipe.new
|
12
16
|
recipe.instance_eval &block
|
13
|
-
@items[name] = RecipeItem.new(
|
17
|
+
@items[name] = RecipeItem.new(:recipe, recipe)
|
14
18
|
@nested = true
|
15
19
|
else
|
16
20
|
type, value = args[0..1]
|
17
|
-
@items[name] = RecipeItem.new(
|
21
|
+
@items[name] = RecipeItem.new(type, value)
|
18
22
|
end
|
19
23
|
end
|
20
24
|
|
@@ -22,22 +26,7 @@ module Kabutops
|
|
22
26
|
result = Hashie::Mash.new
|
23
27
|
|
24
28
|
@items.each do |name, item|
|
25
|
-
result[name] =
|
26
|
-
when :var
|
27
|
-
resource[item.value]
|
28
|
-
when :recipe
|
29
|
-
item.value.process(resource, page)
|
30
|
-
when :css
|
31
|
-
page.css(item.value).text
|
32
|
-
when :xpath
|
33
|
-
page.xpath(item.value).text
|
34
|
-
when :lambda
|
35
|
-
item.value.call(resource, page)
|
36
|
-
when :proc
|
37
|
-
page.instance_eval &item.value
|
38
|
-
else
|
39
|
-
raise "unknown recipe item type '#{item.type}'"
|
40
|
-
end
|
29
|
+
result[name] = item.process(resource, page)
|
41
30
|
end
|
42
31
|
|
43
32
|
result
|
data/lib/kabutops/recipe_item.rb
CHANGED
@@ -1,13 +1,31 @@
|
|
1
|
+
# -*- encoding : utf-8 -*-
|
2
|
+
|
1
3
|
module Kabutops
|
2
4
|
|
3
5
|
class RecipeItem
|
4
|
-
attr_reader :
|
6
|
+
attr_reader :type, :value
|
5
7
|
|
6
|
-
def initialize
|
7
|
-
@name = name
|
8
|
+
def initialize type, value
|
8
9
|
@type = type
|
9
10
|
@value = value
|
10
11
|
end
|
12
|
+
|
13
|
+
def process resource, page
|
14
|
+
case @type
|
15
|
+
when :var
|
16
|
+
resource[@value]
|
17
|
+
when :recipe
|
18
|
+
@value.process(resource, page)
|
19
|
+
when :css
|
20
|
+
page.css(@value).text
|
21
|
+
when :xpath
|
22
|
+
page.xpath(@value).text
|
23
|
+
when :lambda, :proc
|
24
|
+
@value.call(resource, page)
|
25
|
+
else
|
26
|
+
raise "unknown recipe item type '#{item.type}'"
|
27
|
+
end
|
28
|
+
end
|
11
29
|
end
|
12
30
|
|
13
31
|
end
|
data/lib/kabutops/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: kabutops
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.4
|
4
|
+
version: 0.0.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Rene Klacan
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-06-
|
11
|
+
date: 2014-06-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: mechanize
|