kabutops 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE +191 -0
- data/README.md +87 -0
- data/lib/kabutops.rb +18 -0
- data/lib/kabutops/adapters/callback.rb +15 -0
- data/lib/kabutops/adapters/database_adapter.rb +23 -0
- data/lib/kabutops/adapters/elastic_search.rb +23 -0
- data/lib/kabutops/adapters/mysql.rb +13 -0
- data/lib/kabutops/crawler.rb +70 -0
- data/lib/kabutops/crawler_extensions/callback.rb +24 -0
- data/lib/kabutops/crawler_extensions/elastic_search.rb +24 -0
- data/lib/kabutops/crawler_extensions/mysql.rb +0 -0
- data/lib/kabutops/crawler_extensions/pstore_storage.rb +27 -0
- data/lib/kabutops/recipe.rb +49 -0
- data/lib/kabutops/recipe_item.rb +11 -0
- data/lib/kabutops/version.rb +3 -0
- metadata +128 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: ddd72a59b7d2055cb55e305f2f365bc2fcbf8b23
|
4
|
+
data.tar.gz: b065d94ff1311e7051e9ba5a88dd5b8079923a75
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 406b830a7b6f936a805c4403e98abfdfd71ee72fffc73f46ae2ac0cb985a1fe0502111767a761cfa319a16cb86986802f0bc11c70847bac99fa5b678fb051d9d
|
7
|
+
data.tar.gz: 3230fd659aa8241ce9dcc6eb174bdb51d1e2b35d9d7bdcac47f33ff304ff14846b166439ecb9d8abdd27294eccaf3e865afc4e47edd3a0c81e9a3615671d23a4
|
data/LICENSE
ADDED
@@ -0,0 +1,191 @@
|
|
1
|
+
Apache License
|
2
|
+
Version 2.0, January 2004
|
3
|
+
http://www.apache.org/licenses/
|
4
|
+
|
5
|
+
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
6
|
+
|
7
|
+
1. Definitions.
|
8
|
+
|
9
|
+
"License" shall mean the terms and conditions for use, reproduction, and
|
10
|
+
distribution as defined by Sections 1 through 9 of this document.
|
11
|
+
|
12
|
+
"Licensor" shall mean the copyright owner or entity authorized by the copyright
|
13
|
+
owner that is granting the License.
|
14
|
+
|
15
|
+
"Legal Entity" shall mean the union of the acting entity and all other entities
|
16
|
+
that control, are controlled by, or are under common control with that entity.
|
17
|
+
For the purposes of this definition, "control" means (i) the power, direct or
|
18
|
+
indirect, to cause the direction or management of such entity, whether by
|
19
|
+
contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
20
|
+
outstanding shares, or (iii) beneficial ownership of such entity.
|
21
|
+
|
22
|
+
"You" (or "Your") shall mean an individual or Legal Entity exercising
|
23
|
+
permissions granted by this License.
|
24
|
+
|
25
|
+
"Source" form shall mean the preferred form for making modifications, including
|
26
|
+
but not limited to software source code, documentation source, and configuration
|
27
|
+
files.
|
28
|
+
|
29
|
+
"Object" form shall mean any form resulting from mechanical transformation or
|
30
|
+
translation of a Source form, including but not limited to compiled object code,
|
31
|
+
generated documentation, and conversions to other media types.
|
32
|
+
|
33
|
+
"Work" shall mean the work of authorship, whether in Source or Object form, made
|
34
|
+
available under the License, as indicated by a copyright notice that is included
|
35
|
+
in or attached to the work (an example is provided in the Appendix below).
|
36
|
+
|
37
|
+
"Derivative Works" shall mean any work, whether in Source or Object form, that
|
38
|
+
is based on (or derived from) the Work and for which the editorial revisions,
|
39
|
+
annotations, elaborations, or other modifications represent, as a whole, an
|
40
|
+
original work of authorship. For the purposes of this License, Derivative Works
|
41
|
+
shall not include works that remain separable from, or merely link (or bind by
|
42
|
+
name) to the interfaces of, the Work and Derivative Works thereof.
|
43
|
+
|
44
|
+
"Contribution" shall mean any work of authorship, including the original version
|
45
|
+
of the Work and any modifications or additions to that Work or Derivative Works
|
46
|
+
thereof, that is intentionally submitted to Licensor for inclusion in the Work
|
47
|
+
by the copyright owner or by an individual or Legal Entity authorized to submit
|
48
|
+
on behalf of the copyright owner. For the purposes of this definition,
|
49
|
+
"submitted" means any form of electronic, verbal, or written communication sent
|
50
|
+
to the Licensor or its representatives, including but not limited to
|
51
|
+
communication on electronic mailing lists, source code control systems, and
|
52
|
+
issue tracking systems that are managed by, or on behalf of, the Licensor for
|
53
|
+
the purpose of discussing and improving the Work, but excluding communication
|
54
|
+
that is conspicuously marked or otherwise designated in writing by the copyright
|
55
|
+
owner as "Not a Contribution."
|
56
|
+
|
57
|
+
"Contributor" shall mean Licensor and any individual or Legal Entity on behalf
|
58
|
+
of whom a Contribution has been received by Licensor and subsequently
|
59
|
+
incorporated within the Work.
|
60
|
+
|
61
|
+
2. Grant of Copyright License.
|
62
|
+
|
63
|
+
Subject to the terms and conditions of this License, each Contributor hereby
|
64
|
+
grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free,
|
65
|
+
irrevocable copyright license to reproduce, prepare Derivative Works of,
|
66
|
+
publicly display, publicly perform, sublicense, and distribute the Work and such
|
67
|
+
Derivative Works in Source or Object form.
|
68
|
+
|
69
|
+
3. Grant of Patent License.
|
70
|
+
|
71
|
+
Subject to the terms and conditions of this License, each Contributor hereby
|
72
|
+
grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free,
|
73
|
+
irrevocable (except as stated in this section) patent license to make, have
|
74
|
+
made, use, offer to sell, sell, import, and otherwise transfer the Work, where
|
75
|
+
such license applies only to those patent claims licensable by such Contributor
|
76
|
+
that are necessarily infringed by their Contribution(s) alone or by combination
|
77
|
+
of their Contribution(s) with the Work to which such Contribution(s) was
|
78
|
+
submitted. If You institute patent litigation against any entity (including a
|
79
|
+
cross-claim or counterclaim in a lawsuit) alleging that the Work or a
|
80
|
+
Contribution incorporated within the Work constitutes direct or contributory
|
81
|
+
patent infringement, then any patent licenses granted to You under this License
|
82
|
+
for that Work shall terminate as of the date such litigation is filed.
|
83
|
+
|
84
|
+
4. Redistribution.
|
85
|
+
|
86
|
+
You may reproduce and distribute copies of the Work or Derivative Works thereof
|
87
|
+
in any medium, with or without modifications, and in Source or Object form,
|
88
|
+
provided that You meet the following conditions:
|
89
|
+
|
90
|
+
You must give any other recipients of the Work or Derivative Works a copy of
|
91
|
+
this License; and
|
92
|
+
You must cause any modified files to carry prominent notices stating that You
|
93
|
+
changed the files; and
|
94
|
+
You must retain, in the Source form of any Derivative Works that You distribute,
|
95
|
+
all copyright, patent, trademark, and attribution notices from the Source form
|
96
|
+
of the Work, excluding those notices that do not pertain to any part of the
|
97
|
+
Derivative Works; and
|
98
|
+
If the Work includes a "NOTICE" text file as part of its distribution, then any
|
99
|
+
Derivative Works that You distribute must include a readable copy of the
|
100
|
+
attribution notices contained within such NOTICE file, excluding those notices
|
101
|
+
that do not pertain to any part of the Derivative Works, in at least one of the
|
102
|
+
following places: within a NOTICE text file distributed as part of the
|
103
|
+
Derivative Works; within the Source form or documentation, if provided along
|
104
|
+
with the Derivative Works; or, within a display generated by the Derivative
|
105
|
+
Works, if and wherever such third-party notices normally appear. The contents of
|
106
|
+
the NOTICE file are for informational purposes only and do not modify the
|
107
|
+
License. You may add Your own attribution notices within Derivative Works that
|
108
|
+
You distribute, alongside or as an addendum to the NOTICE text from the Work,
|
109
|
+
provided that such additional attribution notices cannot be construed as
|
110
|
+
modifying the License.
|
111
|
+
You may add Your own copyright statement to Your modifications and may provide
|
112
|
+
additional or different license terms and conditions for use, reproduction, or
|
113
|
+
distribution of Your modifications, or for any such Derivative Works as a whole,
|
114
|
+
provided Your use, reproduction, and distribution of the Work otherwise complies
|
115
|
+
with the conditions stated in this License.
|
116
|
+
|
117
|
+
5. Submission of Contributions.
|
118
|
+
|
119
|
+
Unless You explicitly state otherwise, any Contribution intentionally submitted
|
120
|
+
for inclusion in the Work by You to the Licensor shall be under the terms and
|
121
|
+
conditions of this License, without any additional terms or conditions.
|
122
|
+
Notwithstanding the above, nothing herein shall supersede or modify the terms of
|
123
|
+
any separate license agreement you may have executed with Licensor regarding
|
124
|
+
such Contributions.
|
125
|
+
|
126
|
+
6. Trademarks.
|
127
|
+
|
128
|
+
This License does not grant permission to use the trade names, trademarks,
|
129
|
+
service marks, or product names of the Licensor, except as required for
|
130
|
+
reasonable and customary use in describing the origin of the Work and
|
131
|
+
reproducing the content of the NOTICE file.
|
132
|
+
|
133
|
+
7. Disclaimer of Warranty.
|
134
|
+
|
135
|
+
Unless required by applicable law or agreed to in writing, Licensor provides the
|
136
|
+
Work (and each Contributor provides its Contributions) on an "AS IS" BASIS,
|
137
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied,
|
138
|
+
including, without limitation, any warranties or conditions of TITLE,
|
139
|
+
NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are
|
140
|
+
solely responsible for determining the appropriateness of using or
|
141
|
+
redistributing the Work and assume any risks associated with Your exercise of
|
142
|
+
permissions under this License.
|
143
|
+
|
144
|
+
8. Limitation of Liability.
|
145
|
+
|
146
|
+
In no event and under no legal theory, whether in tort (including negligence),
|
147
|
+
contract, or otherwise, unless required by applicable law (such as deliberate
|
148
|
+
and grossly negligent acts) or agreed to in writing, shall any Contributor be
|
149
|
+
liable to You for damages, including any direct, indirect, special, incidental,
|
150
|
+
or consequential damages of any character arising as a result of this License or
|
151
|
+
out of the use or inability to use the Work (including but not limited to
|
152
|
+
damages for loss of goodwill, work stoppage, computer failure or malfunction, or
|
153
|
+
any and all other commercial damages or losses), even if such Contributor has
|
154
|
+
been advised of the possibility of such damages.
|
155
|
+
|
156
|
+
9. Accepting Warranty or Additional Liability.
|
157
|
+
|
158
|
+
While redistributing the Work or Derivative Works thereof, You may choose to
|
159
|
+
offer, and charge a fee for, acceptance of support, warranty, indemnity, or
|
160
|
+
other liability obligations and/or rights consistent with this License. However,
|
161
|
+
in accepting such obligations, You may act only on Your own behalf and on Your
|
162
|
+
sole responsibility, not on behalf of any other Contributor, and only if You
|
163
|
+
agree to indemnify, defend, and hold each Contributor harmless for any liability
|
164
|
+
incurred by, or claims asserted against, such Contributor by reason of your
|
165
|
+
accepting any such warranty or additional liability.
|
166
|
+
|
167
|
+
END OF TERMS AND CONDITIONS
|
168
|
+
|
169
|
+
APPENDIX: How to apply the Apache License to your work
|
170
|
+
|
171
|
+
To apply the Apache License to your work, attach the following boilerplate
|
172
|
+
notice, with the fields enclosed by brackets "[]" replaced with your own
|
173
|
+
identifying information. (Don't include the brackets!) The text should be
|
174
|
+
enclosed in the appropriate comment syntax for the file format. We also
|
175
|
+
recommend that a file or class name and description of purpose be included on
|
176
|
+
the same "printed page" as the copyright notice for easier identification within
|
177
|
+
third-party archives.
|
178
|
+
|
179
|
+
Copyright [yyyy] [name of copyright owner]
|
180
|
+
|
181
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
182
|
+
you may not use this file except in compliance with the License.
|
183
|
+
You may obtain a copy of the License at
|
184
|
+
|
185
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
186
|
+
|
187
|
+
Unless required by applicable law or agreed to in writing, software
|
188
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
189
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
190
|
+
See the License for the specific language governing permissions and
|
191
|
+
limitations under the License.
|
data/README.md
ADDED
@@ -0,0 +1,87 @@
|
|
1
|
+
Kabutops
|
2
|
+
========
|
3
|
+
|
4
|
+
Installation
|
5
|
+
------------
|
6
|
+
|
7
|
+
You can install it via gem
|
8
|
+
|
9
|
+
```bash
|
10
|
+
gem install kabutops
|
11
|
+
```
|
12
|
+
|
13
|
+
Or you can put it in your Gemfile
|
14
|
+
|
15
|
+
```ruby
|
16
|
+
gem 'kabutops'
|
17
|
+
```
|
18
|
+
|
19
|
+
Basic example
|
20
|
+
-------------
|
21
|
+
|
22
|
+
Create **fruit_crawler.rb**.
|
23
|
+
|
24
|
+
```ruby
|
25
|
+
require 'kabutops'
|
26
|
+
|
27
|
+
class FruitCrawler < Kabutops::Crawler
|
28
|
+
include Sidekiq::Worker
|
29
|
+
|
30
|
+
collection (1..5).map { |id|
|
31
|
+
{
|
32
|
+
id: id,
|
33
|
+
url: "https://www.example.com/fruits/#{id}",
|
34
|
+
}
|
35
|
+
}.shuffle
|
36
|
+
proxy '127.0.0.1', 8118
|
37
|
+
cache true
|
38
|
+
|
39
|
+
elasticsearch do
|
40
|
+
index :books
|
41
|
+
document :book
|
42
|
+
|
43
|
+
data do
|
44
|
+
id :var, :id
|
45
|
+
url :var, :url
|
46
|
+
some_attr :css, 'h1.bookTitle'
|
47
|
+
grape :lambda, ->(page) {
|
48
|
+
page.css('h3.fruit').text.split(',').first
|
49
|
+
}
|
50
|
+
|
51
|
+
nested_attr do
|
52
|
+
apple :css, 'h1.bookTitle'
|
53
|
+
banana :xpath, '//table/tr/td[1]'
|
54
|
+
end
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
58
|
+
callback do |resource, page|
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
62
|
+
FruitCrawler.crawl!
|
63
|
+
```
|
64
|
+
|
65
|
+
Run it via sidekiq
|
66
|
+
|
67
|
+
```bash
|
68
|
+
bundle exec sidekiq -r ./fruit_crawler.rb -c 10
|
69
|
+
```
|
70
|
+
|
71
|
+
This example will crawl the specified URLs in parallel and the results will be
|
72
|
+
stored in the Elasticsearch index named books as book documents.
|
73
|
+
|
74
|
+
One document will look something like this
|
75
|
+
|
76
|
+
```json
|
77
|
+
{
|
78
|
+
"id": "...",
|
79
|
+
"url": "...",
|
80
|
+
"some_attr": "...",
|
81
|
+
"grape": "...",
|
82
|
+
"nested_attr": {
|
83
|
+
"apple": "...",
|
84
|
+
"banana": "..."
|
85
|
+
}
|
86
|
+
}
|
87
|
+
```
|
data/lib/kabutops.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
# Entry point of the kabutops gem: loads the third-party libraries the
# crawler relies on, configures the page cache, then loads the gem's files.
require 'sidekiq'
require 'cachy'
require 'moneta'
require 'pstore'
# Kabutops::Crawler#perform builds a Mechanize agent and parses the response
# with Nokogiri::HTML, so both must be loaded before a crawler runs.
# mechanize is a declared runtime dependency of the gem and depends on nokogiri.
require 'mechanize'
require 'nokogiri'

# Pages are cached in files under ./cache (marked temporary by the author).
Cachy.cache_store = Moneta.new(:File, dir: 'cache') # temporary

require 'kabutops/recipe'
require 'kabutops/recipe_item'
require 'kabutops/adapters/callback'
require 'kabutops/adapters/database_adapter'
require 'kabutops/adapters/elastic_search'
require 'kabutops/adapters/mysql'
require 'kabutops/crawler_extensions/callback'
require 'kabutops/crawler_extensions/elastic_search'
require 'kabutops/crawler_extensions/mysql'
require 'kabutops/crawler_extensions/pstore_storage'
# Loaded last: the Crawler class body includes the extension modules above.
require 'kabutops/crawler'
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module Kabutops
  module Adapters
    # Abstract base for adapters that turn a crawled page into a result via
    # a Recipe and then persist it. Subclasses supply #store and #nested?.
    class DatabaseAdapter
      # Builds a fresh Recipe and evaluates the given block in its context,
      # so the block can declare the fields to extract.
      #
      # Returns whatever the block evaluates to.
      def data(&block)
        @recipe = Recipe.new
        @recipe.instance_eval(&block)
      end

      # Runs the recipe against the resource/page pair and hands the outcome
      # to #store. Requires #data to have been called beforehand.
      #
      # Returns the value returned by #store.
      def process(resource, page)
        store(@recipe.process(resource, page))
      end

      # Persists one processed result. Must be overridden.
      #
      # Raises NotImplementedError in this base class.
      def store(result)
        raise NotImplementedError
      end

      # Reports whether the backend accepts nested results. Must be overridden.
      #
      # Raises NotImplementedError in this base class.
      def nested?
        raise NotImplementedError
      end
    end
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module Kabutops
  module Adapters
    # Adapter configured with an index name and a document name.
    class ElasticSearch < DatabaseAdapter
      # Remembers the index name given in the configuration block.
      def index(value)
        @index = value
      end

      # Remembers the document name given in the configuration block.
      def document(value)
        @document = value
      end

      # Prints the result three times with Kernel#p and returns it.
      # NOTE(review): nothing is sent to a search backend here and neither
      # @index nor @document is read; this looks like placeholder code.
      def store(result)
        3.times { p result }
        result
      end

      # This adapter always reports that nested results are accepted.
      def nested?
        true
      end
    end
  end
end
|
@@ -0,0 +1,70 @@
|
|
1
|
+
module Kabutops
  # Base class for crawlers. A subclass describes its work with the
  # class-level DSL below (collection, workers, proxy, cache) and registers
  # output adapters through the included extensions (elasticsearch, callback).
  #
  # perform_async is not defined in this class; the README example has
  # subclasses include Sidekiq::Worker, which is expected to provide it.
  class Crawler
    include CrawlerExtensions::ElasticSearch
    include CrawlerExtensions::Callback

    class << self
      include CrawlerExtensions::PStoreStorage

      # DSL setters. A single argument is stored as-is; zero or several
      # arguments are stored as an array (e.g. proxy host and port).
      [
        :collection,
        :workers,
        :proxy,
        :cache
      ].each do |name|
        define_method name do |*args|
          params[name] = args.size == 1 ? args[0] : args
        end
      end

      # Settings collected by the DSL setters. Always a hash, so a crawler
      # that never called a setter does not fail on nil.
      def params
        @params ||= {}
      end

      # Adapters registered via the extensions. Always an array, so a
      # crawler without adapters simply has nothing to notify.
      def adapters
        @adapters ||= []
      end

      # Enqueues one job per resource of the given collection, falling back
      # to the collection declared through the DSL.
      #
      # Nothing is enqueued when a status other than :none has been stored.
      # The storage lookup yields nil for a key that was never written, so
      # nil is treated the same as :none; otherwise a fresh store could
      # never start a crawl.
      #
      # Returns the enqueued collection, or nil when the crawl was skipped.
      # Raises RuntimeError when a resource has no :url, since #perform
      # fetches resource[:url].
      def crawl!(collection = nil)
        status = storage(:status)
        return unless status.nil? || status == :none

        @collection = collection || params[:collection] || []
        @collection.each do |resource|
          raise "url must be specified" if resource[:url].nil?
          perform_async(resource)
        end
      end

      # Enqueues a single resource.
      def <<(resource)
        perform_async(resource)
      end
    end

    # Worker body: downloads the resource's page (through the cache when the
    # cache setting is truthy), parses it and passes it to every adapter.
    def perform(resource)
      # Keys are converted to symbols so string-keyed hashes work as well.
      resource = resource.inject({}) { |h, (k, v)| h[k.to_sym] = v; h }

      content = Cachy.cache_if(self.class.params[:cache], resource[:url]) do
        agent = Mechanize.new
        # Proxy support is currently switched off:
        #agent.set_proxy(*self.class.params[:proxy])
        agent.get(resource[:url]).body
      end

      page = Nokogiri::HTML(content)

      self.class.adapters.each do |adapter|
        adapter.process(resource, page)
      end
    end

    # Enqueues a follow-up resource from inside a running job.
    def <<(resource)
      self.class.perform_async(resource)
    end
  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
module Kabutops
  module CrawlerExtensions
    # Mixin that gives the including class a class-level `callback` method.
    module Callback
      # Include hook: adds ClassMethods to the including class itself.
      def self.included(base)
        base.extend(ClassMethods)
      end

      module ClassMethods
        # Wraps the given block in an Adapters::Callback and appends it to
        # the class's adapter list, creating the list on first use.
        #
        # Returns the adapter list.
        def callback(&block)
          handler = Adapters::Callback.new(block)
          (@adapters ||= []).push(handler)
        end
      end
    end
  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
module Kabutops
  module CrawlerExtensions
    # Mixin that gives the including class a class-level `elasticsearch`
    # method for registering a configured Adapters::ElasticSearch.
    module ElasticSearch
      # Include hook: adds ClassMethods to the including class itself.
      def self.included(base)
        base.extend(ClassMethods)
      end

      module ClassMethods
        # Creates an Adapters::ElasticSearch, evaluates the block in the
        # adapter's context to configure it, then appends it to the class's
        # adapter list, creating the list on first use.
        #
        # Returns the adapter list.
        def elasticsearch(&block)
          configured = Adapters::ElasticSearch.new.tap do |adapter|
            adapter.instance_eval(&block)
          end
          (@adapters ||= []).push(configured)
        end
      end
    end
  end
end
|
File without changes
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module Kabutops
  module CrawlerExtensions
    # Tiny persistent key/value store kept in a PStore file named
    # ".kabutopus.config.pstore", resolved against the working directory.
    module PStoreStorage
      # Lazily opens the PStore and memoizes it.
      def check_storage
        @storage ||= PStore.new(".kabutopus.config.pstore")
      end

      # Writes value under name (converted to a symbol) inside a transaction.
      #
      # Because this setter takes two arguments it cannot be called with
      # assignment syntax; use send(:storage=, name, value).
      #
      # Returns the stored value.
      def storage=(name, value)
        check_storage
        @storage.transaction do
          # The parameter is called name; the former reference to an
          # undefined `key` made every write raise NameError.
          @storage[name.to_sym] = value
        end
      end

      # Reads the value stored under key (converted to a symbol).
      #
      # Returns nil when the key was never written.
      def storage(key)
        check_storage
        @storage.transaction { @storage[key.to_sym] }
      end
    end
  end
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
module Kabutops
  # Collects named extraction rules through a method_missing based DSL and
  # applies them to a resource/page pair.
  #
  # Inside the DSL every unknown method name becomes a field:
  #   title :css, 'h1'        # plain item: type and value
  #   details do ... end      # nested recipe evaluated from the block
  class Recipe
    def initialize
      @nested = false
      @items = {}
    end

    # DSL hook. Without a block the first two arguments are recorded as the
    # item's type and value. With a block a sub-recipe is built from it,
    # stored with type :recipe, and this recipe is flagged as nested.
    def method_missing(name, *args, &block)
      unless block_given?
        type = args[0]
        value = args[1]
        return @items[name] = RecipeItem.new(name, type, value)
      end

      sub_recipe = Recipe.new
      sub_recipe.instance_eval(&block)
      @items[name] = RecipeItem.new(name, :recipe, sub_recipe)
      @nested = true
    end

    # Evaluates every item and returns a hash of item name => outcome.
    #
    # Item types:
    #   :var    - resource[value]
    #   :recipe - the nested recipe's own result hash
    #   :css    - page.css(value).text
    #   :xpath  - page.xpath(value).text
    #   :lambda - value.call(page)
    #   :proc   - value evaluated in the context of page
    #
    # Raises RuntimeError for any other type.
    def process(resource, page)
      @items.each_with_object({}) do |(name, item), output|
        output[name] =
          case item.type
          when :var    then resource[item.value]
          when :recipe then item.value.process(resource, page)
          when :css    then page.css(item.value).text
          when :xpath  then page.xpath(item.value).text
          when :lambda then item.value.call(page)
          when :proc   then page.instance_eval(&item.value)
          else raise "unknown recipe item type '#{item.type}'"
          end
      end
    end

    # True once at least one item was declared with a block.
    def nested?
      @nested
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,128 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: kabutops
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Rene Klacan
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-06-16 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: mechanize
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - '>='
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - '>='
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: cachy
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - '>='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: moneta
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - '>='
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - '>='
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: sidekiq
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - '>='
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: rspec
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ~>
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: 3.0.0
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ~>
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: 3.0.0
|
83
|
+
description: ''
|
84
|
+
email: rene@klacan.sk
|
85
|
+
executables: []
|
86
|
+
extensions: []
|
87
|
+
extra_rdoc_files: []
|
88
|
+
files:
|
89
|
+
- lib/kabutops.rb
|
90
|
+
- lib/kabutops/adapters/database_adapter.rb
|
91
|
+
- lib/kabutops/adapters/callback.rb
|
92
|
+
- lib/kabutops/adapters/mysql.rb
|
93
|
+
- lib/kabutops/adapters/elastic_search.rb
|
94
|
+
- lib/kabutops/crawler_extensions/pstore_storage.rb
|
95
|
+
- lib/kabutops/crawler_extensions/callback.rb
|
96
|
+
- lib/kabutops/crawler_extensions/mysql.rb
|
97
|
+
- lib/kabutops/crawler_extensions/elastic_search.rb
|
98
|
+
- lib/kabutops/recipe.rb
|
99
|
+
- lib/kabutops/version.rb
|
100
|
+
- lib/kabutops/recipe_item.rb
|
101
|
+
- lib/kabutops/crawler.rb
|
102
|
+
- LICENSE
|
103
|
+
- README.md
|
104
|
+
homepage: https://github.com/reneklacan/kabutops
|
105
|
+
licenses:
|
106
|
+
- Beerware
|
107
|
+
metadata: {}
|
108
|
+
post_install_message:
|
109
|
+
rdoc_options: []
|
110
|
+
require_paths:
|
111
|
+
- lib
|
112
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
113
|
+
requirements:
|
114
|
+
- - ~>
|
115
|
+
- !ruby/object:Gem::Version
|
116
|
+
version: '1.9'
|
117
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
118
|
+
requirements:
|
119
|
+
- - '>='
|
120
|
+
- !ruby/object:Gem::Version
|
121
|
+
version: '0'
|
122
|
+
requirements: []
|
123
|
+
rubyforge_project:
|
124
|
+
rubygems_version: 2.0.14
|
125
|
+
signing_key:
|
126
|
+
specification_version: 4
|
127
|
+
summary: ''
|
128
|
+
test_files: []
|