daimon_skycrawlers 0.3.0 → 0.4.0
- checksums.yaml +4 -4
- data/README.md +34 -26
- data/daimon_skycrawlers.gemspec +2 -0
- data/lib/daimon_skycrawlers/cli.rb +4 -0
- data/lib/daimon_skycrawlers/commands/enqueue.rb +58 -0
- data/lib/daimon_skycrawlers/commands/runner.rb +47 -0
- data/lib/daimon_skycrawlers/crawler/default.rb +17 -3
- data/lib/daimon_skycrawlers/filter/base.rb +8 -0
- data/lib/daimon_skycrawlers/filter/duplicate_checker.rb +2 -0
- data/lib/daimon_skycrawlers/filter/robots_txt_checker.rb +29 -0
- data/lib/daimon_skycrawlers/filter/update_checker.rb +3 -0
- data/lib/daimon_skycrawlers/generator/new.rb +0 -3
- data/lib/daimon_skycrawlers/version.rb +1 -1
- data/lib/daimon_skycrawlers.rb +12 -0
- metadata +33 -8
- data/lib/daimon_skycrawlers/generator/templates/new/bin/crawler +0 -10
- data/lib/daimon_skycrawlers/generator/templates/new/bin/enqueue +0 -23
- data/lib/daimon_skycrawlers/generator/templates/new/bin/processor +0 -10
- data/sample/spider/bin/crawler +0 -10
- data/sample/spider/bin/enqueue +0 -23
- data/sample/spider/bin/processor +0 -11
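This release drops the generated bin/crawler, bin/enqueue, and bin/processor scripts in favor of built-in CLI subcommands. Their replacements, as documented in the updated README below:

```
$ daimon-skycrawlers exec crawler
$ daimon-skycrawlers exec processor
$ daimon-skycrawlers enqueue url http://example.com/
$ daimon-skycrawlers enqueue response http://example.com/
```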
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 4e4bd1308a554f55bce802a5d0a038cbb0f0470e
+  data.tar.gz: a484737c74cc9ff3304a9fda0714282f9f6b0e61
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: c9d4b01cb37808ce43a9786324e6e0d842d67bc44269c39964e4570c5dbab1f0b818449ac6a67864d6834e2b67a1a9f857252fd0d3cc91e87961ad252eccc785
+  data.tar.gz: c1babe4300e744678672482e3a82c8bc5fa37a5e52ebe8e634b3ae0cdaea17adc75c5132a5a8918c2b3dce76b47c783fedea8e911b18f2e6f0bd7adee0d0525b
data/README.md
CHANGED
@@ -33,52 +33,60 @@ Or install it yourself as:
 
 1. Create project
 
-
-
-
-
-
-
-
-
-
+```
+$ bundle exec daimon-skycrawlers new mycrawlers
+$ cd mycrawlers
+```
+or
+```
+$ daimon-skycrawlers new mycrawlers
+$ cd mycrawlers
+```
 
 2. Install dependencies
 
-
-
-
+```
+$ bundle install
+```
 
 3. Create database
 
-
-
-
-
+```
+$ bundle exec rake db:create
+$ bundle exec rake db:migrate
+```
 
 4. Open new terminal and run crawler/processor
 
-
-
-
-
+```
+$ daimon-skycrawlers exec crawler   # on new terminal
+$ daimon-skycrawlers exec processor # on new terminal
+```
 
 NOTE: Execute step 5 as soon as possible. Because bin/crawler and
 bin/processor will stop after 10 seconds by default if their
 queues are empty.
 
+NOTE: You can change `shutdown_interval` using the following code in config/init.rb:
+
+```ruby
+DaimonSkycrawlers.configure do |config|
+  config.shutdown_interval = 30
+end
+```
+
 5. Enqueue task
 
-
-
-
+```
+$ daimon-skycrawlers enqueue url http://example.com/
+```
 
 6. You'll see `It works with 'http://example.com'` on your terminal which runs your processor!
 7. You can re-enqueue task for processor
 
-
-
-
+```
+$ daimon-skycrawlers enqueue response http://example.com/
+```
 
 Display `It works with 'http://example.com'` again on your terminal which runs your processor.
 
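The walkthrough above covers `enqueue url` and `enqueue response`; the new `sitemap` subcommand added in commands/enqueue.rb below follows the same shape. Invocations derived from its Thor declaration (URLs illustrative):

```
$ daimon-skycrawlers enqueue sitemap http://example.com/sitemap.xml
$ daimon-skycrawlers enqueue sitemap --robots-txt http://example.com/
```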
data/daimon_skycrawlers.gemspec
CHANGED
@@ -28,6 +28,8 @@ Gem::Specification.new do |spec|
   spec.add_dependency "railties"
   spec.add_dependency "pg"
   spec.add_dependency "timers"
+  spec.add_dependency "sitemap-parser"
+  spec.add_dependency "webrobots"
 
   spec.add_development_dependency "rake", "~> 10.0"
   spec.add_development_dependency "test-unit"
data/lib/daimon_skycrawlers/cli.rb
CHANGED
@@ -1,10 +1,14 @@
 require "thor"
 require "daimon_skycrawlers/generator/new"
+require "daimon_skycrawlers/commands/enqueue"
+require "daimon_skycrawlers/commands/runner"
 require "daimon_skycrawlers/version"
 
 module DaimonSkycrawlers
   class CLI < Thor
     register(Generator::New, "new", "new NAME", "Create new project")
+    register(Commands::Runner, "exec", "exec [COMMAND]", "Execute crawler/processor")
+    register(Commands::Enqueue, "enqueue", "enqueue [TYPE] URL [messages...]", "Enqueue URL")
 
     desc "version", "Show version"
     def version
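Thor's `register` mounts an entire Thor subclass as a subcommand, which is how `exec` and `enqueue` above each get their own sub-subcommands. A minimal self-contained sketch of the same pattern; `Greet` and its command names are illustrative, not part of the gem:

```ruby
require "thor"

# The sub-CLI: each public method becomes a sub-subcommand.
class Greet < Thor
  namespace "greet"

  desc "hello NAME", "Say hello to NAME"
  def hello(name)
    puts "Hello, #{name}!"
  end
end

class CLI < Thor
  # register(klass, subcommand_name, usage, description)
  register(Greet, "greet", "greet [COMMAND]", "Greeting commands")
end

CLI.start(ARGV) # `ruby cli.rb greet hello World` prints "Hello, World!"
```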
data/lib/daimon_skycrawlers/commands/enqueue.rb
ADDED
@@ -0,0 +1,58 @@
+require "daimon_skycrawlers"
+require "daimon_skycrawlers/crawler"
+require "daimon_skycrawlers/processor"
+require "daimon_skycrawlers/version"
+require "sitemap-parser"
+require "webrobots"
+
+module DaimonSkycrawlers
+  module Commands
+    class Enqueue < Thor
+      desc "url URL [key1:value1 key2:value2...]", "Enqueue URL for URL consumer"
+      def url(url, *rest)
+        load_init
+        message = rest.map {|arg| arg.split(":") }.to_h
+        log.debug("Enqueue URL for crawler: #{url} : #{message}")
+        DaimonSkycrawlers::Crawler.enqueue_url(url, message)
+      end
+
+      desc "response URL [key1:value1 key2:value2...]", "Enqueue URL for HTTP response consumer"
+      def response(url, *rest)
+        load_init
+        message = rest.map {|arg| arg.split(":") }.to_h
+        log.debug("Enqueue URL for processor: #{url} : #{message}")
+        DaimonSkycrawlers::Processor.enqueue_http_response(url, message)
+      end
+
+      desc "sitemap [OPTIONS] URL", "Enqueue URLs from sitemap.xml"
+      method_option("robots-txt", aliases: ["-r"], type: :boolean,
+                    desc: "URL for robots.txt. Detect robots.txt automatically if URL is not robots.txt")
+      def sitemap(url)
+        load_init
+        if options["robots-txt"]
+          webrobots = WebRobots.new("DaimonSkycrawlers/#{DaimonSkycrawlers::VERSION}")
+          sitemaps = webrobots.sitemaps(url).uniq
+        else
+          sitemaps = [url]
+        end
+        urls = sitemaps.flat_map do |sitemap|
+          sitemap_parser = SitemapParser.new(sitemap)
+          sitemap_parser.to_a
+        end
+        urls.each do |_url|
+          DaimonSkycrawlers::Crawler.enqueue_url(_url)
+        end
+      end
+
+      private
+
+      def load_init
+        DaimonSkycrawlers.load_init
+      end
+
+      def log
+        DaimonSkycrawlers.configuration.logger
+      end
+    end
+  end
+end
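The `key1:value1` arguments become the message hash via `rest.map {|arg| arg.split(":") }.to_h`. Note that a bare `split(":")` misbehaves when a value itself contains a colon (a URL, say): `to_h` raises ArgumentError on the resulting 3-element array, so keys and values here need to be colon-free. A plain-Ruby illustration, not gem code:

```ruby
rest = ["depth:2", "tag:news"]
rest.map {|arg| arg.split(":") }.to_h
# => {"depth"=>"2", "tag"=>"news"}

# split(":", 2) would be the colon-safe variant:
["ref:http://example.com/"].map {|arg| arg.split(":", 2) }.to_h
# => {"ref"=>"http://example.com/"}
```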
data/lib/daimon_skycrawlers/commands/runner.rb
ADDED
@@ -0,0 +1,47 @@
+require "thor"
+require "daimon_skycrawlers"
+require "daimon_skycrawlers/crawler"
+
+module DaimonSkycrawlers
+  module Commands
+    class Runner < Thor
+      namespace "exec"
+
+      desc "crawler", "Execute crawler"
+      def crawler
+        load_init
+        Dir.glob("app/crawlers/**/*.rb") do |path|
+          require(File.expand_path(path, Dir.pwd))
+          log.info("Loaded crawler: #{path}")
+        end
+        DaimonSkycrawlers::Crawler.run
+      rescue => ex
+        puts ex.message
+        exit(false)
+      end
+
+      desc "processor", "Execute processor"
+      def processor
+        load_init
+        Dir.glob("app/processors/**/*.rb") do |path|
+          require(File.expand_path(path, Dir.pwd))
+          log.info("Loaded processor: #{path}")
+        end
+        DaimonSkycrawlers::Processor.run
+      rescue => ex
+        puts ex.message
+        exit(false)
+      end
+
+      private
+
+      def load_init
+        DaimonSkycrawlers.load_init
+      end
+
+      def log
+        DaimonSkycrawlers.configuration.logger
+      end
+    end
+  end
+end
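Both subcommands glob `app/crawlers/` and `app/processors/` relative to the current directory and call `load_init`, which requires `config/init.rb` from `Dir.pwd`, so `exec` is meant to be run from the root of a project generated by `daimon-skycrawlers new`:

```
$ cd mycrawlers
$ daimon-skycrawlers exec crawler    # loads app/crawlers/**/*.rb, then runs
$ daimon-skycrawlers exec processor  # loads app/processors/**/*.rb, then runs
```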
data/lib/daimon_skycrawlers/crawler/default.rb
CHANGED
@@ -1,5 +1,6 @@
 require "daimon_skycrawlers/crawler/base"
 require "daimon_skycrawlers/filter/update_checker"
+require "daimon_skycrawlers/filter/robots_txt_checker"
 
 module DaimonSkycrawlers
   module Crawler
@@ -13,11 +14,16 @@ module DaimonSkycrawlers
         @n_processed_urls += 1
         @skipped = false
         url = connection.url_prefix + path
+        if @options[:obey_robots_txt]
+          robots_txt_checker = DaimonSkycrawlers::Filter::RobotsTxtChecker.new(base_url: @base_url)
+          unless robots_txt_checker.call(url)
+            skip(url)
+            return
+          end
+        end
         update_checker = DaimonSkycrawlers::Filter::UpdateChecker.new(storage: storage)
         unless update_checker.call(url.to_s, connection: connection)
-
-          @skipped = true
-          schedule_to_process(url.to_s, heartbeat: true)
+          skip(url)
           return
         end
         @prepare.call(connection)
@@ -33,6 +39,14 @@ module DaimonSkycrawlers
         message = message.merge(kw)
         schedule_to_process(url.to_s, message)
       end
+
+      private
+
+      def skip(url)
+        log.info("Skip #{url}")
+        @skipped = true
+        schedule_to_process(url.to_s, heartbeat: true)
+      end
     end
   end
 end
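The hunk does not show where `@options[:obey_robots_txt]` gets set; the check simply fires when that flag is truthy. A hypothetical sketch, assuming the default crawler's constructor accepts extra keyword options into `@options` (the flag name is taken from the diff; the constructor wiring is not shown in this release's hunks):

```ruby
require "daimon_skycrawlers/crawler/default"

# Assumption: Default.new forwards extra keyword options into @options.
crawler = DaimonSkycrawlers::Crawler::Default.new(
  "http://example.com/",
  obey_robots_txt: true # consult robots.txt before each fetch
)
```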
data/lib/daimon_skycrawlers/filter/base.rb
CHANGED
@@ -3,6 +3,14 @@ require "daimon_skycrawlers/config"
 
 module DaimonSkycrawlers
   module Filter
+    #
+    # Base class of filters.
+    #
+    # You must implement `#call` in your filter and it must return
+    # true or false. If your filter returns true, processors can
+    # process the given URL after your filter. Otherwise, the
+    # framework skips the given URL and does not run processors.
+    #
     class Base
       include DaimonSkycrawlers::LoggerMixin
       include DaimonSkycrawlers::ConfigMixin
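A minimal custom filter following the contract documented above: subclass `Filter::Base` and implement `#call` returning true (process) or false (skip). `HostFilter` is illustrative, not part of the gem, and assumes `Base#initialize` accepts keyword options, as `UpdateChecker`'s `super(storage: storage)` call suggests:

```ruby
require "uri"
require "daimon_skycrawlers/filter/base"

# Allows only URLs on a single host; everything else is skipped.
class HostFilter < DaimonSkycrawlers::Filter::Base
  def initialize(allowed_host:, **options)
    super(**options)
    @allowed_host = allowed_host
  end

  # true  => processors may handle this URL
  # false => the framework skips it
  def call(url)
    URI(url).host == @allowed_host
  end
end

filter = HostFilter.new(allowed_host: "example.com")
filter.call("http://example.com/page") # => true
filter.call("http://other.test/page")  # => false
```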
data/lib/daimon_skycrawlers/filter/robots_txt_checker.rb
ADDED
@@ -0,0 +1,29 @@
+require "webrobots"
+require "daimon_skycrawlers/filter/base"
+require "daimon_skycrawlers/version"
+
+module DaimonSkycrawlers
+  module Filter
+    #
+    # This filter checks robots.txt for a given URL, so that
+    # crawlers can obey robots.txt provided by a web site.
+    #
+    class RobotsTxtChecker < Base
+      def initialize(base_url: nil, user_agent: "DaimonSkycrawlers/#{DaimonSkycrawlers::VERSION}")
+        super()
+        @webrobots = WebRobots.new(user_agent)
+      end
+
+      #
+      # @param [String] url
+      # @return [true|false] True when the web site allows fetching the URL, otherwise false
+      #
+      def call(url)
+        unless URI(url).absolute?
+          url = (@base_url + url).to_s
+        end
+        @webrobots.allowed?(url)
+      end
+    end
+  end
+end
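A usage sketch with illustrative URLs. One caveat in the file as committed: `#initialize` accepts `base_url:` but never assigns `@base_url`, so the relative-URL branch of `#call` would need an `@base_url = base_url` line to work; absolute URLs are unaffected, and only those are used below:

```ruby
require "daimon_skycrawlers/filter/robots_txt_checker"

checker = DaimonSkycrawlers::Filter::RobotsTxtChecker.new
# WebRobots fetches and caches robots.txt per site under the hood.
checker.call("http://example.com/private/") # => false if robots.txt disallows it
checker.call("http://example.com/")         # => true if allowed
```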
data/lib/daimon_skycrawlers/filter/update_checker.rb
CHANGED
@@ -6,6 +6,9 @@ module DaimonSkycrawlers
     #
     # This filter provides update checker for given URL.
     #
+    # Skip processing URLs that are up to date (not updated since the
+    # previous access).
+    #
     class UpdateChecker < Base
       def initialize(storage: nil, base_url: nil)
         super(storage: storage)
data/lib/daimon_skycrawlers/generator/new.rb
CHANGED
@@ -40,9 +40,6 @@ module DaimonSkycrawlers
       "Rakefile",
       "app/crawlers/sample_crawler.rb",
       "app/processors/sample_processor.rb",
-      "bin/crawler",
-      "bin/enqueue",
-      "bin/processor",
       "config/init.rb",
     ].each do |path|
       copy_file(path, "#{name}/#{path}", mode: :preserve)
data/lib/daimon_skycrawlers.rb
CHANGED
@@ -62,5 +62,17 @@ module DaimonSkycrawlers
     def configure
       yield configuration
     end
+
+    #
+    # Load "config/init.rb"
+    #
+    # @return [void]
+    #
+    def load_init
+      require(File.expand_path("config/init.rb", Dir.pwd))
+    rescue LoadError => ex
+      puts ex.message
+      exit(false)
+    end
   end
 end
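`load_init` is invoked by the new `exec` and `enqueue` commands before they do anything, so per-project configuration lives in `config/init.rb` at the project root. A minimal example using the `shutdown_interval` setting shown in the README above:

```ruby
# config/init.rb, required by DaimonSkycrawlers.load_init from Dir.pwd.
require "daimon_skycrawlers"

DaimonSkycrawlers.configure do |config|
  config.shutdown_interval = 30 # seconds to wait on an empty queue before stopping
end
```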
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: daimon_skycrawlers
 version: !ruby/object:Gem::Version
-  version: 0.3.0
+  version: 0.4.0
 platform: ruby
 authors:
 - Ryunosuke SATO
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2016-
+date: 2016-10-04 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler
@@ -150,6 +150,34 @@ dependencies:
     - - ">="
       - !ruby/object:Gem::Version
         version: '0'
+- !ruby/object:Gem::Dependency
+  name: sitemap-parser
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+- !ruby/object:Gem::Dependency
+  name: webrobots
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
 - !ruby/object:Gem::Dependency
   name: rake
   requirement: !ruby/object:Gem::Requirement
@@ -281,6 +309,8 @@ files:
 - db/schema.rb
 - lib/daimon_skycrawlers.rb
 - lib/daimon_skycrawlers/cli.rb
+- lib/daimon_skycrawlers/commands/enqueue.rb
+- lib/daimon_skycrawlers/commands/runner.rb
 - lib/daimon_skycrawlers/config.rb
 - lib/daimon_skycrawlers/consumer.rb
 - lib/daimon_skycrawlers/consumer/base.rb
@@ -292,6 +322,7 @@ files:
 - lib/daimon_skycrawlers/filter.rb
 - lib/daimon_skycrawlers/filter/base.rb
 - lib/daimon_skycrawlers/filter/duplicate_checker.rb
+- lib/daimon_skycrawlers/filter/robots_txt_checker.rb
 - lib/daimon_skycrawlers/filter/update_checker.rb
 - lib/daimon_skycrawlers/generator/new.rb
 - lib/daimon_skycrawlers/generator/templates/new/Gemfile
@@ -299,9 +330,6 @@ files:
 - lib/daimon_skycrawlers/generator/templates/new/Rakefile
 - lib/daimon_skycrawlers/generator/templates/new/app/crawlers/sample_crawler.rb
 - lib/daimon_skycrawlers/generator/templates/new/app/processors/sample_processor.rb
-- lib/daimon_skycrawlers/generator/templates/new/bin/crawler
-- lib/daimon_skycrawlers/generator/templates/new/bin/enqueue
-- lib/daimon_skycrawlers/generator/templates/new/bin/processor
 - lib/daimon_skycrawlers/generator/templates/new/config/database.yml.erb
 - lib/daimon_skycrawlers/generator/templates/new/config/init.rb
 - lib/daimon_skycrawlers/logger.rb
@@ -324,9 +352,6 @@ files:
 - sample/spider/Rakefile
 - sample/spider/app/crawlers/blog_crawler.rb
 - sample/spider/app/processors/blog_spider.rb
-- sample/spider/bin/crawler
-- sample/spider/bin/enqueue
-- sample/spider/bin/processor
 - sample/spider/config/database.yml
 - sample/spider/config/init.rb
 - sample/spider/db/migrate/20160830155803_create_pages.rb
data/lib/daimon_skycrawlers/generator/templates/new/bin/enqueue
DELETED
@@ -1,23 +0,0 @@
-#!/usr/bin/env ruby
-
-require "thor"
-
-require_relative "../config/init"
-require "daimon_skycrawlers/crawler"
-require "daimon_skycrawlers/processor"
-
-class Enqueue < Thor
-  desc "url URL [key1:value1 key2:value2...]", "Enqueue URL for URL consumer"
-  def url(url, *rest)
-    message = rest.map {|arg| arg.split(":") }.to_h
-    DaimonSkycrawlers::Crawler.enqueue_url(url, message)
-  end
-
-  desc "response URL [key1:value1 key2:value2...]", "Enqueue URL for HTTP response consumer"
-  def response(url, *rest)
-    message = rest.map {|arg| arg.split(":") }.to_h
-    DaimonSkycrawlers::Processor.enqueue_http_response(url, message)
-  end
-end
-
-Enqueue.start(ARGV)
data/sample/spider/bin/crawler
DELETED
data/sample/spider/bin/enqueue
DELETED
@@ -1,23 +0,0 @@
-#!/usr/bin/env ruby
-
-require "thor"
-
-require_relative "../config/init"
-require "daimon_skycrawlers/crawler"
-require "daimon_skycrawlers/processor"
-
-class Enqueue < Thor
-  desc "url URL [key1:value1 key2:value2...]", "Enqueue URL for URL consumer"
-  def url(url, *rest)
-    message = rest.map {|arg| arg.split(":") }.to_h
-    DaimonSkycrawlers::Crawler.enqueue_url(url, message)
-  end
-
-  desc "response URL [key1:value1 key2:value2...]", "Enqueue URL for HTTP response consumer"
-  def response(url, *rest)
-    message = rest.map {|arg| arg.split(":") }.to_h
-    DaimonSkycrawlers::Processor.enqueue_http_response(url, message)
-  end
-end
-
-Enqueue.start(ARGV)
data/sample/spider/bin/processor
DELETED