daimon_skycrawlers 0.1.0 → 0.2.0

Files changed (55)
  1. checksums.yaml +4 -4
  2. data/.travis.yml +2 -2
  3. data/Gemfile +1 -1
  4. data/README.md +30 -12
  5. data/Rakefile +3 -8
  6. data/bin/daimon-skycrawlers +3 -3
  7. data/daimon_skycrawlers.gemspec +4 -3
  8. data/lib/daimon_skycrawlers/cli.rb +3 -3
  9. data/lib/daimon_skycrawlers/config.rb +8 -0
  10. data/lib/daimon_skycrawlers/consumer/base.rb +16 -0
  11. data/lib/daimon_skycrawlers/consumer/http_response.rb +47 -0
  12. data/lib/daimon_skycrawlers/consumer/url.rb +44 -0
  13. data/lib/daimon_skycrawlers/consumer.rb +4 -0
  14. data/lib/daimon_skycrawlers/crawler/base.rb +75 -0
  15. data/lib/daimon_skycrawlers/crawler/default.rb +33 -0
  16. data/lib/daimon_skycrawlers/crawler.rb +18 -76
  17. data/lib/daimon_skycrawlers/filter/base.rb +24 -0
  18. data/lib/daimon_skycrawlers/filter/duplicate_checker.rb +23 -0
  19. data/lib/daimon_skycrawlers/filter/update_checker.rb +31 -0
  20. data/lib/daimon_skycrawlers/filter.rb +4 -0
  21. data/lib/daimon_skycrawlers/generator/new.rb +3 -2
  22. data/lib/daimon_skycrawlers/generator/templates/new/Gemfile +3 -3
  23. data/lib/daimon_skycrawlers/generator/templates/new/README.md.erb +22 -6
  24. data/lib/daimon_skycrawlers/generator/templates/new/crawler.rb +5 -5
  25. data/lib/daimon_skycrawlers/generator/templates/new/db/migrate/create_pages.rb +6 -6
  26. data/lib/daimon_skycrawlers/generator/templates/new/enqueue.rb +17 -8
  27. data/lib/daimon_skycrawlers/generator/templates/new/init.rb +20 -0
  28. data/lib/daimon_skycrawlers/generator/templates/new/processor.rb +2 -0
  29. data/lib/daimon_skycrawlers/logger.rb +32 -0
  30. data/lib/daimon_skycrawlers/processor/base.rb +19 -0
  31. data/lib/daimon_skycrawlers/processor/default.rb +12 -9
  32. data/lib/daimon_skycrawlers/processor/spider.rb +77 -0
  33. data/lib/daimon_skycrawlers/processor.rb +23 -9
  34. data/lib/daimon_skycrawlers/queue.rb +24 -0
  35. data/lib/daimon_skycrawlers/storage/base.rb +6 -0
  36. data/lib/daimon_skycrawlers/timer.rb +24 -0
  37. data/lib/daimon_skycrawlers/version.rb +1 -1
  38. data/lib/daimon_skycrawlers.rb +24 -4
  39. data/sample/spider/Gemfile +4 -0
  40. data/sample/spider/README.md +50 -0
  41. data/sample/spider/Rakefile +1 -0
  42. data/sample/spider/config/database.yml +26 -0
  43. data/sample/spider/crawler.rb +14 -0
  44. data/sample/spider/db/migrate/20160830155803_create_pages.rb +13 -0
  45. data/sample/spider/db/schema.rb +28 -0
  46. data/sample/spider/enqueue.rb +24 -0
  47. data/sample/spider/init.rb +22 -0
  48. data/sample/spider/processor.rb +34 -0
  49. metadata +47 -12
  50. data/lib/daimon_skycrawlers/configure_songkick_queue.rb +0 -12
  51. data/lib/daimon_skycrawlers/http_response_consumer.rb +0 -38
  52. data/lib/daimon_skycrawlers/parser/base.rb +0 -13
  53. data/lib/daimon_skycrawlers/parser/default.rb +0 -50
  54. data/lib/daimon_skycrawlers/parser.rb +0 -7
  55. data/lib/daimon_skycrawlers/url_consumer.rb +0 -32
data/lib/daimon_skycrawlers/generator/templates/new/README.md.erb
@@ -15,20 +15,36 @@ TODO: Write description.
 
  1. Install dependencies
 
- $ bundle install
+ ```
+ $ bundle install
+ ```
 
  2. Create database
 
- $ bundle exec rake db:create
- $ bundle exec rake db:migrate
+ ```
+ $ bundle exec rake db:create
+ $ bundle exec rake db:migrate
+ ```
 
  3. Open new terminal and run crawler/processor
 
- $ bundle exec ruby crawler.rb # on new terminal
- $ bundle exec ruby processor.rb # on new terminal
+ ```
+ $ bundle exec ruby crawler.rb # on new terminal
+ $ bundle exec ruby processor.rb # on new terminal
+ ```
 
  4. Enqueue task
 
- $ bundle exec ruby enqueue.rb http://example.com/
+ ```
+ $ bundle exec ruby enqueue.rb http://example.com/
+ ```
 
  5. You'll see `It works with 'http://example.com'` on your terminal which runs your processor!
+
+ 6. You can re-enqueue task for processor
+
+ ```
+ $ bundle exec ruby enqueue.rb response http://example.com/
+ ```
+
+ Display `It works with 'http://example.com'` again on your terminal which runs your processor.
data/lib/daimon_skycrawlers/generator/templates/new/crawler.rb
@@ -1,13 +1,13 @@
  #!/usr/bin/env ruby
 
  require "daimon_skycrawlers/crawler"
+ require "daimon_skycrawlers/crawler/default"
 
- base_url = 'http://example.com'
+ require_relative "./init"
 
- crawler = DaimonSkycrawlers::Crawler.new(base_url)
- crawler.parser.append_filter do |url|
-   url.start_with?(base_url)
- end
+ base_url = "http://example.com"
+
+ crawler = DaimonSkycrawlers::Crawler::Default.new(base_url)
 
  DaimonSkycrawlers.register_crawler(crawler)
 
data/lib/daimon_skycrawlers/generator/templates/new/db/migrate/create_pages.rb (whitespace-only change)
@@ -1,13 +1,13 @@
  class CreatePages < ActiveRecord::Migration
    def change
      create_table :pages do |t|
-       t.string :url
-       t.text :headers
-       t.binary :body, limit: 10 * 1024 ** 2 # 10MiB
-       t.datetime :last_modified_at
-       t.string :etag
+       t.string :url
+       t.text :headers
+       t.binary :body, limit: 10 * 1024 ** 2 # 10MiB
+       t.datetime :last_modified_at
+       t.string :etag
 
-       t.timestamps null: false
+       t.timestamps null: false
      end
    end
  end
data/lib/daimon_skycrawlers/generator/templates/new/enqueue.rb
@@ -1,15 +1,24 @@
  #!/usr/bin/env ruby
 
+ require "thor"
+
  require "daimon_skycrawlers/crawler"
+ require "daimon_skycrawlers/processor"
 
- USAGE = "Usage: #{$0} [URL]"
+ require_relative "./init"
 
- if ARGV.size < 1
-   $stderr.puts "#{$0}: missing URL"
-   $stderr.puts USAGE
-   exit false
- end
+ class Enqueue < Thor
+   desc "url URL [key1:value1 key2:value2...]", "Enqueue URL for URL consumer"
+   def url(url, *rest)
+     message = rest.map {|arg| arg.split(":") }.to_h
+     DaimonSkycrawlers::Crawler.enqueue_url(url, message)
+   end
 
- url = ARGV[0]
+   desc "response URL [key1:value1 key2:value2...]", "Enqueue URL for HTTP response consumer"
+   def response(url, *rest)
+     message = rest.map {|arg| arg.split(":") }.to_h
+     DaimonSkycrawlers::Processor.enqueue_http_response(url, message)
+   end
+ end
 
- DaimonSkycrawlers::Crawler.enqueue_url(url)
+ Enqueue.start(ARGV)
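
The template's enqueue script is now a small Thor CLI: `url` publishes to the URL consumer's queue, `response` to the HTTP response consumer's, and trailing `key:value` arguments are split and folded into the message hash. A usage sketch (the `depth:3` pair is illustrative; `:depth` is the key the Spider processor below reads):

```
$ bundle exec ruby enqueue.rb url http://example.com/ depth:3
$ bundle exec ruby enqueue.rb response http://example.com/
```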
data/lib/daimon_skycrawlers/generator/templates/new/init.rb
@@ -0,0 +1,20 @@
+ require "daimon_skycrawlers"
+ require "daimon_skycrawlers/logger"
+ require "daimon_skycrawlers/queue"
+
+ DaimonSkycrawlers.configure do |config|
+   config.logger = DaimonSkycrawlers::Logger.default
+   config.crawler_interval = 1
+ end
+
+ DaimonSkycrawlers::Queue.configure do |config|
+   # queue configuration
+   config.logger = DaimonSkycrawlers.configuration.logger
+   config.host = "127.0.0.1"
+   config.port = 5672
+   # config.username = 'guest'
+   # config.password = 'guest'
+   config.vhost = "/"
+   config.max_reconnect_attempts = 10
+   config.network_recovery_interval = 1.0
+ end
data/lib/daimon_skycrawlers/generator/templates/new/processor.rb
@@ -2,6 +2,8 @@
 
  require "daimon_skycrawlers/processor"
 
+ require_relative "./init"
+
  DaimonSkycrawlers.register_processor do |data|
    p "It works with '#{data[:url]}'"
  end
data/lib/daimon_skycrawlers/logger.rb
@@ -0,0 +1,32 @@
+ require "delegate"
+ require "logger"
+
+ module DaimonSkycrawlers
+   class Logger < SimpleDelegator
+     class << self
+       def default
+         @default ||= DaimonSkycrawlers::Logger.new(STDOUT)
+       end
+     end
+
+     def initialize(logdev, shift_age = 0, shift_size = 1048576)
+       @log = ::Logger.new(logdev, shift_age, shift_size)
+       super(@log)
+     end
+   end
+
+   module LoggerMixin
+     def initialize
+       super
+       @log = DaimonSkycrawlers::Logger.default
+     end
+
+     class << self
+       def included(base)
+         base.module_eval do
+           attr_accessor :log
+         end
+       end
+     end
+   end
+ end
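
`DaimonSkycrawlers::Logger` wraps Ruby's stdlib logger behind a shared default, and `LoggerMixin` is how the 0.2.0 processor and storage base classes pick up their `log` accessor. A minimal sketch of including it in your own class (`MyFilter` is hypothetical, not part of the gem):

```ruby
require "daimon_skycrawlers/logger"

# Hypothetical class: including the mixin points @log at
# DaimonSkycrawlers::Logger.default and exposes it as `log`.
class MyFilter
  include DaimonSkycrawlers::LoggerMixin

  def call(url)
    log.info("Checking #{url}")
    true
  end
end

MyFilter.new.call("http://example.com/")
```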
data/lib/daimon_skycrawlers/processor/base.rb
@@ -0,0 +1,19 @@
+ require "daimon_skycrawlers/logger"
+ require "daimon_skycrawlers/config"
+
+ module DaimonSkycrawlers
+   module Processor
+     class Base
+       include DaimonSkycrawlers::LoggerMixin
+       include DaimonSkycrawlers::ConfigMixin
+
+       def call(message)
+         raise "Implement this method in subclass"
+       end
+
+       def storage
+         @storage ||= DaimonSkycrawlers::Storage::RDB.new
+       end
+     end
+   end
+ end
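
Writing a processor in 0.2.0 means subclassing `Base` and overriding `call`; `storage` lazily builds the RDB backend and `log` comes from `LoggerMixin`. A hedged sketch of a custom subclass (`TitleDumper` is illustrative only):

```ruby
require "nokogiri"
require "daimon_skycrawlers/processor/base"

# Illustrative processor: fetch the stored page for the message URL
# and log its <title>, guarding against heartbeat messages the same
# way the gem's Default and Spider processors do.
class TitleDumper < DaimonSkycrawlers::Processor::Base
  def call(message)
    return if message[:heartbeat]
    page = storage.find(message[:url])
    log.info("Title: #{Nokogiri::HTML(page.body).title}")
  end
end
```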
data/lib/daimon_skycrawlers/processor/default.rb
@@ -1,19 +1,22 @@
  require "daimon_skycrawlers/storage/rdb"
+ require "daimon_skycrawlers/processor/base"
 
  module DaimonSkycrawlers
-   class Processor
-     class Default
+   module Processor
+     class Default < Base
        def call(message)
+         return if message[:heartbeat]
          url = message[:url]
-         storage = DaimonSkycrawlers::Storage::RDB.new
          page = storage.find(url)
          headers = JSON.parse(page.headers)
-         puts "URL: #{page.url}"
-         puts "Body: #{page.body.bytesize} bytes"
-         puts "Headers:"
-         headers.each do |key, value|
-           puts "  #{key}: #{value}"
-         end
+         headers_string = headers.map {|key, value| "  #{key}: #{value}" }.join("\n")
+         dumped_message = <<LOG
+ URL: #{page.url}
+ Body: #{page.body.bytesize} bytes
+ Headers:
+ #{headers_string}
+ LOG
+         log.info(dumped_message)
        end
      end
    end
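
Because `Default` is now an object with a `call` method, it can be registered directly instead of via a block; a sketch of a standalone processor script under that assumption:

```ruby
require "daimon_skycrawlers"
require "daimon_skycrawlers/processor"
require "daimon_skycrawlers/processor/default"

# register_processor accepts either a callable object or a block
# (see lib/daimon_skycrawlers.rb below).
DaimonSkycrawlers.register_processor(DaimonSkycrawlers::Processor::Default.new)
DaimonSkycrawlers::Processor.run
```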
data/lib/daimon_skycrawlers/processor/spider.rb
@@ -0,0 +1,77 @@
+ require "nokogiri"
+ require "daimon_skycrawlers/crawler"
+
+ module DaimonSkycrawlers
+   module Processor
+     class Spider < Base
+       attr_accessor :enqueue
+
+       def initialize
+         super
+         @filters = []
+         @doc = nil
+         @links = nil
+         @enqueue = true
+       end
+
+       def append_filter(filter = nil, &block)
+         if block_given?
+           @filters << block
+         else
+           @filters << filter
+         end
+       end
+
+       #
+       # @param [Hash] message Must have key :url, :depth
+       #
+       def call(message)
+         key_url = message[:url]
+         depth = Integer(message[:depth] || 2)
+         return if message[:heartbeat]
+         return if depth <= 1
+         page = storage.find(key_url)
+         @doc = Nokogiri::HTML(page.body)
+         new_message = {
+           depth: depth - 1,
+         }
+         links.each do |url|
+           enqueue_url(url, new_message)
+         end
+       end
+
+       private
+
+       def links
+         return @links if @links
+         @links = retrieve_links
+         @links
+       end
+
+       def retrieve_links
+         urls = @doc.search("a").map do |element|
+           element["href"]
+         end
+         urls.uniq!
+         apply_filters(urls) || []
+       end
+
+       def apply_filters(urls)
+         return if urls.nil?
+         return if urls.empty?
+         log.debug("Candidate URLs: #{urls.size}")
+         urls = urls.select do |url|
+           @filters.inject(true) {|memo, filter| memo & filter.call(url) }
+         end
+         log.debug("Filtered URLs: #{urls.size}")
+         urls
+       end
+
+       def enqueue_url(url, new_message)
+         return unless @enqueue
+         log.debug("Enqueue: URL:#{url}, message: #{new_message}")
+         DaimonSkycrawlers::Crawler.enqueue_url(url, new_message)
+       end
+     end
+   end
+ end
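
`Spider` is what makes the new sample crawl recursive: it parses the stored page, filters the extracted links, and re-enqueues each survivor with `depth` decremented until `depth <= 1`. A wiring sketch with an on-site filter (the `base_url` value is illustrative):

```ruby
require "daimon_skycrawlers"
require "daimon_skycrawlers/processor"
require "daimon_skycrawlers/processor/spider"

base_url = "http://example.com"

spider = DaimonSkycrawlers::Processor::Spider.new
# Keep the crawl on the original site; hrefs can be nil, hence to_s.
spider.append_filter do |url|
  url.to_s.start_with?(base_url)
end

DaimonSkycrawlers.register_processor(spider)
DaimonSkycrawlers::Processor.run
```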
data/lib/daimon_skycrawlers/processor.rb
@@ -1,17 +1,31 @@
- require 'daimon_skycrawlers'
- require 'daimon_skycrawlers/configure_songkick_queue'
- require 'daimon_skycrawlers/url_consumer'
- require 'daimon_skycrawlers/http_response_consumer'
+ require "daimon_skycrawlers"
+ require "daimon_skycrawlers/queue"
+ require "daimon_skycrawlers/timer"
+ require "daimon_skycrawlers/consumer/http_response"
 
  module DaimonSkycrawlers
-   class Processor
+   module Processor
      class << self
-       def run(process_name: 'daimon-skycrawler:http-response')
-         SongkickQueue::Worker.new(process_name, [HTTPResponseConsumer]).run
+       def run(process_name: default_process_name)
+         DaimonSkycrawlers::Timer.setup_shutdown_timer(config.queue_name_prefix, interval: config.shutdown_interval)
+         SongkickQueue::Worker.new(process_name, [DaimonSkycrawlers::Consumer::HTTPResponse]).run
        end
 
-       def enqueue_http_response(url)
-         SongkickQueue.publish('daimon-skycrawler.http-response', url: url)
+       def enqueue_http_response(url, message = {})
+         message[:url] = url
+         SongkickQueue.publish(queue_name, message)
+       end
+
+       def config
+         DaimonSkycrawlers.configuration
+       end
+
+       def queue_name
+         "#{config.queue_name_prefix}.http-response"
+       end
+
+       def default_process_name
+         "#{config.queue_name_prefix}:http-response"
        end
      end
    end
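
`enqueue_http_response` now takes a message hash alongside the URL, so metadata such as `:depth` travels with the job, and queue names derive from the configurable `queue_name_prefix`. A sketch (the `depth: 3` value is illustrative):

```ruby
require "daimon_skycrawlers/processor"

# Extra keys ride along in the published payload and reach every
# registered processor's call(message).
DaimonSkycrawlers::Processor.enqueue_http_response("http://example.com/", depth: 3)
```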
data/lib/daimon_skycrawlers/queue.rb
@@ -0,0 +1,24 @@
+ require "songkick_queue"
+
+ module DaimonSkycrawlers
+   class Queue
+     class << self
+       def configuration
+         @configuration ||= SongkickQueue.configure do |config|
+           config.logger = Logger.new(STDOUT)
+           config.host = "127.0.0.1"
+           config.port = 5672
+           # config.username = 'guest'
+           # config.password = 'guest'
+           config.vhost = "/"
+           config.max_reconnect_attempts = 10
+           config.network_recovery_interval = 1.0
+         end
+       end
+
+       def configure
+         yield configuration
+       end
+     end
+   end
+ end
data/lib/daimon_skycrawlers/storage/base.rb
@@ -1,6 +1,12 @@
+ require "daimon_skycrawlers/logger"
+ require "daimon_skycrawlers/config"
+
  module DaimonSkycrawlers
    module Storage
      class Base
+       include DaimonSkycrawlers::LoggerMixin
+       include DaimonSkycrawlers::ConfigMixin
+
        def save(url, headers, body)
          raise "Implement this in subclass"
        end
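
Storage backends implement `save` and `find`; with the new mixins they also get `log` and configuration for free. A hedged sketch of an in-memory backend (illustrative; the gem itself ships the RDB storage used by the processors above, whose `find` returns ActiveRecord objects):

```ruby
require "daimon_skycrawlers/storage/base"

# Illustrative in-memory storage; Page is a stand-in struct.
class MemoryStorage < DaimonSkycrawlers::Storage::Base
  Page = Struct.new(:url, :headers, :body)

  def initialize
    super
    @pages = {}
  end

  def save(url, headers, body)
    log.info("Saving #{url}")
    @pages[url] = Page.new(url, headers, body)
  end

  def find(url)
    @pages[url]
  end
end
```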
data/lib/daimon_skycrawlers/timer.rb
@@ -0,0 +1,24 @@
+ require "timers"
+ require "daimon_skycrawlers"
+
+ module DaimonSkycrawlers
+   module Timer
+     module_function
+
+     def setup_shutdown_timer(queue_name_prefix, interval: 10)
+       timers = Timers::Group.new
+       timer = timers.after(interval) do
+         Process.kill(:INT, 0)
+       end
+       Thread.new(timers) do |t|
+         loop { t.wait }
+       end
+       ActiveSupport::Notifications.subscribe("consume_message.songkick_queue") do |*args|
+         event = ActiveSupport::Notifications::Event.new(*args)
+         queue_name = event.payload[:queue_name]
+         DaimonSkycrawlers.configuration.logger.debug("Reset timer: consume message #{queue_name}")
+         timer.reset
+       end
+     end
+   end
+ end
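
The timer is an idle-shutdown guard: after `interval` seconds with no consumed message it sends SIGINT to the whole process group, and each `consume_message.songkick_queue` notification resets the countdown. `Processor.run` installs it automatically; installing it by hand might look like this (prefix and interval illustrative):

```ruby
require "daimon_skycrawlers/timer"

# Stop this worker after 30 seconds of queue inactivity.
DaimonSkycrawlers::Timer.setup_shutdown_timer("daimon-skycrawlers", interval: 30)
```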
data/lib/daimon_skycrawlers/version.rb
@@ -1,3 +1,3 @@
  module DaimonSkycrawlers
-   VERSION = "0.1.0"
+   VERSION = "0.2.0"
  end
data/lib/daimon_skycrawlers.rb
@@ -1,15 +1,35 @@
- require 'bundler/setup'
+ require "bundler/setup"
 
- require 'daimon_skycrawlers/version'
+ require "daimon_skycrawlers/version"
+ require "daimon_skycrawlers/logger"
 
  module DaimonSkycrawlers
+   Configuration = Struct.new(
+     :logger,
+     :queue_name_prefix,
+     :crawler_interval,
+     :shutdown_interval
+   )
    class << self
      def register_processor(processor = nil, &block)
-       HTTPResponseConsumer.register(processor, &block)
+       DaimonSkycrawlers::Consumer::HTTPResponse.register(processor, &block)
      end
 
      def register_crawler(crawler)
-       URLConsumer.register(crawler)
+       DaimonSkycrawlers::Consumer::URL.register(crawler)
+     end
+
+     def configuration
+       @configuration ||= DaimonSkycrawlers::Configuration.new.tap do |config|
+         config.logger = DaimonSkycrawlers::Logger.default
+         config.queue_name_prefix = "daimon-skycrawlers"
+         config.crawler_interval = 1
+         config.shutdown_interval = 10
+       end
+     end
+
+     def configure
+       yield configuration
      end
    end
  end
data/sample/spider/Gemfile
@@ -0,0 +1,4 @@
+ source "https://rubygems.org"
+
+ gem "rake"
+ gem "daimon_skycrawlers", path: "../../"
data/sample/spider/README.md
@@ -0,0 +1,50 @@
+ # spider
+
+ TODO: Write description.
+
+ ## Requirements
+
+ - Ruby
+ - RabbitMQ
+ - RDB
+   - PostgreSQL (default)
+   - MySQL
+   - SQLite3
+
+ ## Usage
+
+ 1. Install dependencies
+
+ ```
+ $ bundle install
+ ```
+
+ 2. Create database
+
+ ```
+ $ bundle exec rake db:create
+ $ bundle exec rake db:migrate
+ ```
+
+ 3. Open new terminal and run crawler/processor
+
+ ```
+ $ bundle exec ruby crawler.rb # on new terminal
+ $ bundle exec ruby processor.rb # on new terminal
+ ```
+
+ 4. Enqueue task
+
+ ```
+ $ bundle exec ruby enqueue.rb http://example.com/
+ ```
+
+ 5. You'll see `It works with 'http://example.com'` on your terminal which runs your processor!
+
+ 6. You can re-enqueue task for processor
+
+ ```
+ $ bundle exec ruby enqueue.rb response http://example.com/
+ ```
+
+ Display `It works with 'http://example.com'` again on your terminal which runs your processor.
data/sample/spider/Rakefile
@@ -0,0 +1 @@
+ require "daimon_skycrawlers/tasks"
data/sample/spider/config/database.yml
@@ -0,0 +1,26 @@
+ # PostgreSQL. Versions 8.2 and up are supported.
+ #
+ default: &default
+   adapter: postgresql
+   encoding: unicode
+   pool: 5
+
+ development:
+   <<: *default
+   database: spider_development
+   #username: spider
+   #password:
+   #host: localhost
+   #port: 5432
+   #schema_search_path: myapp,sharedapp,public
+   #min_messages: notice
+
+ test:
+   <<: *default
+   database: spider_test
+
+ production:
+   <<: *default
+   database: spider_production
+   username: spider
+   password: <%= ENV['SPIDER_PASSWORD'] %>
data/sample/spider/crawler.rb
@@ -0,0 +1,14 @@
+ #!/usr/bin/env ruby
+
+ require "daimon_skycrawlers/crawler"
+ require "daimon_skycrawlers/crawler/default"
+
+ require_relative "./init"
+
+ base_url = "http://www.clear-code.com/blog/"
+
+ crawler = DaimonSkycrawlers::Crawler::Default.new(base_url)
+
+ DaimonSkycrawlers.register_crawler(crawler)
+
+ DaimonSkycrawlers::Crawler.run
data/sample/spider/db/migrate/20160830155803_create_pages.rb
@@ -0,0 +1,13 @@
+ class CreatePages < ActiveRecord::Migration
+   def change
+     create_table :pages do |t|
+       t.string :url
+       t.text :headers
+       t.binary :body, limit: 10 * 1024 ** 2 # 10MiB
+       t.datetime :last_modified_at
+       t.string :etag
+
+       t.timestamps null: false
+     end
+   end
+ end
data/sample/spider/db/schema.rb
@@ -0,0 +1,28 @@
+ # This file is auto-generated from the current state of the database. Instead
+ # of editing this file, please use the migrations feature of Active Record to
+ # incrementally modify your database, and then regenerate this schema definition.
+ #
+ # Note that this schema.rb definition is the authoritative source for your
+ # database schema. If you need to create the application database on another
+ # system, you should be using db:schema:load, not running all the migrations
+ # from scratch. The latter is a flawed and unsustainable approach (the more migrations
+ # you'll amass, the slower it'll run and the greater likelihood for issues).
+ #
+ # It's strongly recommended that you check this file into your version control system.
+
+ ActiveRecord::Schema.define(version: 20160830155803) do
+
+   # These are extensions that must be enabled in order to support this database
+   enable_extension "plpgsql"
+
+   create_table "pages", force: :cascade do |t|
+     t.string   "url"
+     t.text     "headers"
+     t.binary   "body"
+     t.datetime "last_modified_at"
+     t.string   "etag"
+     t.datetime "created_at", null: false
+     t.datetime "updated_at", null: false
+   end
+
+ end
data/sample/spider/enqueue.rb
@@ -0,0 +1,24 @@
+ #!/usr/bin/env ruby
+
+ require "thor"
+
+ require "daimon_skycrawlers/crawler"
+ require "daimon_skycrawlers/processor"
+
+ require_relative "./init"
+
+ class Enqueue < Thor
+   desc "url URL [key1:value1 key2:value2...]", "Enqueue URL for URL consumer"
+   def url(url, *rest)
+     message = rest.map {|arg| arg.split(":") }.to_h
+     DaimonSkycrawlers::Crawler.enqueue_url(url, message)
+   end
+
+   desc "response URL [key1:value1 key2:value2...]", "Enqueue URL for HTTP response consumer"
+   def response(url, *rest)
+     message = rest.map {|arg| arg.split(":") }.to_h
+     DaimonSkycrawlers::Processor.enqueue_http_response(url, message)
+   end
+ end
+
+ Enqueue.start(ARGV)
data/sample/spider/init.rb
@@ -0,0 +1,22 @@
+ require "daimon_skycrawlers"
+ require "daimon_skycrawlers/logger"
+ require "daimon_skycrawlers/queue"
+
+ DaimonSkycrawlers.configure do |config|
+   config.logger = DaimonSkycrawlers::Logger.default
+   config.logger.level = :debug
+   config.crawler_interval = 1
+   config.shutdown_interval = 30
+ end
+
+ DaimonSkycrawlers::Queue.configure do |config|
+   # queue configuration
+   config.logger = DaimonSkycrawlers.configuration.logger
+   config.host = "127.0.0.1"
+   config.port = 5672
+   # config.username = 'guest'
+   # config.password = 'guest'
+   config.vhost = "/"
+   config.max_reconnect_attempts = 10
+   config.network_recovery_interval = 1.0
+ end