daimon_skycrawlers 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. checksums.yaml +4 -4
  2. data/.travis.yml +2 -2
  3. data/Gemfile +1 -1
  4. data/README.md +30 -12
  5. data/Rakefile +3 -8
  6. data/bin/daimon-skycrawlers +3 -3
  7. data/daimon_skycrawlers.gemspec +4 -3
  8. data/lib/daimon_skycrawlers/cli.rb +3 -3
  9. data/lib/daimon_skycrawlers/config.rb +8 -0
  10. data/lib/daimon_skycrawlers/consumer/base.rb +16 -0
  11. data/lib/daimon_skycrawlers/consumer/http_response.rb +47 -0
  12. data/lib/daimon_skycrawlers/consumer/url.rb +44 -0
  13. data/lib/daimon_skycrawlers/consumer.rb +4 -0
  14. data/lib/daimon_skycrawlers/crawler/base.rb +75 -0
  15. data/lib/daimon_skycrawlers/crawler/default.rb +33 -0
  16. data/lib/daimon_skycrawlers/crawler.rb +18 -76
  17. data/lib/daimon_skycrawlers/filter/base.rb +24 -0
  18. data/lib/daimon_skycrawlers/filter/duplicate_checker.rb +23 -0
  19. data/lib/daimon_skycrawlers/filter/update_checker.rb +31 -0
  20. data/lib/daimon_skycrawlers/filter.rb +4 -0
  21. data/lib/daimon_skycrawlers/generator/new.rb +3 -2
  22. data/lib/daimon_skycrawlers/generator/templates/new/Gemfile +3 -3
  23. data/lib/daimon_skycrawlers/generator/templates/new/README.md.erb +22 -6
  24. data/lib/daimon_skycrawlers/generator/templates/new/crawler.rb +5 -5
  25. data/lib/daimon_skycrawlers/generator/templates/new/db/migrate/create_pages.rb +6 -6
  26. data/lib/daimon_skycrawlers/generator/templates/new/enqueue.rb +17 -8
  27. data/lib/daimon_skycrawlers/generator/templates/new/init.rb +20 -0
  28. data/lib/daimon_skycrawlers/generator/templates/new/processor.rb +2 -0
  29. data/lib/daimon_skycrawlers/logger.rb +32 -0
  30. data/lib/daimon_skycrawlers/processor/base.rb +19 -0
  31. data/lib/daimon_skycrawlers/processor/default.rb +12 -9
  32. data/lib/daimon_skycrawlers/processor/spider.rb +77 -0
  33. data/lib/daimon_skycrawlers/processor.rb +23 -9
  34. data/lib/daimon_skycrawlers/queue.rb +24 -0
  35. data/lib/daimon_skycrawlers/storage/base.rb +6 -0
  36. data/lib/daimon_skycrawlers/timer.rb +24 -0
  37. data/lib/daimon_skycrawlers/version.rb +1 -1
  38. data/lib/daimon_skycrawlers.rb +24 -4
  39. data/sample/spider/Gemfile +4 -0
  40. data/sample/spider/README.md +50 -0
  41. data/sample/spider/Rakefile +1 -0
  42. data/sample/spider/config/database.yml +26 -0
  43. data/sample/spider/crawler.rb +14 -0
  44. data/sample/spider/db/migrate/20160830155803_create_pages.rb +13 -0
  45. data/sample/spider/db/schema.rb +28 -0
  46. data/sample/spider/enqueue.rb +24 -0
  47. data/sample/spider/init.rb +22 -0
  48. data/sample/spider/processor.rb +34 -0
  49. metadata +47 -12
  50. data/lib/daimon_skycrawlers/configure_songkick_queue.rb +0 -12
  51. data/lib/daimon_skycrawlers/http_response_consumer.rb +0 -38
  52. data/lib/daimon_skycrawlers/parser/base.rb +0 -13
  53. data/lib/daimon_skycrawlers/parser/default.rb +0 -50
  54. data/lib/daimon_skycrawlers/parser.rb +0 -7
  55. data/lib/daimon_skycrawlers/url_consumer.rb +0 -32
data/lib/daimon_skycrawlers/generator/templates/new/README.md.erb
@@ -15,20 +15,36 @@ TODO: Write description.
 
 1. Install dependencies
 
-    $ bundle install
+    ```
+    $ bundle install
+    ```
 
 2. Create database
 
-    $ bundle exec rake db:create
-    $ bundle exec rake db:migrate
+    ```
+    $ bundle exec rake db:create
+    $ bundle exec rake db:migrate
+    ```
 
 3. Open new terminal and run crawler/processor
 
-    $ bundle exec ruby crawler.rb   # on new terminal
-    $ bundle exec ruby processor.rb # on new terminal
+    ```
+    $ bundle exec ruby crawler.rb   # on new terminal
+    $ bundle exec ruby processor.rb # on new terminal
+    ```
 
 4. Enqueue task
 
-    $ bundle exec ruby enqueue.rb http://example.com/
+    ```
+    $ bundle exec ruby enqueue.rb http://example.com/
+    ```
 
 5. You'll see `It works with 'http://example.com'` on your terminal which runs your processor!
+
+6. You can re-enqueue task for processor
+
+    ```
+    $ bundle exec ruby enqueue.rb response http://example.com/
+    ```
+
+    Display `It works with 'http://example.com'` again on your terminal which runs your processor.
data/lib/daimon_skycrawlers/generator/templates/new/crawler.rb
@@ -1,13 +1,13 @@
 #!/usr/bin/env ruby
 
 require "daimon_skycrawlers/crawler"
+require "daimon_skycrawlers/crawler/default"
 
-base_url = 'http://example.com'
+require_relative "./init"
 
-crawler = DaimonSkycrawlers::Crawler.new(base_url)
-crawler.parser.append_filter do |url|
-  url.start_with?(base_url)
-end
+base_url = "http://example.com"
+
+crawler = DaimonSkycrawlers::Crawler::Default.new(base_url)
 
 DaimonSkycrawlers.register_crawler(crawler)
 
data/lib/daimon_skycrawlers/generator/templates/new/db/migrate/create_pages.rb (whitespace-only change)
@@ -1,13 +1,13 @@
 class CreatePages < ActiveRecord::Migration
   def change
     create_table :pages do |t|
-      t.string :url
-      t.text :headers
-      t.binary :body, limit: 10 * 1024 ** 2 # 10MiB
-      t.datetime :last_modified_at
-      t.string :etag
+      t.string :url
+      t.text :headers
+      t.binary :body, limit: 10 * 1024 ** 2 # 10MiB
+      t.datetime :last_modified_at
+      t.string :etag
 
-      t.timestamps null: false
+      t.timestamps null: false
     end
   end
 end
data/lib/daimon_skycrawlers/generator/templates/new/enqueue.rb
@@ -1,15 +1,24 @@
 #!/usr/bin/env ruby
 
+require "thor"
+
 require "daimon_skycrawlers/crawler"
+require "daimon_skycrawlers/processor"
 
-USAGE = "Usage: #{$0} [URL]"
+require_relative "./init"
 
-if ARGV.size < 1
-  $stderr.puts "#{$0}: missing URL"
-  $stderr.puts USAGE
-  exit false
-end
+class Enqueue < Thor
+  desc "url URL [key1:value1 key2:value2...]", "Enqueue URL for URL consumer"
+  def url(url, *rest)
+    message = rest.map {|arg| arg.split(":") }.to_h
+    DaimonSkycrawlers::Crawler.enqueue_url(url, message)
+  end
 
-url = ARGV[0]
+  desc "response URL [key1:value1 key2:value2...]", "Enqueue URL for HTTP response consumer"
+  def response(url, *rest)
+    message = rest.map {|arg| arg.split(":") }.to_h
+    DaimonSkycrawlers::Processor.enqueue_http_response(url, message)
+  end
+end
 
-DaimonSkycrawlers::Crawler.enqueue_url(url)
+Enqueue.start(ARGV)
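
The generated script now uses Thor subcommands and folds trailing `key1:value1` arguments into the message hash. A quick sketch of that parsing step in plain Ruby (the argument values are illustrative):

```
# How the Thor commands above build the message hash:
rest = ["depth:3", "tag:news"]   # e.g. `enqueue.rb url http://example.com/ depth:3 tag:news`
message = rest.map {|arg| arg.split(":") }.to_h
message  # => {"depth" => "3", "tag" => "news"}
```

Note that `split(":")` is given no limit, so a value that itself contains a colon (a URL, say) splits into more than two parts and makes `to_h` raise an `ArgumentError`.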
data/lib/daimon_skycrawlers/generator/templates/new/init.rb
@@ -0,0 +1,20 @@
+require "daimon_skycrawlers"
+require "daimon_skycrawlers/logger"
+require "daimon_skycrawlers/queue"
+
+DaimonSkycrawlers.configure do |config|
+  config.logger = DaimonSkycrawlers::Logger.default
+  config.crawler_interval = 1
+end
+
+DaimonSkycrawlers::Queue.configure do |config|
+  # queue configuration
+  config.logger = DaimonSkycrawlers.configuration.logger
+  config.host = "127.0.0.1"
+  config.port = 5672
+  # config.username = 'guest'
+  # config.password = 'guest'
+  config.vhost = "/"
+  config.max_reconnect_attempts = 10
+  config.network_recovery_interval = 1.0
+end
data/lib/daimon_skycrawlers/generator/templates/new/processor.rb
@@ -2,6 +2,8 @@
 
 require "daimon_skycrawlers/processor"
 
+require_relative "./init"
+
 DaimonSkycrawlers.register_processor do |data|
   p "It works with '#{data[:url]}'"
 end
data/lib/daimon_skycrawlers/logger.rb
@@ -0,0 +1,32 @@
+require "delegate"
+require "logger"
+
+module DaimonSkycrawlers
+  class Logger < SimpleDelegator
+    class << self
+      def default
+        @default ||= DaimonSkycrawlers::Logger.new(STDOUT)
+      end
+    end
+
+    def initialize(logdev, shift_age = 0, shift_size = 1048576)
+      @log = ::Logger.new(logdev, shift_age, shift_size)
+      super(@log)
+    end
+  end
+
+  module LoggerMixin
+    def initialize
+      super
+      @log = DaimonSkycrawlers::Logger.default
+    end
+
+    class << self
+      def included(base)
+        base.module_eval do
+          attr_accessor :log
+        end
+      end
+    end
+  end
+end
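
`Logger` wraps Ruby's `::Logger` behind a `SimpleDelegator`, and `LoggerMixin` hands every including class a `log` accessor preset to the shared default. A minimal usage sketch (the `MyConsumer` class is hypothetical, for illustration only):

```
require "daimon_skycrawlers/logger"

class MyConsumer   # hypothetical class
  include DaimonSkycrawlers::LoggerMixin

  def work(url)
    log.info("Working on #{url}")   # `log` was preset to Logger.default by the mixin
  end
end

MyConsumer.new.work("http://example.com/")
```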
data/lib/daimon_skycrawlers/processor/base.rb
@@ -0,0 +1,19 @@
+require "daimon_skycrawlers/logger"
+require "daimon_skycrawlers/config"
+
+module DaimonSkycrawlers
+  module Processor
+    class Base
+      include DaimonSkycrawlers::LoggerMixin
+      include DaimonSkycrawlers::ConfigMixin
+
+      def call(message)
+        raise "Implement this method in subclass"
+      end
+
+      def storage
+        @storage ||= DaimonSkycrawlers::Storage::RDB.new
+      end
+    end
+  end
+end
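
With `Processor::Base` in place, a custom processor only has to subclass it and implement `call`; `storage` and `log` come from the base class and its mixins. A minimal sketch, assuming pages were stored by the default crawler (the `TitlePrinter` class is hypothetical):

```
require "nokogiri"
require "daimon_skycrawlers/processor/base"

# Hypothetical processor: print the <title> of each stored page.
class TitlePrinter < DaimonSkycrawlers::Processor::Base
  def call(message)
    return if message[:heartbeat]        # ignore keep-alive messages, as Default does
    page = storage.find(message[:url])   # Base#storage memoizes a Storage::RDB
    log.info("Title: #{Nokogiri::HTML(page.body).title}")
  end
end

DaimonSkycrawlers.register_processor(TitlePrinter.new)
```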
data/lib/daimon_skycrawlers/processor/default.rb
@@ -1,19 +1,22 @@
 require "daimon_skycrawlers/storage/rdb"
+require "daimon_skycrawlers/processor/base"
 
 module DaimonSkycrawlers
-  class Processor
-    class Default
+  module Processor
+    class Default < Base
       def call(message)
+        return if message[:heartbeat]
         url = message[:url]
-        storage = DaimonSkycrawlers::Storage::RDB.new
         page = storage.find(url)
         headers = JSON.parse(page.headers)
-        puts "URL: #{page.url}"
-        puts "Body: #{page.body.bytesize} bytes"
-        puts "Headers:"
-        headers.each do |key, value|
-          puts "  #{key}: #{value}"
-        end
+        headers_string = headers.map {|key, value| "  #{key}: #{value}" }.join("\n")
+        dumped_message = <<LOG
+URL: #{page.url}
+Body: #{page.body.bytesize} bytes
+Headers:
+#{headers_string}
+LOG
+        log.info(dumped_message)
       end
     end
   end
data/lib/daimon_skycrawlers/processor/spider.rb
@@ -0,0 +1,77 @@
+require "nokogiri"
+require "daimon_skycrawlers/crawler"
+
+module DaimonSkycrawlers
+  module Processor
+    class Spider < Base
+      attr_accessor :enqueue
+
+      def initialize
+        super
+        @filters = []
+        @doc = nil
+        @links = nil
+        @enqueue = true
+      end
+
+      def append_filter(filter = nil, &block)
+        if block_given?
+          @filters << block
+        else
+          @filters << filter
+        end
+      end
+
+      #
+      # @param [Hash] message Must have key :url, :depth
+      #
+      def call(message)
+        key_url = message[:url]
+        depth = Integer(message[:depth] || 2)
+        return if message[:heartbeat]
+        return if depth <= 1
+        page = storage.find(key_url)
+        @doc = Nokogiri::HTML(page.body)
+        new_message = {
+          depth: depth - 1,
+        }
+        links.each do |url|
+          enqueue_url(url, new_message)
+        end
+      end
+
+      private
+
+      def links
+        return @links if @links
+        @links = retrieve_links
+        @links
+      end
+
+      def retrieve_links
+        urls = @doc.search("a").map do |element|
+          element["href"]
+        end
+        urls.uniq!
+        apply_filters(urls) || []
+      end
+
+      def apply_filters(urls)
+        return if urls.nil?
+        return if urls.empty?
+        log.debug("Candidate URLs: #{urls.size}")
+        urls = urls.select do |url|
+          @filters.inject(true) {|memo, filter| memo & filter.call(url) }
+        end
+        log.debug("Filtered URLs: #{urls.size}")
+        urls
+      end
+
+      def enqueue_url(url, new_message)
+        return unless @enqueue
+        log.debug("Enqueue: URL:#{url}, message: #{new_message}")
+        DaimonSkycrawlers::Crawler.enqueue_url(url, new_message)
+      end
+    end
+  end
+end
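
`Spider` turns the processor side into a link extractor: it parses the stored page with Nokogiri, filters candidate URLs, and enqueues the survivors back to the crawler with a decremented `:depth`. A registration sketch in the spirit of the removed 0.1.0 `crawler.parser.append_filter` API (the base URL is illustrative, echoing the sample app):

```
require "daimon_skycrawlers/processor/spider"

spider = DaimonSkycrawlers::Processor::Spider.new
# Keep the spider on one site; every filter must approve a URL for it to survive.
spider.append_filter do |url|
  url.to_s.start_with?("http://www.clear-code.com/blog/")
end

DaimonSkycrawlers.register_processor(spider)
```

Because `apply_filters` folds with `inject(true) {|memo, filter| memo & filter.call(url) }`, a single `false` (or `nil`) from any filter drops the URL.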
data/lib/daimon_skycrawlers/processor.rb
@@ -1,17 +1,31 @@
-require 'daimon_skycrawlers'
-require 'daimon_skycrawlers/configure_songkick_queue'
-require 'daimon_skycrawlers/url_consumer'
-require 'daimon_skycrawlers/http_response_consumer'
+require "daimon_skycrawlers"
+require "daimon_skycrawlers/queue"
+require "daimon_skycrawlers/timer"
+require "daimon_skycrawlers/consumer/http_response"
 
 module DaimonSkycrawlers
-  class Processor
+  module Processor
     class << self
-      def run(process_name: 'daimon-skycrawler:http-response')
-        SongkickQueue::Worker.new(process_name, [HTTPResponseConsumer]).run
+      def run(process_name: default_process_name)
+        DaimonSkycrawlers::Timer.setup_shutdown_timer(config.queue_name_prefix, interval: config.shutdown_interval)
+        SongkickQueue::Worker.new(process_name, [DaimonSkycrawlers::Consumer::HTTPResponse]).run
       end
 
-      def enqueue_http_response(url)
-        SongkickQueue.publish('daimon-skycrawler.http-response', url: url)
+      def enqueue_http_response(url, message = {})
+        message[:url] = url
+        SongkickQueue.publish(queue_name, message)
+      end
+
+      def config
+        DaimonSkycrawlers.configuration
+      end
+
+      def queue_name
+        "#{config.queue_name_prefix}.http-response"
+      end
+
+      def default_process_name
+        "#{config.queue_name_prefix}:http-response"
       end
     end
   end
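
Queue and process names are no longer hard-coded: both derive from `queue_name_prefix`, and `enqueue_http_response` now forwards an arbitrary message hash. A sketch under the default configuration:

```
require "daimon_skycrawlers/processor"

# With the default prefix, messages go to the "daimon-skycrawlers.http-response" queue.
DaimonSkycrawlers::Processor.queue_name
# => "daimon-skycrawlers.http-response"

# Re-process a stored page; extra keys ride along in the message.
DaimonSkycrawlers::Processor.enqueue_http_response("http://example.com/", depth: 3)
```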
data/lib/daimon_skycrawlers/queue.rb
@@ -0,0 +1,24 @@
+require "songkick_queue"
+
+module DaimonSkycrawlers
+  class Queue
+    class << self
+      def configuration
+        @configuration ||= SongkickQueue.configure do |config|
+          config.logger = Logger.new(STDOUT)
+          config.host = "127.0.0.1"
+          config.port = 5672
+          # config.username = 'guest'
+          # config.password = 'guest'
+          config.vhost = "/"
+          config.max_reconnect_attempts = 10
+          config.network_recovery_interval = 1.0
+        end
+      end
+
+      def configure
+        yield configuration
+      end
+    end
+  end
+end
data/lib/daimon_skycrawlers/storage/base.rb
@@ -1,6 +1,12 @@
+require "daimon_skycrawlers/logger"
+require "daimon_skycrawlers/config"
+
 module DaimonSkycrawlers
   module Storage
     class Base
+      include DaimonSkycrawlers::LoggerMixin
+      include DaimonSkycrawlers::ConfigMixin
+
       def save(url, headers, body)
         raise "Implement this in subclass"
       end
data/lib/daimon_skycrawlers/timer.rb
@@ -0,0 +1,24 @@
+require "timers"
+require "daimon_skycrawlers"
+
+module DaimonSkycrawlers
+  module Timer
+    module_function
+
+    def setup_shutdown_timer(queue_name_prefix, interval: 10)
+      timers = Timers::Group.new
+      timer = timers.after(interval) do
+        Process.kill(:INT, 0)
+      end
+      Thread.new(timers) do |t|
+        loop { t.wait }
+      end
+      ActiveSupport::Notifications.subscribe("consume_message.songkick_queue") do |*args|
+        event = ActiveSupport::Notifications::Event.new(*args)
+        queue_name = event.payload[:queue_name]
+        DaimonSkycrawlers.configuration.logger.debug("Reset timer: consume message #{queue_name}")
+        timer.reset
+      end
+    end
+  end
+end
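
The timer makes idle workers exit: `Process.kill(:INT, 0)` signals the current process group after `interval` seconds, and every `consume_message.songkick_queue` notification resets the countdown. `Processor.run` (above) wires this up automatically; invoking it by hand is a one-liner (the prefix and interval here are illustrative):

```
require "daimon_skycrawlers/timer"

# Shut this worker down after 30 seconds without a consumed message.
DaimonSkycrawlers::Timer.setup_shutdown_timer("daimon-skycrawlers", interval: 30)
```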
data/lib/daimon_skycrawlers/version.rb
@@ -1,3 +1,3 @@
 module DaimonSkycrawlers
-  VERSION = "0.1.0"
+  VERSION = "0.2.0"
 end
data/lib/daimon_skycrawlers.rb
@@ -1,15 +1,35 @@
-require 'bundler/setup'
+require "bundler/setup"
 
-require 'daimon_skycrawlers/version'
+require "daimon_skycrawlers/version"
+require "daimon_skycrawlers/logger"
 
 module DaimonSkycrawlers
+  Configuration = Struct.new(
+    :logger,
+    :queue_name_prefix,
+    :crawler_interval,
+    :shutdown_interval
+  )
   class << self
     def register_processor(processor = nil, &block)
-      HTTPResponseConsumer.register(processor, &block)
+      DaimonSkycrawlers::Consumer::HTTPResponse.register(processor, &block)
    end
 
    def register_crawler(crawler)
-      URLConsumer.register(crawler)
+      DaimonSkycrawlers::Consumer::URL.register(crawler)
+    end
+
+    def configuration
+      @configuration ||= DaimonSkycrawlers::Configuration.new.tap do |config|
+        config.logger = DaimonSkycrawlers::Logger.default
+        config.queue_name_prefix = "daimon-skycrawlers"
+        config.crawler_interval = 1
+        config.shutdown_interval = 10
+      end
+    end
+
+    def configure
+      yield configuration
     end
   end
 end
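
`configuration` memoizes a `Configuration` struct with the defaults shown above, and `configure` simply yields that same struct, so later reads see earlier overrides. A short sketch:

```
require "daimon_skycrawlers"

DaimonSkycrawlers.configure do |config|
  config.shutdown_interval = 60   # override the 10-second default
end

DaimonSkycrawlers.configuration.shutdown_interval  # => 60
```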
data/sample/spider/Gemfile
@@ -0,0 +1,4 @@
+source "https://rubygems.org"
+
+gem "rake"
+gem "daimon_skycrawlers", path: "../../"
data/sample/spider/README.md
@@ -0,0 +1,50 @@
+# spider
+
+TODO: Write description.
+
+## Requirements
+
+- Ruby
+- RabbitMQ
+- RDB
+  - PostgreSQL (default)
+  - MySQL
+  - SQLite3
+
+## Usage
+
+1. Install dependencies
+
+    ```
+    $ bundle install
+    ```
+
+2. Create database
+
+    ```
+    $ bundle exec rake db:create
+    $ bundle exec rake db:migrate
+    ```
+
+3. Open new terminal and run crawler/processor
+
+    ```
+    $ bundle exec ruby crawler.rb   # on new terminal
+    $ bundle exec ruby processor.rb # on new terminal
+    ```
+
+4. Enqueue task
+
+    ```
+    $ bundle exec ruby enqueue.rb http://example.com/
+    ```
+
+5. You'll see `It works with 'http://example.com'` on your terminal which runs your processor!
+
+6. You can re-enqueue task for processor
+
+    ```
+    $ bundle exec ruby enqueue.rb response http://example.com/
+    ```
+
+    Display `It works with 'http://example.com'` again on your terminal which runs your processor.
data/sample/spider/Rakefile
@@ -0,0 +1 @@
+require "daimon_skycrawlers/tasks"
data/sample/spider/config/database.yml
@@ -0,0 +1,26 @@
+# PostgreSQL. Versions 8.2 and up are supported.
+#
+default: &default
+  adapter: postgresql
+  encoding: unicode
+  pool: 5
+
+development:
+  <<: *default
+  database: spider_development
+  #username: spider
+  #password:
+  #host: localhost
+  #port: 5432
+  #schema_search_path: myapp,sharedapp,public
+  #min_messages: notice
+
+test:
+  <<: *default
+  database: spider_test
+
+production:
+  <<: *default
+  database: spider_production
+  username: spider
+  password: <%= ENV['SPIDER_PASSWORD'] %>
data/sample/spider/crawler.rb
@@ -0,0 +1,14 @@
+#!/usr/bin/env ruby
+
+require "daimon_skycrawlers/crawler"
+require "daimon_skycrawlers/crawler/default"
+
+require_relative "./init"
+
+base_url = "http://www.clear-code.com/blog/"
+
+crawler = DaimonSkycrawlers::Crawler::Default.new(base_url)
+
+DaimonSkycrawlers.register_crawler(crawler)
+
+DaimonSkycrawlers::Crawler.run
data/sample/spider/db/migrate/20160830155803_create_pages.rb
@@ -0,0 +1,13 @@
+class CreatePages < ActiveRecord::Migration
+  def change
+    create_table :pages do |t|
+      t.string :url
+      t.text :headers
+      t.binary :body, limit: 10 * 1024 ** 2 # 10MiB
+      t.datetime :last_modified_at
+      t.string :etag
+
+      t.timestamps null: false
+    end
+  end
+end
data/sample/spider/db/schema.rb
@@ -0,0 +1,28 @@
+# This file is auto-generated from the current state of the database. Instead
+# of editing this file, please use the migrations feature of Active Record to
+# incrementally modify your database, and then regenerate this schema definition.
+#
+# Note that this schema.rb definition is the authoritative source for your
+# database schema. If you need to create the application database on another
+# system, you should be using db:schema:load, not running all the migrations
+# from scratch. The latter is a flawed and unsustainable approach (the more migrations
+# you'll amass, the slower it'll run and the greater likelihood for issues).
+#
+# It's strongly recommended that you check this file into your version control system.
+
+ActiveRecord::Schema.define(version: 20160830155803) do
+
+  # These are extensions that must be enabled in order to support this database
+  enable_extension "plpgsql"
+
+  create_table "pages", force: :cascade do |t|
+    t.string   "url"
+    t.text     "headers"
+    t.binary   "body"
+    t.datetime "last_modified_at"
+    t.string   "etag"
+    t.datetime "created_at", null: false
+    t.datetime "updated_at", null: false
+  end
+
+end
data/sample/spider/enqueue.rb
@@ -0,0 +1,24 @@
+#!/usr/bin/env ruby
+
+require "thor"
+
+require "daimon_skycrawlers/crawler"
+require "daimon_skycrawlers/processor"
+
+require_relative "./init"
+
+class Enqueue < Thor
+  desc "url URL [key1:value1 key2:value2...]", "Enqueue URL for URL consumer"
+  def url(url, *rest)
+    message = rest.map {|arg| arg.split(":") }.to_h
+    DaimonSkycrawlers::Crawler.enqueue_url(url, message)
+  end
+
+  desc "response URL [key1:value1 key2:value2...]", "Enqueue URL for HTTP response consumer"
+  def response(url, *rest)
+    message = rest.map {|arg| arg.split(":") }.to_h
+    DaimonSkycrawlers::Processor.enqueue_http_response(url, message)
+  end
+end
+
+Enqueue.start(ARGV)
data/sample/spider/init.rb
@@ -0,0 +1,22 @@
+require "daimon_skycrawlers"
+require "daimon_skycrawlers/logger"
+require "daimon_skycrawlers/queue"
+
+DaimonSkycrawlers.configure do |config|
+  config.logger = DaimonSkycrawlers::Logger.default
+  config.logger.level = :debug
+  config.crawler_interval = 1
+  config.shutdown_interval = 30
+end
+
+DaimonSkycrawlers::Queue.configure do |config|
+  # queue configuration
+  config.logger = DaimonSkycrawlers.configuration.logger
+  config.host = "127.0.0.1"
+  config.port = 5672
+  # config.username = 'guest'
+  # config.password = 'guest'
+  config.vhost = "/"
+  config.max_reconnect_attempts = 10
+  config.network_recovery_interval = 1.0
+end