daimon_skycrawlers 0.8.0 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +50 -1
- data/lib/daimon_skycrawlers/consumer/http_response.rb +2 -1
- data/lib/daimon_skycrawlers/crawler.rb +3 -1
- data/lib/daimon_skycrawlers/crawler/base.rb +2 -2
- data/lib/daimon_skycrawlers/filter/base.rb +1 -1
- data/lib/daimon_skycrawlers/filter/duplicate_checker.rb +6 -5
- data/lib/daimon_skycrawlers/filter/robots_txt_checker.rb +4 -2
- data/lib/daimon_skycrawlers/filter/update_checker.rb +3 -2
- data/lib/daimon_skycrawlers/generator/new.rb +33 -1
- data/lib/daimon_skycrawlers/generator/templates/new/Dockerfile +25 -0
- data/lib/daimon_skycrawlers/generator/templates/new/Dockerfile.db +6 -0
- data/lib/daimon_skycrawlers/generator/templates/new/README.md.erb +34 -0
- data/lib/daimon_skycrawlers/generator/templates/new/app/filters/sample_filter.rb +9 -0
- data/lib/daimon_skycrawlers/generator/templates/new/config/database.yml.erb +1 -0
- data/lib/daimon_skycrawlers/generator/templates/new/config/init.rb +12 -6
- data/lib/daimon_skycrawlers/generator/templates/new/docker-compose.yml.erb +35 -0
- data/lib/daimon_skycrawlers/generator/templates/new/env.db.erb +5 -0
- data/lib/daimon_skycrawlers/generator/templates/new/env.erb +2 -0
- data/lib/daimon_skycrawlers/generator/templates/new/services/common/docker-entrypoint.sh +30 -0
- data/lib/daimon_skycrawlers/generator/templates/new/services/db/init-user-db.sh +9 -0
- data/lib/daimon_skycrawlers/processor.rb +3 -1
- data/lib/daimon_skycrawlers/processor/base.rb +3 -3
- data/lib/daimon_skycrawlers/processor/proc.rb +16 -0
- data/lib/daimon_skycrawlers/tasks/database_tasks.rake +2 -0
- data/lib/daimon_skycrawlers/version.rb +1 -1
- metadata +11 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 2fd637d109812fe657a771536f3c3a0041e4868a
|
|
4
|
+
data.tar.gz: 031bf50b2b72e6320ee748cb9044e177f24f5d2f
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 59f6404ea231ecc337b1658406daa8c1e3b0d557ddc3d46c37efa91b1f7d3e0f8018d524aa3c2cc227fbee57d04ef51d7561ae879fd5d0fc719f4fca1308afb3
|
|
7
|
+
data.tar.gz: eeb06f74ae722a6cdf18d00f0aeda3e06e06199e02ddd2f0e52fedb3c7a7028c4a99e46a8c19d2a4e6aa0c3979f0602757d5598a5e1de39112457d1b5dce5679
|
data/README.md
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
# DaimonSkycrawlers
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
DaimonSkycrawlers is a crawler framework.
|
|
6
6
|
|
|
7
7
|
## Requirements
|
|
8
8
|
|
|
@@ -90,6 +90,55 @@ Or install it yourself as:
|
|
|
90
90
|
|
|
91
91
|
Display `It works with 'http://example.com'` again on your terminal which runs your processor.
|
|
92
92
|
|
|
93
|
+
### docker-compose
|
|
94
|
+
|
|
95
|
+
1. Create project
|
|
96
|
+
|
|
97
|
+
```
|
|
98
|
+
$ bundle exec daimon_skycrawlers new mycrawlers
|
|
99
|
+
$ cd mycrawlers
|
|
100
|
+
```
|
|
101
|
+
or
|
|
102
|
+
```
|
|
103
|
+
$ daimon_skycrawlers new mycrawlers
|
|
104
|
+
$ cd mycrawlers
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
2. Build docker images
|
|
108
|
+
|
|
109
|
+
```
|
|
110
|
+
$ docker-compose build
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
3. Run docker containers
|
|
114
|
+
|
|
115
|
+
```
|
|
116
|
+
$ docker-compose up -d
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
4. Run a command on docker containers
|
|
120
|
+
|
|
121
|
+
```
|
|
122
|
+
$ docker-compose exec <service name> <command>
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
For example,
|
|
126
|
+
|
|
127
|
+
```
|
|
128
|
+
$ docker-compose exec mycrawlers-db bash
|
|
129
|
+
$ docker-compose exec mycrawlers-crawler sh
|
|
130
|
+
$ docker-compose exec mycrawlers-crawler bundle exec daimon_skycrawlers enqueue url http://example.com/
|
|
131
|
+
$ docker-compose exec mycrawlers-crawler bundle exec daimon_skycrawlers enqueue response http://example.com/
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
5. Shutdown docker containers
|
|
135
|
+
|
|
136
|
+
```
|
|
137
|
+
$ docker-compose down
|
|
138
|
+
$ docker-compose down --rmi all # Remove all related images
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
|
|
93
142
|
## Development
|
|
94
143
|
|
|
95
144
|
After checking out the repo, run `bundle install` to install dependencies. Then, run `bundle exec rake test` to run the tests. You can also run `bundle console` for an interactive prompt that will allow you to experiment.
|
|
@@ -2,6 +2,7 @@ require "songkick_queue"
|
|
|
2
2
|
require "daimon_skycrawlers"
|
|
3
3
|
require "daimon_skycrawlers/consumer/base"
|
|
4
4
|
require "daimon_skycrawlers/processor/default"
|
|
5
|
+
require "daimon_skycrawlers/processor/proc"
|
|
5
6
|
|
|
6
7
|
module DaimonSkycrawlers
|
|
7
8
|
module Consumer
|
|
@@ -27,7 +28,7 @@ module DaimonSkycrawlers
|
|
|
27
28
|
#
|
|
28
29
|
def register(processor = nil, &block)
|
|
29
30
|
if block_given?
|
|
30
|
-
processors << block
|
|
31
|
+
processors << DaimonSkycrawlers::Processor::Proc.new(block)
|
|
31
32
|
else
|
|
32
33
|
processors << processor
|
|
33
34
|
end
|
|
@@ -12,7 +12,9 @@ module DaimonSkycrawlers
|
|
|
12
12
|
# @param process_name [String] Process name
|
|
13
13
|
#
|
|
14
14
|
def run(process_name: default_process_name)
|
|
15
|
-
|
|
15
|
+
if config.shutdown_interval > 0
|
|
16
|
+
DaimonSkycrawlers::Timer.setup_shutdown_timer(config.queue_name_prefix, interval: config.shutdown_interval)
|
|
17
|
+
end
|
|
16
18
|
SongkickQueue::Worker.new(process_name, [DaimonSkycrawlers::Consumer::URL]).run
|
|
17
19
|
end
|
|
18
20
|
|
|
@@ -112,13 +112,13 @@ module DaimonSkycrawlers
|
|
|
112
112
|
def apply_filters(url)
|
|
113
113
|
if @options[:obey_robots_txt]
|
|
114
114
|
robots_txt_checker = DaimonSkycrawlers::Filter::RobotsTxtChecker.new(base_url: @base_url)
|
|
115
|
-
unless robots_txt_checker.allowed?(url)
|
|
115
|
+
unless robots_txt_checker.allowed?({ url: url })
|
|
116
116
|
skip(url)
|
|
117
117
|
return
|
|
118
118
|
end
|
|
119
119
|
end
|
|
120
120
|
update_checker = DaimonSkycrawlers::Filter::UpdateChecker.new(storage: storage)
|
|
121
|
-
unless update_checker.updated?(url.to_s, connection: connection)
|
|
121
|
+
unless update_checker.updated?({ url: url.to_s }, connection: connection)
|
|
122
122
|
skip(url)
|
|
123
123
|
return
|
|
124
124
|
end
|
|
@@ -16,11 +16,12 @@ module DaimonSkycrawlers
|
|
|
16
16
|
end
|
|
17
17
|
|
|
18
18
|
#
|
|
19
|
-
# @param [
|
|
19
|
+
# @param [Hash] message to check duplication. If given URL is
|
|
20
20
|
# relative URL, use `@base_url + url` as absolute URL.
|
|
21
21
|
# @return [true|false] Return false when duplicated, otherwise return true.
|
|
22
22
|
#
|
|
23
|
-
def call(
|
|
23
|
+
def call(message)
|
|
24
|
+
url = message[:url]
|
|
24
25
|
unless URI(url).absolute?
|
|
25
26
|
url = (@base_url + url).to_s
|
|
26
27
|
end
|
|
@@ -30,12 +31,12 @@ module DaimonSkycrawlers
|
|
|
30
31
|
end
|
|
31
32
|
|
|
32
33
|
#
|
|
33
|
-
# @param [
|
|
34
|
+
# @param [Hash] message to check duplication. If given URL is
|
|
34
35
|
# relative URL, use `@base_url + url` as absolute URL.
|
|
35
36
|
# @return [true|false] Return true when duplicated, otherwise return false.
|
|
36
37
|
#
|
|
37
|
-
def duplicated?(
|
|
38
|
-
!call(
|
|
38
|
+
def duplicated?(message)
|
|
39
|
+
!call(message)
|
|
39
40
|
end
|
|
40
41
|
end
|
|
41
42
|
end
|
|
@@ -11,14 +11,16 @@ module DaimonSkycrawlers
|
|
|
11
11
|
class RobotsTxtChecker < Base
|
|
12
12
|
def initialize(base_url: nil, user_agent: "DaimonSkycrawlers/#{DaimonSkycrawlers::VERSION}")
|
|
13
13
|
super()
|
|
14
|
+
@base_url = base_url
|
|
14
15
|
@webrobots = WebRobots.new(user_agent)
|
|
15
16
|
end
|
|
16
17
|
|
|
17
18
|
#
|
|
18
|
-
# @param [
|
|
19
|
+
# @param [Hash] message
|
|
19
20
|
# @return [true|false] Return true when web site allows to fetch the URL, otherwise return false
|
|
20
21
|
#
|
|
21
|
-
def call(
|
|
22
|
+
def call(message)
|
|
23
|
+
url = message[:url]
|
|
22
24
|
unless URI(url).absolute?
|
|
23
25
|
url = (@base_url + url).to_s
|
|
24
26
|
end
|
|
@@ -17,11 +17,12 @@ module DaimonSkycrawlers
|
|
|
17
17
|
end
|
|
18
18
|
|
|
19
19
|
#
|
|
20
|
-
# @param [
|
|
20
|
+
# @param [Hash] message
|
|
21
21
|
# @param connection [Faraday]
|
|
22
22
|
# @return [true|false] Return true when need update, otherwise return false
|
|
23
23
|
#
|
|
24
|
-
def call(
|
|
24
|
+
def call(message, connection: nil)
|
|
25
|
+
url = message[:url]
|
|
25
26
|
unless URI(url).absolute?
|
|
26
27
|
url = (@base_url + url).to_s
|
|
27
28
|
end
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
require "securerandom"
|
|
1
2
|
require "thor"
|
|
2
3
|
require "rails/generators"
|
|
3
4
|
require "rails/generators/actions"
|
|
@@ -16,11 +17,21 @@ module DaimonSkycrawlers
|
|
|
16
17
|
end
|
|
17
18
|
|
|
18
19
|
def create_files
|
|
20
|
+
config = {
|
|
21
|
+
password: SecureRandom.urlsafe_base64
|
|
22
|
+
}
|
|
19
23
|
[
|
|
20
24
|
"README.md",
|
|
21
25
|
"config/database.yml",
|
|
26
|
+
"docker-compose.yml",
|
|
27
|
+
"env",
|
|
28
|
+
"env.db",
|
|
22
29
|
].each do |path|
|
|
23
|
-
|
|
30
|
+
if path.start_with?("env")
|
|
31
|
+
template("#{path}.erb", "#{name}/.#{path}", config)
|
|
32
|
+
else
|
|
33
|
+
template("#{path}.erb", "#{name}/#{path}", config)
|
|
34
|
+
end
|
|
24
35
|
end
|
|
25
36
|
migration_options = {
|
|
26
37
|
destination_root: File.join(destination_root, name),
|
|
@@ -55,15 +66,36 @@ module DaimonSkycrawlers
|
|
|
55
66
|
|
|
56
67
|
def copy_files
|
|
57
68
|
[
|
|
69
|
+
"Dockerfile",
|
|
70
|
+
"Dockerfile.db",
|
|
58
71
|
"Gemfile",
|
|
59
72
|
"Rakefile",
|
|
60
73
|
"app/crawlers/sample_crawler.rb",
|
|
74
|
+
"app/filters/sample_filter.rb",
|
|
61
75
|
"app/processors/sample_processor.rb",
|
|
62
76
|
"config/init.rb",
|
|
77
|
+
"services/common/docker-entrypoint.sh",
|
|
78
|
+
"services/db/init-user-db.sh"
|
|
63
79
|
].each do |path|
|
|
64
80
|
copy_file(path, "#{name}/#{path}", mode: :preserve)
|
|
65
81
|
end
|
|
66
82
|
end
|
|
83
|
+
|
|
84
|
+
def create_directories
|
|
85
|
+
[
|
|
86
|
+
"vendor/bundle",
|
|
87
|
+
"docker-cache/bundle",
|
|
88
|
+
"docker-cache/.bundle"
|
|
89
|
+
].each do |entry|
|
|
90
|
+
empty_directory("#{name}/#{entry}")
|
|
91
|
+
end
|
|
92
|
+
end
|
|
93
|
+
|
|
94
|
+
def display_post_message
|
|
95
|
+
puts <<MESSAGE
|
|
96
|
+
Check .env and .env.db before run `docker-compose build` or `docker-compose up`.
|
|
97
|
+
MESSAGE
|
|
98
|
+
end
|
|
67
99
|
end
|
|
68
100
|
|
|
69
101
|
class MigrationGenerator < ActiveRecord::Generators::MigrationGenerator
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
FROM ruby:2.3.1-alpine
|
|
2
|
+
|
|
3
|
+
RUN apk --no-cache --update add build-base ruby-dev libxml2-dev postgresql-dev libcurl git
|
|
4
|
+
|
|
5
|
+
RUN adduser -D -h /home/crawler -g "DaimonSkycrawlers user" -s /bin/sh crawler crawler
|
|
6
|
+
|
|
7
|
+
ARG SKYCRAWLERS_ENV=production
|
|
8
|
+
ARG SKYCRAWLERS_MAIN=crawler
|
|
9
|
+
ENV SKYCRAWLERS_ENV=$SKYCRAWLERS_ENV \
|
|
10
|
+
SKYCRAWLERS_MAIN=$SKYCRAWLERS_MAIN \
|
|
11
|
+
BUNDLE_JOBS=4
|
|
12
|
+
|
|
13
|
+
USER crawler
|
|
14
|
+
WORKDIR /home/crawler
|
|
15
|
+
COPY ./Gemfile* ./
|
|
16
|
+
|
|
17
|
+
RUN if [ "$SKYCRAWLERS_ENV" = "production" ]; then \
|
|
18
|
+
bundle install --without development:test; \
|
|
19
|
+
fi
|
|
20
|
+
|
|
21
|
+
COPY . .
|
|
22
|
+
|
|
23
|
+
ADD services/common/docker-entrypoint.sh /docker-entrypoint.sh
|
|
24
|
+
ENTRYPOINT ["/docker-entrypoint.sh"]
|
|
25
|
+
CMD ["$SKYCRAWLERS_MAIN"]
|
|
@@ -48,3 +48,37 @@ $ bin/enqueue response http://example.com/
|
|
|
48
48
|
```
|
|
49
49
|
|
|
50
50
|
Display `It works with 'http://example.com'` again on your terminal which runs your processor.
|
|
51
|
+
|
|
52
|
+
## Usage with docker-compose
|
|
53
|
+
|
|
54
|
+
1. Build docker images
|
|
55
|
+
|
|
56
|
+
```
|
|
57
|
+
$ docker-compose build
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
2. Run docker containers
|
|
61
|
+
|
|
62
|
+
```
|
|
63
|
+
$ docker-compose up -d
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
3. Run a command on docker containers
|
|
67
|
+
|
|
68
|
+
```
|
|
69
|
+
$ docker-compose exec <service name> <command>
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
For example,
|
|
73
|
+
|
|
74
|
+
```
|
|
75
|
+
$ docker-compose exec <%= name %>-db bash
|
|
76
|
+
$ docker-compose exec <%= name %>-crawler sh
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
4. Shutdown docker containers
|
|
80
|
+
|
|
81
|
+
```
|
|
82
|
+
$ docker-compose down
|
|
83
|
+
$ docker-compose down --rmi all # Remove all related images
|
|
84
|
+
```
|
|
@@ -6,16 +6,22 @@ require "daimon_skycrawlers/queue"
|
|
|
6
6
|
DaimonSkycrawlers.configure do |config|
|
|
7
7
|
config.logger = DaimonSkycrawlers::Logger.default
|
|
8
8
|
config.crawler_interval = 1
|
|
9
|
+
config.shutdown_interval = 300
|
|
9
10
|
end
|
|
10
11
|
|
|
11
12
|
DaimonSkycrawlers::Queue.configure do |config|
|
|
12
|
-
|
|
13
|
+
if ENV["CLOUDAMQP_URL"]
|
|
14
|
+
amqp_uri = URI(ENV["CLOUDAMQP_URL"])
|
|
15
|
+
config.host = amqp_uri.host
|
|
16
|
+
config.username = amqp_uri.user
|
|
17
|
+
config.password = amqp_uri.password
|
|
18
|
+
config.vhost = amqp_uri.user
|
|
19
|
+
else
|
|
20
|
+
config.port = 5672
|
|
21
|
+
config.host = ENV["SKYCRAWLERS_RABBITMQ_HOST"] || "localhost"
|
|
22
|
+
config.vhost = "/"
|
|
23
|
+
end
|
|
13
24
|
config.logger = DaimonSkycrawlers.configuration.logger
|
|
14
|
-
config.host = "127.0.0.1"
|
|
15
|
-
config.port = 5672
|
|
16
|
-
# config.username = 'guest'
|
|
17
|
-
# config.password = 'guest'
|
|
18
|
-
config.vhost = "/"
|
|
19
25
|
config.max_reconnect_attempts = 10
|
|
20
26
|
config.network_recovery_interval = 1.0
|
|
21
27
|
end
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
version: "2"
|
|
2
|
+
services:
|
|
3
|
+
<%= name %>-rabbitmq:
|
|
4
|
+
image: rabbitmq
|
|
5
|
+
|
|
6
|
+
<%= name %>-db:
|
|
7
|
+
build:
|
|
8
|
+
context: .
|
|
9
|
+
dockerfile: Dockerfile.db
|
|
10
|
+
env_file: .env.db
|
|
11
|
+
|
|
12
|
+
<%= name %>-common: &common
|
|
13
|
+
build:
|
|
14
|
+
context: .
|
|
15
|
+
args:
|
|
16
|
+
- SKYCRAWLERS_ENV=development
|
|
17
|
+
links:
|
|
18
|
+
- <%= name %>-rabbitmq
|
|
19
|
+
- <%= name %>-db
|
|
20
|
+
volumes:
|
|
21
|
+
- ./:/home/crawler
|
|
22
|
+
- ./docker-cache/.bundle:/home/crawler/.bundle
|
|
23
|
+
- ./docker-cache/bundle:/home/crawler/vendor/bundle
|
|
24
|
+
working_dir: /home/crawler
|
|
25
|
+
env_file: .env
|
|
26
|
+
|
|
27
|
+
<%= name %>-crawler:
|
|
28
|
+
<<: *common
|
|
29
|
+
command: crawler
|
|
30
|
+
|
|
31
|
+
<%= name %>-processor:
|
|
32
|
+
<<: *common
|
|
33
|
+
depends_on:
|
|
34
|
+
- <%= name %>-crawler
|
|
35
|
+
command: processor
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
#!/bin/sh
|
|
2
|
+
|
|
3
|
+
set -x
|
|
4
|
+
|
|
5
|
+
MAIN=$1
|
|
6
|
+
case $MAIN in
|
|
7
|
+
crawler)
|
|
8
|
+
bundle check || bundle install --retry=3 --path=vendor/bundle \
|
|
9
|
+
&& bundle exec rake db:schema:load || bundle exec rake db:migrate
|
|
10
|
+
bundle exec daimon_skycrawlers exec $MAIN
|
|
11
|
+
;;
|
|
12
|
+
processor)
|
|
13
|
+
while [ ! -e Gemfile.lock ]; do
|
|
14
|
+
sleep 5
|
|
15
|
+
done
|
|
16
|
+
bundle check || bundle install --retry=3 --path=vendor/bundle \
|
|
17
|
+
&& bundle exec rake db:schema:load || bundle exec rake db:migrate
|
|
18
|
+
bundle exec daimon_skycrawlers exec $MAIN
|
|
19
|
+
;;
|
|
20
|
+
setup)
|
|
21
|
+
bundle install --path=vendor/bundle
|
|
22
|
+
bundle exec rake db:schema:load
|
|
23
|
+
;;
|
|
24
|
+
none)
|
|
25
|
+
echo NOP
|
|
26
|
+
;;
|
|
27
|
+
sleep)
|
|
28
|
+
sleep 1d
|
|
29
|
+
;;
|
|
30
|
+
esac
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
#!/bin/bash
|
|
2
|
+
|
|
3
|
+
set -e
|
|
4
|
+
|
|
5
|
+
createuser -e -U $POSTGRES_USER -d $DATABASE_USER
|
|
6
|
+
createdb -e -U $POSTGRES_USER -E UTF-8 -O $DATABASE_USER $DATABASE_USER
|
|
7
|
+
createdb -e -U $POSTGRES_USER -E UTF-8 -O $DATABASE_USER ${DATABASE_PREFIX}_development
|
|
8
|
+
createdb -e -U $POSTGRES_USER -E UTF-8 -O $DATABASE_USER ${DATABASE_PREFIX}_test
|
|
9
|
+
psql -U postgres -c "ALTER ROLE $DATABASE_USER WITH PASSWORD '$DATABASE_PASSWORD';"
|
|
@@ -12,7 +12,9 @@ module DaimonSkycrawlers
|
|
|
12
12
|
# @param process_name [String] Process name
|
|
13
13
|
#
|
|
14
14
|
def run(process_name: default_process_name)
|
|
15
|
-
|
|
15
|
+
if config.shutdown_interval > 0
|
|
16
|
+
DaimonSkycrawlers::Timer.setup_shutdown_timer(config.queue_name_prefix, interval: config.shutdown_interval)
|
|
17
|
+
end
|
|
16
18
|
SongkickQueue::Worker.new(process_name, [DaimonSkycrawlers::Consumer::HTTPResponse]).run
|
|
17
19
|
end
|
|
18
20
|
|
|
@@ -21,7 +21,7 @@ module DaimonSkycrawlers
|
|
|
21
21
|
end
|
|
22
22
|
|
|
23
23
|
def process(message)
|
|
24
|
-
return unless apply_before_filters(message
|
|
24
|
+
return unless apply_before_filters(message)
|
|
25
25
|
call(message)
|
|
26
26
|
end
|
|
27
27
|
|
|
@@ -35,9 +35,9 @@ module DaimonSkycrawlers
|
|
|
35
35
|
|
|
36
36
|
private
|
|
37
37
|
|
|
38
|
-
def apply_before_filters(
|
|
38
|
+
def apply_before_filters(message)
|
|
39
39
|
@before_process_filters.all? do |filter|
|
|
40
|
-
filter.call(
|
|
40
|
+
filter.call(message)
|
|
41
41
|
end
|
|
42
42
|
end
|
|
43
43
|
end
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
require "daimon_skycrawlers/processor/base"
|
|
2
|
+
|
|
3
|
+
module DaimonSkycrawlers
|
|
4
|
+
module Processor
|
|
5
|
+
class Proc < Base
|
|
6
|
+
def initialize(handler)
|
|
7
|
+
super()
|
|
8
|
+
@handler = handler
|
|
9
|
+
end
|
|
10
|
+
|
|
11
|
+
def call(message)
|
|
12
|
+
@handler.call(message)
|
|
13
|
+
end
|
|
14
|
+
end
|
|
15
|
+
end
|
|
16
|
+
end
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: daimon_skycrawlers
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.8.0
|
|
4
|
+
version: 0.9.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- daimon developers
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2016-11-
|
|
11
|
+
date: 2016-11-24 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: thor
|
|
@@ -328,18 +328,27 @@ files:
|
|
|
328
328
|
- lib/daimon_skycrawlers/generator/new.rb
|
|
329
329
|
- lib/daimon_skycrawlers/generator/processor.rb
|
|
330
330
|
- lib/daimon_skycrawlers/generator/templates/crawler.rb.erb
|
|
331
|
+
- lib/daimon_skycrawlers/generator/templates/new/Dockerfile
|
|
332
|
+
- lib/daimon_skycrawlers/generator/templates/new/Dockerfile.db
|
|
331
333
|
- lib/daimon_skycrawlers/generator/templates/new/Gemfile
|
|
332
334
|
- lib/daimon_skycrawlers/generator/templates/new/README.md.erb
|
|
333
335
|
- lib/daimon_skycrawlers/generator/templates/new/Rakefile
|
|
334
336
|
- lib/daimon_skycrawlers/generator/templates/new/app/crawlers/sample_crawler.rb
|
|
337
|
+
- lib/daimon_skycrawlers/generator/templates/new/app/filters/sample_filter.rb
|
|
335
338
|
- lib/daimon_skycrawlers/generator/templates/new/app/processors/sample_processor.rb
|
|
336
339
|
- lib/daimon_skycrawlers/generator/templates/new/config/database.yml.erb
|
|
337
340
|
- lib/daimon_skycrawlers/generator/templates/new/config/init.rb
|
|
341
|
+
- lib/daimon_skycrawlers/generator/templates/new/docker-compose.yml.erb
|
|
342
|
+
- lib/daimon_skycrawlers/generator/templates/new/env.db.erb
|
|
343
|
+
- lib/daimon_skycrawlers/generator/templates/new/env.erb
|
|
344
|
+
- lib/daimon_skycrawlers/generator/templates/new/services/common/docker-entrypoint.sh
|
|
345
|
+
- lib/daimon_skycrawlers/generator/templates/new/services/db/init-user-db.sh
|
|
338
346
|
- lib/daimon_skycrawlers/generator/templates/processor.rb.erb
|
|
339
347
|
- lib/daimon_skycrawlers/logger.rb
|
|
340
348
|
- lib/daimon_skycrawlers/processor.rb
|
|
341
349
|
- lib/daimon_skycrawlers/processor/base.rb
|
|
342
350
|
- lib/daimon_skycrawlers/processor/default.rb
|
|
351
|
+
- lib/daimon_skycrawlers/processor/proc.rb
|
|
343
352
|
- lib/daimon_skycrawlers/processor/spider.rb
|
|
344
353
|
- lib/daimon_skycrawlers/queue.rb
|
|
345
354
|
- lib/daimon_skycrawlers/sitemap_parser.rb
|