daimon_skycrawlers 0.4.0 → 0.5.0
- checksums.yaml +4 -4
- data/lib/daimon_skycrawlers/cli.rb +2 -0
- data/lib/daimon_skycrawlers/consumer/http_response.rb +1 -1
- data/lib/daimon_skycrawlers/consumer/url.rb +1 -4
- data/lib/daimon_skycrawlers/crawler/base.rb +48 -7
- data/lib/daimon_skycrawlers/crawler/default.rb +2 -30
- data/lib/daimon_skycrawlers/filter/duplicate_checker.rb +9 -0
- data/lib/daimon_skycrawlers/filter/robots_txt_checker.rb +2 -0
- data/lib/daimon_skycrawlers/filter/update_checker.rb +10 -4
- data/lib/daimon_skycrawlers/generator/crawler.rb +22 -0
- data/lib/daimon_skycrawlers/generator/generate.rb +12 -0
- data/lib/daimon_skycrawlers/generator/new.rb +7 -4
- data/lib/daimon_skycrawlers/generator/processor.rb +22 -0
- data/lib/daimon_skycrawlers/generator/templates/crawler.rb.erb +13 -0
- data/lib/daimon_skycrawlers/generator/templates/processor.rb.erb +13 -0
- data/lib/daimon_skycrawlers/processor/base.rb +26 -0
- data/lib/daimon_skycrawlers/processor/spider.rb +7 -7
- data/lib/daimon_skycrawlers/version.rb +1 -1
- data/sample/spider/app/processors/blog_spider.rb +4 -4
- metadata +7 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: cbbd476464b9b44d5bc5f71bc999820126379e2d
+  data.tar.gz: 473ac2bd6f9c63a7307357aa2fae7da2be6efde7
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 788e296ef3fbd73c39db3ca0f6e6507e9c2074893c4895598c3e418849cf70a8951cc65c5fbff88d1a832c721edd53bd7d4078699090b6da36efaf460027a944
+  data.tar.gz: 42ba6b8ad282060811d2817d85c8b276ccc0890c4a15c546571f22594cb1a552be2838c166ced8fa609807dc0399b910acc53ba7f56e927222955af2008a831a
data/lib/daimon_skycrawlers/cli.rb
CHANGED
@@ -1,5 +1,6 @@
 require "thor"
 require "daimon_skycrawlers/generator/new"
+require "daimon_skycrawlers/generator/generate"
 require "daimon_skycrawlers/commands/enqueue"
 require "daimon_skycrawlers/commands/runner"
 require "daimon_skycrawlers/version"
@@ -7,6 +8,7 @@ require "daimon_skycrawlers/version"
 module DaimonSkycrawlers
   class CLI < Thor
     register(Generator::New, "new", "new NAME", "Create new project")
+    register(Generator::Generate, "generate", "generate COMMAND", "Generate new code")
     register(Commands::Runner, "exec", "exec [COMMAND]", "Execute crawler/processor")
     register(Commands::Enqueue, "enqueue", "enqueue [TYPE] URL [messages...]", "Enqueue URL")
 
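With this registration, `generate` joins `new`, `exec`, and `enqueue` as a Thor subcommand. A minimal dispatch sketch, assuming the gem's executable simply hands `ARGV` to `DaimonSkycrawlers::CLI.start` (the executable itself is not part of this diff, and `blog` is an illustrative name):

```ruby
# Hypothetical invocation sketch of the new subcommand.
require "daimon_skycrawlers/cli"

# Equivalent to running the CLI as: <executable> generate crawler blog
DaimonSkycrawlers::CLI.start(["generate", "crawler", "blog"])
```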
data/lib/daimon_skycrawlers/consumer/url.rb
CHANGED
@@ -44,14 +44,11 @@ module DaimonSkycrawlers
       # @private
       #
       def process(message)
-        url = message[:url]
-        depth = Integer(message[:depth] || 0)
-
         crawler_interval = DaimonSkycrawlers.configuration.crawler_interval
 
         # XXX When several crawlers are registered, how should they behave?
         self.class.crawlers.each do |crawler|
-          crawler.
+          crawler.process(message)
           if crawler.skipped?
             sleep(crawler_interval) if crawler.n_processed_urls % 50 == 0
           else
data/lib/daimon_skycrawlers/crawler/base.rb
CHANGED
@@ -20,13 +20,19 @@ module DaimonSkycrawlers
       # @return [void]
       attr_writer :storage
 
+      # @!attribute [r] n_processed_urls
+      # The number of processed URLs.
+      # @return [Integer]
+      attr_reader :n_processed_urls
+
       #
       # @param [String] Base URL for crawler
       # @param [Hash] options for Faraday
       #
-      def initialize(base_url = nil, options
+      def initialize(base_url = nil, faraday_options: {}, options: {})
         super()
         @base_url = base_url
+        @faraday_options = faraday_options
         @options = options
         @prepare = ->(connection) {}
         @skipped = false
@@ -41,7 +47,9 @@ module DaimonSkycrawlers
       # @yieldparam faraday [Faraday]
       #
       def setup_connection(options = {})
-
+        merged_options = @faraday_options.merge(options)
+        faraday_options = merged_options.empty? ? nil : merged_options
+        @connection = Faraday.new(@base_url, faraday_options) do |faraday|
          yield faraday
        end
      end
@@ -66,10 +74,26 @@ module DaimonSkycrawlers
      end
 
      def connection
-        @connection ||= Faraday.new(@base_url, @
+        @connection ||= Faraday.new(@base_url, @faraday_options)
+      end
+
+      def process(message, &block)
+        url = message.delete(:url)
+
+        @skipped = false
+        @n_processed_urls += 1
+        # url can be a path
+        url = connection.url_prefix + url
+
+        apply_filters(url)
+
+        unless skipped?
+          @prepare.call(connection)
+          fetch(url, message, &block)
+        end
      end
 
-      def fetch(path,
+      def fetch(path, message = {})
        raise NotImplementedError, "Must implement this method in subclass"
      end
 
@@ -81,11 +105,28 @@ module DaimonSkycrawlers
        @connection.post(path, params)
      end
 
-
-
+      private
+
+      def apply_filters(url)
+        if @options[:obey_robots_txt]
+          robots_txt_checker = DaimonSkycrawlers::Filter::RobotsTxtChecker.new(base_url: @base_url)
+          unless robots_txt_checker.allowed?(url)
+            skip(url)
+            return
+          end
+        end
+        update_checker = DaimonSkycrawlers::Filter::UpdateChecker.new(storage: storage)
+        unless update_checker.updated?(url.to_s, connection: connection)
+          skip(url)
+          return
+        end
      end
 
-
+      def skip(url)
+        log.info("Skip #{url}")
+        @skipped = true
+        schedule_to_process(url.to_s, heartbeat: true)
+      end
 
      def schedule_to_process(url, message = {})
        DaimonSkycrawlers::Processor.enqueue_http_response(url, message)
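Taken together, `Crawler::Base#process` now owns URL resolution, filtering, and bookkeeping, so a subclass only implements `#fetch`. A minimal sketch of the resulting API, assuming a configured project (storage and message queue already set up); `HeadlineCrawler` and the timeout value are illustrative, while `get` and `storage` are the same Base helpers the built-in Default crawler uses:

```ruby
require "daimon_skycrawlers/crawler/base"

class HeadlineCrawler < DaimonSkycrawlers::Crawler::Base
  # By the time fetch runs, Base#process has applied the robots.txt and
  # update filters and resolved the URL against the connection's url_prefix.
  def fetch(url, message = {})
    response = get(url)
    storage.save(url.to_s, response.headers, response.body)
  end
end

# faraday_options: (new in 0.5.0) is passed straight to Faraday.new;
# options: keeps crawler-level flags such as :obey_robots_txt.
crawler = HeadlineCrawler.new(
  "http://www.clear-code.com/",
  faraday_options: { request: { timeout: 10 } },
  options: { obey_robots_txt: true }
)
crawler.process(url: "/blog/")
```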
data/lib/daimon_skycrawlers/crawler/default.rb
CHANGED
@@ -10,43 +10,15 @@ module DaimonSkycrawlers
     # This crawler can GET given URL and store response to storage
     #
     class Default < Base
-      def fetch(
-
-        @skipped = false
-        url = connection.url_prefix + path
-        if @options[:obey_robots_txt]
-          robots_txt_checker = DaimonSkycrawlers::Filter::RobotsTxtChecker.new(base_url: @base_url)
-          unless robots_txt_checker.call(url)
-            skip(url)
-            return
-          end
-        end
-        update_checker = DaimonSkycrawlers::Filter::UpdateChecker.new(storage: storage)
-        unless update_checker.call(url.to_s, connection: connection)
-          skip(url)
-          return
-        end
-        @prepare.call(connection)
-        response = get(path)
+      def fetch(url, message)
+        response = get(url)
         data = [url.to_s, response.headers, response.body]
 
         yield(*data) if block_given?
 
         storage.save(*data)
-        message = {
-          depth: depth
-        }
-        message = message.merge(kw)
         schedule_to_process(url.to_s, message)
       end
-
-      private
-
-      def skip(url)
-        log.info("Skip #{url}")
-        @skipped = true
-        schedule_to_process(url.to_s, heartbeat: true)
-      end
     end
   end
 end
data/lib/daimon_skycrawlers/filter/duplicate_checker.rb
CHANGED
@@ -28,6 +28,15 @@ module DaimonSkycrawlers
         @urls << url
         true
       end
+
+      #
+      # @param [String] url to check duplication. If given URL is
+      #   relative URL, use `@base_url + url` as absolute URL.
+      # @return [true|false] Return true when duplicated, otherwise return false.
+      #
+      def duplicated?(url)
+        !call(url)
+      end
     end
   end
 end
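`duplicated?` is simply the negation of `#call`, so existing filter chains keep working while call sites get a readable predicate. A usage sketch (the base URL mirrors the sample app; note that `call` records a URL the first time it sees it):

```ruby
require "daimon_skycrawlers/filter/duplicate_checker"

checker = DaimonSkycrawlers::Filter::DuplicateChecker.new(base_url: "http://www.clear-code.com/blog/")
checker.duplicated?("entry/1.html") # => false; the URL is recorded as seen
checker.duplicated?("entry/1.html") # => true; already seen
```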
data/lib/daimon_skycrawlers/filter/update_checker.rb
CHANGED
@@ -32,11 +32,17 @@ module DaimonSkycrawlers
       else
         headers = Faraday.head(url)
       end
-
-
-
-
+      case
+      when headers.key?("etag") && page.etag
+        headers["etag"] != page.etag
+      when headers.key?("last-modified") && page.last_modified_at
+        headers["last-modified"] > page.last_modified_at
+      else
+        true
+      end
     end
+
+    alias updated? call
   end
 end
 end
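The `case` makes the decision explicit: prefer ETag comparison, fall back to Last-Modified, and treat a page with neither usable header as updated. `updated?` is aliased to `call` so `Crawler::Base#apply_filters` reads naturally. A usage sketch, assuming the RDB storage backend; its require path is inferred from `DaimonSkycrawlers::Storage::RDB` elsewhere in this diff:

```ruby
require "daimon_skycrawlers/filter/update_checker"
require "daimon_skycrawlers/storage/rdb" # assumed path

storage = DaimonSkycrawlers::Storage::RDB.new
checker = DaimonSkycrawlers::Filter::UpdateChecker.new(storage: storage)
# Without a connection:, the filter falls back to Faraday.head(url).
checker.updated?("http://www.clear-code.com/blog/") # => true when the stored page looks stale
```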
data/lib/daimon_skycrawlers/generator/crawler.rb
ADDED
@@ -0,0 +1,22 @@
+require "thor"
+
+module DaimonSkycrawlers
+  module Generator
+    class Crawler < Thor::Group
+      include Thor::Actions
+
+      argument :name
+
+      def self.source_root
+        File.join(__dir__, "templates")
+      end
+
+      def create_files
+        config = {
+          class_name: name.classify,
+        }
+        template("crawler.rb.erb", "app/crawlers/#{name.underscore}.rb", config)
+      end
+    end
+  end
+end
data/lib/daimon_skycrawlers/generator/generate.rb
ADDED
@@ -0,0 +1,12 @@
+require "thor"
+require "daimon_skycrawlers/generator/crawler"
+require "daimon_skycrawlers/generator/processor"
+
+module DaimonSkycrawlers
+  module Generator
+    class Generate < Thor
+      register(Crawler, "crawler", "crawler NAME", "Generate new crawler")
+      register(Processor, "processor", "processor NAME", "Generate new processor")
+    end
+  end
+end
data/lib/daimon_skycrawlers/generator/new.rb
CHANGED
@@ -22,16 +22,19 @@ module DaimonSkycrawlers
     ].each do |path|
       template("#{path}.erb", "#{name}/#{path}")
     end
+    migration_options = {
+      destination_root: File.join(destination_root, name),
+      timestamps: true
+    }
     invoke(MigrationGenerator, [
       "CreatePage",
       "url:string",
       "headers:text",
       "body:binary",
       "last_modified_at:datetime",
-      "etag:string"
-      "timestamps"
+      "etag:string"
     ],
-
+    migration_options)
   end
 
   def copy_files
@@ -56,7 +59,7 @@ module DaimonSkycrawlers
       set_local_assigns!
       validate_file_name!
       dest = options[:destination_root]
-      migration_template
+      migration_template(@migration_template, "#{dest}/db/migrate/#{file_name}.rb")
     end
   end
 end
data/lib/daimon_skycrawlers/generator/processor.rb
ADDED
@@ -0,0 +1,22 @@
+require "thor"
+
+module DaimonSkycrawlers
+  module Generator
+    class Processor < Thor::Group
+      include Thor::Actions
+
+      argument :name
+
+      def self.source_root
+        File.join(__dir__, "templates")
+      end
+
+      def create_files
+        config = {
+          class_name: name.classify,
+        }
+        template("processor.rb.erb", "app/processors/#{name.underscore}.rb", config)
+      end
+    end
+  end
+end
data/lib/daimon_skycrawlers/generator/templates/crawler.rb.erb
ADDED
@@ -0,0 +1,13 @@
+require "daimon_skycrawlers"
+require "daimon_skycrawlers/crawler"
+require "daimon_skycrawlers/crawler/base"
+
+class <%= config[:class_name] %> < DaimonSkycrawlers::Crawler::Base
+  def fetch(path, **kw)
+    # Implement your crawler here
+  end
+end
+
+base_url = ""
+crawler = <%= config[:class_name] %>.new(base_url)
+DaimonSkycrawlers.register_crawler(crawler)
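For illustration, running the crawler generator with the hypothetical name `blog` would render this template with `class_name: "Blog"` into `app/crawlers/blog.rb`:

```ruby
require "daimon_skycrawlers"
require "daimon_skycrawlers/crawler"
require "daimon_skycrawlers/crawler/base"

class Blog < DaimonSkycrawlers::Crawler::Base
  def fetch(path, **kw)
    # Implement your crawler here
  end
end

base_url = ""
crawler = Blog.new(base_url)
DaimonSkycrawlers.register_crawler(crawler)
```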
data/lib/daimon_skycrawlers/generator/templates/processor.rb.erb
ADDED
@@ -0,0 +1,13 @@
+require "daimon_skycrawlers"
+require "daimon_skycrawlers/processor"
+require "daimon_skycrawlers/processor/base"
+
+class <%= config[:class_name] %> < DaimonSkycrawlers::Processor::Base
+  def call(message)
+    # Implement your processor here
+  end
+end
+
+base_url = ""
+processor = <%= config[:class_name] %>.new(base_url)
+DaimonSkycrawlers.register_processor(processor)
data/lib/daimon_skycrawlers/processor/base.rb
CHANGED
@@ -7,6 +7,24 @@ module DaimonSkycrawlers
       include DaimonSkycrawlers::LoggerMixin
       include DaimonSkycrawlers::ConfigMixin
 
+      def initialize
+        super
+        @before_process_filters = []
+      end
+
+      def before_process(filter = nil, &block)
+        if block_given?
+          @before_process_filters << block
+        else
+          @before_process_filters << filter if filter.respond_to?(:call)
+        end
+      end
+
+      def process(message)
+        return unless apply_before_filters(message[:url])
+        call(message)
+      end
+
       def call(message)
         raise "Implement this method in subclass"
       end
@@ -14,6 +32,14 @@ module DaimonSkycrawlers
       def storage
         @storage ||= DaimonSkycrawlers::Storage::RDB.new
       end
+
+      private
+
+      def apply_before_filters(url)
+        @before_process_filters.all? do |filter|
+          filter.call(url)
+        end
+      end
     end
   end
 end
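With the new hook, `Processor::Base#process` runs every registered filter against `message[:url]` and only invokes `#call` when all of them return a truthy value. A sketch with an illustrative processor and filter:

```ruby
require "daimon_skycrawlers/processor/base"

class HtmlOnlyProcessor < DaimonSkycrawlers::Processor::Base
  def call(message)
    # handle the fetched page here
  end
end

processor = HtmlOnlyProcessor.new
# Block form; before_process also accepts any object responding to #call.
processor.before_process do |url|
  url.end_with?(".html")
end
```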
data/lib/daimon_skycrawlers/processor/spider.rb
CHANGED
@@ -8,17 +8,17 @@ module DaimonSkycrawlers
 
       def initialize
         super
-        @
+        @link_filters = []
         @doc = nil
         @links = nil
         @enqueue = true
       end
 
-      def
+      def append_link_filter(filter = nil, &block)
         if block_given?
-          @
+          @link_filters << block
         else
-          @
+          @link_filters << filter if filter.respond_to?(:call)
         end
       end
 
@@ -53,15 +53,15 @@ module DaimonSkycrawlers
           element["href"]
         end
         urls.uniq!
-
+        apply_link_filters(urls) || []
       end
 
-      def
+      def apply_link_filters(urls)
         return if urls.nil?
         return if urls.empty?
         log.debug("Candidate URLs: #{urls.size}")
         urls = urls.select do |url|
-          @
+          @link_filters.all? {|filter| filter.call(url) }
         end
         log.debug("Filtered URLs: #{urls.size}")
         urls
data/sample/spider/app/processors/blog_spider.rb
CHANGED
@@ -6,11 +6,11 @@ require "daimon_skycrawlers/filter/update_checker"
 default_processor = DaimonSkycrawlers::Processor::Default.new
 spider = DaimonSkycrawlers::Processor::Spider.new
 #spider.enqueue = false
-spider.
+spider.append_link_filter do |url|
   uri = URI(url)
   uri.host.nil? || uri.host == "www.clear-code.com"
 end
-spider.
+spider.append_link_filter do |url|
   case url
   when %r!\A(\.\./|/|#)!
     false
@@ -19,9 +19,9 @@ spider.append_filter do |url|
   end
 end
 duplicate_checker = DaimonSkycrawlers::Filter::DuplicateChecker.new(base_url: "http://www.clear-code.com/blog/")
-spider.
+spider.append_link_filter(duplicate_checker)
 update_checker = DaimonSkycrawlers::Filter::UpdateChecker.new(base_url: "http://www.clear-code.com/blog/")
-spider.
+spider.append_link_filter(update_checker)
 
 DaimonSkycrawlers.register_processor(default_processor)
 DaimonSkycrawlers.register_processor(spider)
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: daimon_skycrawlers
 version: !ruby/object:Gem::Version
-  version: 0.4.0
+  version: 0.5.0
 platform: ruby
 authors:
 - Ryunosuke SATO
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2016-10-
+date: 2016-10-18 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler
@@ -324,7 +324,11 @@ files:
 - lib/daimon_skycrawlers/filter/duplicate_checker.rb
 - lib/daimon_skycrawlers/filter/robots_txt_checker.rb
 - lib/daimon_skycrawlers/filter/update_checker.rb
+- lib/daimon_skycrawlers/generator/crawler.rb
+- lib/daimon_skycrawlers/generator/generate.rb
 - lib/daimon_skycrawlers/generator/new.rb
+- lib/daimon_skycrawlers/generator/processor.rb
+- lib/daimon_skycrawlers/generator/templates/crawler.rb.erb
 - lib/daimon_skycrawlers/generator/templates/new/Gemfile
 - lib/daimon_skycrawlers/generator/templates/new/README.md.erb
 - lib/daimon_skycrawlers/generator/templates/new/Rakefile
@@ -332,6 +336,7 @@ files:
 - lib/daimon_skycrawlers/generator/templates/new/app/processors/sample_processor.rb
 - lib/daimon_skycrawlers/generator/templates/new/config/database.yml.erb
 - lib/daimon_skycrawlers/generator/templates/new/config/init.rb
+- lib/daimon_skycrawlers/generator/templates/processor.rb.erb
 - lib/daimon_skycrawlers/logger.rb
 - lib/daimon_skycrawlers/processor.rb
 - lib/daimon_skycrawlers/processor/base.rb