crawl_station 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/README.md +1 -0
- data/lib/crawl_station/application_record.rb +1 -1
- data/lib/crawl_station/cache_adapters/abstract_adapter.rb +2 -0
- data/lib/crawl_station/cache_adapters/db_adapter.rb +16 -0
- data/lib/crawl_station/command/create.rb +23 -6
- data/lib/crawl_station/command/generate.rb +12 -15
- data/lib/crawl_station/command.rb +0 -4
- data/lib/crawl_station/concerns/crawl_station_class.rb +105 -0
- data/lib/crawl_station/concerns/parser_class_concern.rb +20 -0
- data/lib/crawl_station/db/migrate/001_create_crawl_station_schedules.rb +11 -0
- data/lib/crawl_station/db/migrate/002_create_crawl_station_caches.rb +10 -0
- data/lib/crawl_station/fundation/parse_struct.rb +1 -10
- data/lib/crawl_station/model/cache.rb +7 -0
- data/lib/crawl_station/model/schedule.rb +17 -0
- data/lib/crawl_station/schedule_adapters/db_adapter.rb +31 -3
- data/lib/crawl_station/tasks/db.rake +29 -11
- data/lib/crawl_station/templates/create/database.erb.yml +19 -0
- data/lib/crawl_station/templates/create/{Gemfile → dirs/Gemfile} +0 -0
- data/lib/crawl_station/templates/create/{Rakefile → dirs/Rakefile} +0 -0
- data/lib/crawl_station/templates/create/{config → dirs/config}/boot.rb +0 -0
- data/lib/crawl_station/templates/create/{config → dirs/config}/initializers/station_config.rb +0 -0
- data/lib/crawl_station/templates/create/{lib → dirs/lib}/tasks/.gitkeep +0 -0
- data/lib/crawl_station/templates/create/{module → dirs/module}/.gitkeep +0 -0
- data/lib/crawl_station/utils.rb +24 -2
- data/lib/crawl_station/version.rb +1 -1
- data/lib/crawl_station.rb +8 -94
- metadata +14 -7
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 386f97d639bbb07e7f7b7f3026d2f79ae69603e9
|
|
4
|
+
data.tar.gz: 2ba26dd0a58d0a9ceaf3569d7a4412d154360b66
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: e8fc613c1eb904446d2d5a2bbc9eaa81edca8204490d79f83be273a96129269d56b890fdcd9616f051de5c83268bd64e077ff61555e00073eff29490b071e104
|
|
7
|
+
data.tar.gz: 9e5bba4b62586acc547d24a819dca6e147240dd12a44cfd8aed2a63030a54079a2aeb851df04bd3c8fe58dbdf3eac5d8c6f578bf68dd663f2724d87ff173b2f5
|
data/.gitignore
CHANGED
data/README.md
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
# CrawlStation
|
|
2
2
|
|
|
3
|
+
[Gem Version](https://badge.fury.io/rb/crawl_station)
|
|
3
4
|
[Build Status](https://travis-ci.org/watsy0007/crawl_station)
|
|
4
5
|
[Code Climate](https://codeclimate.com/github/watsy0007/crawl_station)
|
|
5
6
|
[Test Coverage](https://codeclimate.com/github/watsy0007/crawl_station/coverage)
|
|
@@ -1,6 +1,22 @@
|
|
|
1
1
|
module CrawlStation
|
|
2
2
|
module CacheAdapters
|
|
3
3
|
class DbAdapter < AbstractAdapter
|
|
4
|
+
def [](key)
|
|
5
|
+
recent_schedule.where(link: key).first
|
|
6
|
+
end
|
|
7
|
+
|
|
8
|
+
def include?(key)
|
|
9
|
+
recent_schedule.exists?(link: key)
|
|
10
|
+
end
|
|
11
|
+
|
|
12
|
+
private
|
|
13
|
+
def recent_schedule
|
|
14
|
+
schedule.progressed.recent_1_day
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
def schedule
|
|
18
|
+
@_schedule ||= CS::Model::Schedule
|
|
19
|
+
end
|
|
4
20
|
end
|
|
5
21
|
end
|
|
6
22
|
end
|
|
@@ -5,15 +5,32 @@ module CrawlStation
|
|
|
5
5
|
desc 'create station', 'station [create|new] hello'
|
|
6
6
|
def create(args)
|
|
7
7
|
dir_root = args.first
|
|
8
|
-
|
|
9
|
-
template_create_path = "#{
|
|
8
|
+
logs "create project #{dir_root}"
|
|
9
|
+
template_create_path = "#{utils.templates_path}/create/dirs"
|
|
10
10
|
FileUtils.copy_entry template_create_path, dir_root
|
|
11
|
-
|
|
11
|
+
render_database(dir_root)
|
|
12
|
+
logs "cd #{dir_root}"
|
|
12
13
|
path = "#{Dir.pwd}/#{dir_root}"
|
|
13
14
|
Dir.chdir(path)
|
|
14
|
-
|
|
15
|
-
IO.popen('bundle install').each { |line|
|
|
16
|
-
|
|
15
|
+
logs 'bundle install'
|
|
16
|
+
IO.popen('bundle install').each { |line| logs line.chomp }
|
|
17
|
+
logs 'done'
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
private
|
|
21
|
+
def utils
|
|
22
|
+
CS::Utils
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
def logs(msg)
|
|
26
|
+
CS.logger.debug msg
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
def render_database(dir_root)
|
|
30
|
+
template_db_path = "#{utils.templates_path}/create/database.erb.yml"
|
|
31
|
+
opts = { project_name: dir_root }
|
|
32
|
+
context = utils.render_context(template_db_path, opts)
|
|
33
|
+
utils.render("#{CS.root}/#{dir_root}/config/database.yml", context)
|
|
17
34
|
end
|
|
18
35
|
end
|
|
19
36
|
end
|
|
@@ -9,7 +9,7 @@ module CrawlStation
|
|
|
9
9
|
module_name, file_name = args.shift, args.shift
|
|
10
10
|
|
|
11
11
|
file_path = dest_path(module_name, file_name)
|
|
12
|
-
template_path =
|
|
12
|
+
template_path = utils.template_filepath('generate/migration.erb')
|
|
13
13
|
render(file_path, template_path, class_name: file_name.camelize)
|
|
14
14
|
logs "generate migration #{module_name}:#{file_name} done"
|
|
15
15
|
end
|
|
@@ -18,10 +18,10 @@ module CrawlStation
|
|
|
18
18
|
def new_module(args)
|
|
19
19
|
raise "create module #{args} error" if !args.is_a?(Array) || args.empty?
|
|
20
20
|
module_name = args.shift
|
|
21
|
-
m_path =
|
|
21
|
+
m_path = utils.module_path(module_name)
|
|
22
22
|
return logs("#{module_name} module exist!") if File.exist?(m_path)
|
|
23
23
|
logs "create new module #{module_name}"
|
|
24
|
-
template_m_path = "#{
|
|
24
|
+
template_m_path = "#{utils.templates_path}/generate/module"
|
|
25
25
|
FileUtils.copy_entry template_m_path, m_path
|
|
26
26
|
logs "create #{module_name} done"
|
|
27
27
|
end
|
|
@@ -31,8 +31,8 @@ module CrawlStation
|
|
|
31
31
|
raise "generate parser #{args} error" if !args.is_a?(Array) || args.size < 2
|
|
32
32
|
module_name, parser_name = args.shift, args.shift
|
|
33
33
|
logs "create #{module_name} parser #{parser_name}"
|
|
34
|
-
template_parser_path =
|
|
35
|
-
template_item_path =
|
|
34
|
+
template_parser_path = utils.template_filepath('generate/parser.erb')
|
|
35
|
+
template_item_path = utils.template_filepath('generate/item.erb')
|
|
36
36
|
opts = {
|
|
37
37
|
module_class_name: module_name.camelize,
|
|
38
38
|
class_name: parser_name.camelize
|
|
@@ -54,23 +54,20 @@ module CrawlStation
|
|
|
54
54
|
protected
|
|
55
55
|
|
|
56
56
|
def render(file_path, template_path, opts = {})
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
f.write render_context(template_path, opts)
|
|
60
|
-
end
|
|
57
|
+
context = utils.render_context(template_path, opts)
|
|
58
|
+
utils.render(file_path, context)
|
|
61
59
|
end
|
|
62
60
|
|
|
63
|
-
def
|
|
64
|
-
|
|
65
|
-
ERB.new(template).result(OpenStruct.new(opts).instance_eval { binding })
|
|
61
|
+
def utils
|
|
62
|
+
CS::Utils
|
|
66
63
|
end
|
|
67
64
|
|
|
68
65
|
def logs(msg)
|
|
69
|
-
|
|
66
|
+
CS.logger.debug msg
|
|
70
67
|
end
|
|
71
68
|
|
|
72
69
|
def dest_path(module_name, file_name)
|
|
73
|
-
m_path =
|
|
70
|
+
m_path = utils.module_path(module_name)
|
|
74
71
|
raise "module: #{module_name} not exist" unless Dir.exist?(m_path)
|
|
75
72
|
migrate_path = "#{m_path}/db/migrate"
|
|
76
73
|
raise "module: migration path #{migrate_path} not exist" unless Dir.exist?(migrate_path)
|
|
@@ -79,7 +76,7 @@ module CrawlStation
|
|
|
79
76
|
end
|
|
80
77
|
|
|
81
78
|
def parser_path(module_name, file_name, type = 'parser')
|
|
82
|
-
m_path =
|
|
79
|
+
m_path = utils.module_path(module_name)
|
|
83
80
|
raise "module: #{module_name} not exist" unless Dir.exist?(m_path)
|
|
84
81
|
"#{m_path}/#{type}/#{file_name}.rb"
|
|
85
82
|
end
|
|
@@ -1,7 +1,5 @@
|
|
|
1
1
|
module CrawlStation
|
|
2
2
|
module Command
|
|
3
|
-
class CommandError < Exception
|
|
4
|
-
end
|
|
5
3
|
extend self
|
|
6
4
|
ALIASES = { 'module' => 'new_module' }
|
|
7
5
|
|
|
@@ -11,8 +9,6 @@ module CrawlStation
|
|
|
11
9
|
obj = klass.new
|
|
12
10
|
return obj.send(m_name, args) if obj.respond_to?(m_name)
|
|
13
11
|
true
|
|
14
|
-
rescue NameError => e
|
|
15
|
-
Logger.warn "#{command}: #{args} not exist"
|
|
16
12
|
rescue Errno::ENOENT, RuntimeError => e
|
|
17
13
|
Logger.warn "#{e.message}\n#{e.backtrace[0..10].join("\n")}"
|
|
18
14
|
end
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
require 'active_support/concern'
|
|
2
|
+
module CrawlStation
|
|
3
|
+
module Concerns
|
|
4
|
+
module CrawlStationClass
|
|
5
|
+
extend ActiveSupport::Concern
|
|
6
|
+
|
|
7
|
+
class_methods do
|
|
8
|
+
def config
|
|
9
|
+
yield @config if block_given?
|
|
10
|
+
end
|
|
11
|
+
|
|
12
|
+
def env
|
|
13
|
+
@_env ||= ActiveSupport::StringInquirer.new(ENV['CRAWL_STATION_ENV'] || 'development')
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
def env=(environment)
|
|
17
|
+
@_env = ActiveSupport::StringInquirer.new(environment)
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
def root
|
|
21
|
+
Pathname.new(File.expand_path('.'))
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
def logger
|
|
25
|
+
@_logger ||= begin
|
|
26
|
+
CrawlStation::Logger.logger ||= ::Logger.new(STDERR) do |log|
|
|
27
|
+
log.level = ::Logger.DEBUG
|
|
28
|
+
end
|
|
29
|
+
end
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
def logger=(logger)
|
|
33
|
+
@_logger = CrawlStation::Logger.logger = logger
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
def concurrent_count
|
|
37
|
+
@_concurrent_count ||= ENV['CRAWL_STATION_CONCURRENT_COUNT'] || 1
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
def concurrent_count=(count)
|
|
41
|
+
@_concurrent_count = count
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
def schedule
|
|
45
|
+
Schedule.adapter
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
def schedule=(item)
|
|
49
|
+
Schedule.adapter = item
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
def cache
|
|
53
|
+
Cache.adapter
|
|
54
|
+
end
|
|
55
|
+
|
|
56
|
+
def cache=(item)
|
|
57
|
+
Cache.adapter = item
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
def proxies
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
def proxies(item)
|
|
64
|
+
end
|
|
65
|
+
|
|
66
|
+
def load_tasks
|
|
67
|
+
task_path = File.expand_path('../../', __FILE__)
|
|
68
|
+
[
|
|
69
|
+
"#{task_path}/tasks/*.rake",
|
|
70
|
+
"#{CrawlStation.root}/lib/tasks/**/*.rake"
|
|
71
|
+
].each { |path| Dir[path].each { |f| load f } }
|
|
72
|
+
end
|
|
73
|
+
|
|
74
|
+
def load_modules
|
|
75
|
+
%w(item parser config).each do |path|
|
|
76
|
+
Dir["#{CS.root}/module/*/#{path}/**/*.rb"].each { |f| require f }
|
|
77
|
+
end
|
|
78
|
+
end
|
|
79
|
+
|
|
80
|
+
def init_application
|
|
81
|
+
@config ||= CrawlStation::Configuration
|
|
82
|
+
Dir["#{CS.root}/config/initializers/**/*.rb"].each { |f| require f }
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
def config_adapter
|
|
86
|
+
adapter = @config.adapter || 'memory'
|
|
87
|
+
CS.schedule = adapter
|
|
88
|
+
CS.cache = adapter
|
|
89
|
+
end
|
|
90
|
+
|
|
91
|
+
def config_parsers
|
|
92
|
+
parsers = @config.parsers || []
|
|
93
|
+
parsers.each { |p| schedule.push p }
|
|
94
|
+
end
|
|
95
|
+
|
|
96
|
+
def boot
|
|
97
|
+
init_application
|
|
98
|
+
load_modules
|
|
99
|
+
config_adapter
|
|
100
|
+
config_parsers
|
|
101
|
+
end
|
|
102
|
+
end
|
|
103
|
+
end
|
|
104
|
+
end
|
|
105
|
+
end
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
require 'active_support/concern'
|
|
2
|
+
module CrawlStation
|
|
3
|
+
module Concerns
|
|
4
|
+
module ParserClassConcern
|
|
5
|
+
extend ActiveSupport::Concern
|
|
6
|
+
|
|
7
|
+
included do
|
|
8
|
+
def parser_class
|
|
9
|
+
path = "#{namespace}/parser/#{parser}"
|
|
10
|
+
path.camelize.constantize
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
def item_class
|
|
14
|
+
path = "#{namespace}/item/#{parser}"
|
|
15
|
+
path.camelize.constantize
|
|
16
|
+
end
|
|
17
|
+
end
|
|
18
|
+
end
|
|
19
|
+
end
|
|
20
|
+
end
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
class CreateCrawlStationSchedules < ActiveRecord::Migration[5.0]
|
|
2
|
+
def change
|
|
3
|
+
create_table :crawl_station_schedules do |t|
|
|
4
|
+
t.string :namespace
|
|
5
|
+
t.string :parser
|
|
6
|
+
t.string :link, index: true
|
|
7
|
+
t.integer :status, default: 0
|
|
8
|
+
t.timestamps null: false
|
|
9
|
+
end
|
|
10
|
+
end
|
|
11
|
+
end
|
|
@@ -2,6 +2,7 @@ require 'ostruct'
|
|
|
2
2
|
module CrawlStation
|
|
3
3
|
class ParseStruct
|
|
4
4
|
extend Forwardable
|
|
5
|
+
include Concerns::ParserClassConcern
|
|
5
6
|
attr_accessor :parse
|
|
6
7
|
|
|
7
8
|
%w(namespace parser item link).each do |method_name|
|
|
@@ -14,16 +15,6 @@ module CrawlStation
|
|
|
14
15
|
@parse.deep_symbolize_keys!
|
|
15
16
|
end
|
|
16
17
|
|
|
17
|
-
def parser_class
|
|
18
|
-
path = "#{namespace}/parser/#{parser}"
|
|
19
|
-
path.camelize.constantize
|
|
20
|
-
end
|
|
21
|
-
|
|
22
|
-
def item_class
|
|
23
|
-
path = "#{namespace}/item/#{parser}"
|
|
24
|
-
path.camelize.constantize
|
|
25
|
-
end
|
|
26
|
-
|
|
27
18
|
def [](item)
|
|
28
19
|
get_value(item)
|
|
29
20
|
end
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
module CrawlStation
|
|
2
|
+
module Model
|
|
3
|
+
class Schedule < ApplicationRecord
|
|
4
|
+
include Concerns::ParserClassConcern
|
|
5
|
+
establish_connection database_config
|
|
6
|
+
|
|
7
|
+
enum status: [:waiting, :progressing, :done, :failed]
|
|
8
|
+
|
|
9
|
+
default_scope -> { order(id: :desc) }
|
|
10
|
+
|
|
11
|
+
scope :waitings, -> { where(status: :waiting) }
|
|
12
|
+
scope :progressings, -> { where(status: :progressing) }
|
|
13
|
+
scope :progressed, -> { where(status: [:done]) }
|
|
14
|
+
scope :recent_1_day, -> { where('created_at > ?', 1.day.ago) }
|
|
15
|
+
end
|
|
16
|
+
end
|
|
17
|
+
end
|
|
@@ -1,17 +1,45 @@
|
|
|
1
1
|
module CrawlStation
|
|
2
2
|
module ScheduleAdapters
|
|
3
3
|
class DbAdapter < AbstractAdapter
|
|
4
|
-
def push(
|
|
4
|
+
def push(item)
|
|
5
|
+
item = ParseStruct.new(item) if item.is_a?(Hash)
|
|
6
|
+
schedule.new(
|
|
7
|
+
parser: item.parser,
|
|
8
|
+
namespace: item.namespace,
|
|
9
|
+
link: item.link
|
|
10
|
+
).save
|
|
5
11
|
end
|
|
6
12
|
def pop
|
|
13
|
+
schedule.transaction do
|
|
14
|
+
model = schedule.waitings.first
|
|
15
|
+
model.progressing!
|
|
16
|
+
model
|
|
17
|
+
end
|
|
7
18
|
end
|
|
8
19
|
def empty?
|
|
20
|
+
schedule.waitings.size.zero?
|
|
9
21
|
end
|
|
10
22
|
|
|
11
|
-
def failed(
|
|
23
|
+
def failed(item)
|
|
24
|
+
return if item.nil?
|
|
25
|
+
schedule.transaction do
|
|
26
|
+
model = schedule.find_by(id: item.id)
|
|
27
|
+
model.failed! if item.present?
|
|
28
|
+
end
|
|
12
29
|
end
|
|
13
30
|
|
|
14
|
-
def done(
|
|
31
|
+
def done(item)
|
|
32
|
+
return if item.nil?
|
|
33
|
+
schedule.transaction do
|
|
34
|
+
model = schedule.find_by(id: item.id)
|
|
35
|
+
model.done! if item.present?
|
|
36
|
+
end
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
private
|
|
40
|
+
|
|
41
|
+
def schedule
|
|
42
|
+
@_schedule ||= CS::Model::Schedule
|
|
15
43
|
end
|
|
16
44
|
end
|
|
17
45
|
end
|
|
@@ -1,28 +1,46 @@
|
|
|
1
1
|
namespace :db do
|
|
2
|
+
def exec_task(task, args)
|
|
3
|
+
Rake::Task[task].reenable
|
|
4
|
+
Rake::Task[task].invoke(args)
|
|
5
|
+
end
|
|
6
|
+
|
|
2
7
|
def db_operation(operator)
|
|
3
|
-
|
|
8
|
+
task_name = "db:#{operator}"
|
|
9
|
+
exec_task(task_name, 'crawl_station')
|
|
4
10
|
Dir["#{CrawlStation.root}/module/*"].each do |dir|
|
|
5
|
-
module_name =
|
|
6
|
-
|
|
11
|
+
module_name = dir.split('/').last
|
|
12
|
+
exec_task(task_name, module_name)
|
|
7
13
|
end
|
|
8
14
|
end
|
|
9
15
|
|
|
10
16
|
desc 'db:create[module_name] if module_name is nil, create all module databases'
|
|
11
|
-
task :create, [:module_name]
|
|
17
|
+
task :create, [:module_name] do |_t, args|
|
|
12
18
|
m_name = args[:module_name]
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
19
|
+
if m_name.nil?
|
|
20
|
+
CS.logger.debug 'db create all database'
|
|
21
|
+
db_operation(:create)
|
|
22
|
+
CS.logger.debug 'db create all done'
|
|
23
|
+
else
|
|
24
|
+
m_name = nil if m_name == 'crawl_station'
|
|
25
|
+
CS.logger.debug("create database #{m_name}")
|
|
26
|
+
CrawlStation::Utils.create_database(m_name)
|
|
27
|
+
CS.logger.debug("create database #{m_name} done")
|
|
28
|
+
end
|
|
16
29
|
end
|
|
17
30
|
|
|
18
31
|
desc 'db:migrate[module_name] if module_name is nil, migrate all module migrations'
|
|
19
32
|
task :migrate, [:module_name] => :environment do |_t, args|
|
|
20
33
|
version = ENV['VERSION']
|
|
21
34
|
module_name = args[:module_name]
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
35
|
+
if module_name.nil?
|
|
36
|
+
CS.logger.debug 'db migrate all migration'
|
|
37
|
+
db_operation(:migrate)
|
|
38
|
+
CS.logger.debug 'db migrate all done'
|
|
39
|
+
else
|
|
40
|
+
path = "#{CrawlStation::Utils.module_path(module_name)}/db/migrate"
|
|
41
|
+
path = "#{CrawlStation::Utils.gem_path}/db/migrate" if module_name == 'crawl_station'
|
|
42
|
+
ActiveRecord::Migrator.migrate(path, version ? version.to_i : nil)
|
|
43
|
+
end
|
|
26
44
|
end
|
|
27
45
|
|
|
28
46
|
task :environment, [:module_name] do |_t, args|
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
default: &default
|
|
2
|
+
adapter: 'mysql2'
|
|
3
|
+
host: <%%= ENV['CRAWL_STATION_DBHOST'] || '0.0.0.0' %>
|
|
4
|
+
port: <%%= ENV['CRAWL_STATION_DBPORT'] || 3306 %>
|
|
5
|
+
username: <%%= ENV['CRAWL_STATION_DBUSERNAME'] || 'root' %>
|
|
6
|
+
password: <%%= ENV['CRAWL_STATION_DBPASSWORD'] || 'my-secret-pw' %>
|
|
7
|
+
database: <%%= ENV['CRAWL_STATION_DBDATABASE'] || '<%= project_name %>_default' %>
|
|
8
|
+
|
|
9
|
+
development:
|
|
10
|
+
<<: *default
|
|
11
|
+
database: <%%= ENV['CRAWL_STATION_DBDATABASE'] || '<%= project_name %>_development' %>
|
|
12
|
+
|
|
13
|
+
test:
|
|
14
|
+
<<: *default
|
|
15
|
+
database: <%%= ENV['CRAWL_STATION_DBDATABASE'] || '<%= project_name %>_test' %>
|
|
16
|
+
|
|
17
|
+
product:
|
|
18
|
+
<<: *default
|
|
19
|
+
database: <%%= ENV['CRAWL_STATION_DBDATABASE'] || '<%= project_name %>_product' %>
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
data/lib/crawl_station/templates/create/{config → dirs/config}/initializers/station_config.rb
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
data/lib/crawl_station/utils.rb
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
1
2
|
require 'yaml'
|
|
2
3
|
require 'active_support/core_ext'
|
|
3
4
|
module CrawlStation
|
|
@@ -20,8 +21,20 @@ module CrawlStation
|
|
|
20
21
|
::YAML.load(result).deep_symbolize_keys[CS.env.to_sym]
|
|
21
22
|
end
|
|
22
23
|
|
|
24
|
+
def create_database(module_name)
|
|
25
|
+
config = database_config(module_name)
|
|
26
|
+
ActiveRecord::Base.logger = CS.logger
|
|
27
|
+
begin
|
|
28
|
+
ActiveRecord::Base.establish_connection config
|
|
29
|
+
ActiveRecord::Base.connection
|
|
30
|
+
rescue
|
|
31
|
+
ActiveRecord::Base.establish_connection config.merge(database: nil)
|
|
32
|
+
ActiveRecord::Base.connection.create_database config[:database]
|
|
33
|
+
end
|
|
34
|
+
end
|
|
35
|
+
|
|
23
36
|
def templates_path
|
|
24
|
-
"#{gem_path}/
|
|
37
|
+
"#{gem_path}/templates"
|
|
25
38
|
end
|
|
26
39
|
|
|
27
40
|
def template_filepath(path)
|
|
@@ -29,7 +42,16 @@ module CrawlStation
|
|
|
29
42
|
end
|
|
30
43
|
|
|
31
44
|
def gem_path
|
|
32
|
-
File.expand_path('../../', __FILE__)
|
|
45
|
+
File.expand_path('../../crawl_station/', __FILE__)
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
def render_context(path, opts = {})
|
|
49
|
+
template = IO.read(path)
|
|
50
|
+
ERB.new(template).result(OpenStruct.new(opts).instance_eval { binding })
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
def render(dest_path, context)
|
|
54
|
+
File.open(dest_path,'w+') { |f| f.write context }
|
|
33
55
|
end
|
|
34
56
|
end
|
|
35
57
|
end
|
data/lib/crawl_station.rb
CHANGED
|
@@ -48,105 +48,19 @@ module CrawlStation # :nodoc:
|
|
|
48
48
|
extend ActiveSupport::Autoload
|
|
49
49
|
|
|
50
50
|
autoload :AdapterConcern
|
|
51
|
+
autoload :ParserClassConcern
|
|
52
|
+
autoload :CrawlStationClass
|
|
51
53
|
end
|
|
52
54
|
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
yield @config if block_given?
|
|
56
|
-
end
|
|
57
|
-
|
|
58
|
-
def env
|
|
59
|
-
@_env ||= ActiveSupport::StringInquirer.new(ENV['CRAWL_STATION_ENV'] || 'development')
|
|
60
|
-
end
|
|
61
|
-
|
|
62
|
-
def env=(environment)
|
|
63
|
-
@_env = ActiveSupport::StringInquirer.new(environment)
|
|
64
|
-
end
|
|
65
|
-
|
|
66
|
-
def root
|
|
67
|
-
Pathname.new(File.expand_path('.'))
|
|
68
|
-
end
|
|
69
|
-
|
|
70
|
-
def logger
|
|
71
|
-
@_logger ||= begin
|
|
72
|
-
CrawlStation::Logger.logger ||= ::Logger.new(STDERR) do |log|
|
|
73
|
-
log.level = ::Logger.DEBUG
|
|
74
|
-
end
|
|
75
|
-
end
|
|
76
|
-
end
|
|
77
|
-
|
|
78
|
-
def logger=(logger)
|
|
79
|
-
@_logger = CrawlStation::Logger.logger = logger
|
|
80
|
-
end
|
|
81
|
-
|
|
82
|
-
def concurrent_count
|
|
83
|
-
@_concurrent_count ||= ENV['CRAWL_STATION_CONCURRENT_COUNT'] || 1
|
|
84
|
-
end
|
|
85
|
-
|
|
86
|
-
def concurrent_count=(count)
|
|
87
|
-
@_concurrent_count = count
|
|
88
|
-
end
|
|
89
|
-
|
|
90
|
-
def schedule
|
|
91
|
-
Schedule.adapter
|
|
92
|
-
end
|
|
93
|
-
|
|
94
|
-
def schedule=(item)
|
|
95
|
-
Schedule.adpater(item)
|
|
96
|
-
end
|
|
97
|
-
|
|
98
|
-
def cache
|
|
99
|
-
Cache.adapter
|
|
100
|
-
end
|
|
101
|
-
|
|
102
|
-
def cache=(item)
|
|
103
|
-
Cache.adapter(item)
|
|
104
|
-
end
|
|
105
|
-
|
|
106
|
-
def proxies
|
|
107
|
-
end
|
|
108
|
-
|
|
109
|
-
def proxies(item)
|
|
110
|
-
end
|
|
111
|
-
|
|
112
|
-
def load_tasks
|
|
113
|
-
task_path = File.expand_path('../', __FILE__)
|
|
114
|
-
[
|
|
115
|
-
"#{task_path}/crawl_station/tasks/*.rake",
|
|
116
|
-
"#{CrawlStation.root}/lib/tasks/**/*.rake"
|
|
117
|
-
].each { |path| Dir[path].each { |f| load f } }
|
|
118
|
-
end
|
|
119
|
-
|
|
120
|
-
def load_modules
|
|
121
|
-
%w(item parser config).each do |path|
|
|
122
|
-
Dir["#{CS.root}/module/*/#{path}/**/*.rb"].each { |f| require f }
|
|
123
|
-
end
|
|
124
|
-
end
|
|
125
|
-
|
|
126
|
-
def init_application
|
|
127
|
-
@config ||= CrawlStation::Configuration
|
|
128
|
-
Dir["#{CS.root}/config/initializers/**/*.rb"].each { |f| require f }
|
|
129
|
-
end
|
|
130
|
-
|
|
131
|
-
def config_adapter
|
|
132
|
-
adapter = @config.adapter || 'memory'
|
|
133
|
-
schedule = adapter
|
|
134
|
-
cache = adapter
|
|
135
|
-
end
|
|
136
|
-
|
|
137
|
-
def config_parsers
|
|
138
|
-
parsers = @config.parsers || []
|
|
139
|
-
parsers.each { |p| schedule.push p }
|
|
140
|
-
end
|
|
55
|
+
module Model
|
|
56
|
+
extend ActiveSupport::Autoload
|
|
141
57
|
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
load_modules
|
|
145
|
-
config_adapter
|
|
146
|
-
config_parsers
|
|
147
|
-
end
|
|
58
|
+
autoload :Cache
|
|
59
|
+
autoload :Schedule
|
|
148
60
|
end
|
|
149
61
|
end
|
|
150
62
|
|
|
151
63
|
CS = CrawlStation
|
|
64
|
+
CS.send(:include, CS::Concerns::CrawlStationClass)
|
|
65
|
+
|
|
152
66
|
Celluloid.boot
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: crawl_station
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.
|
|
4
|
+
version: 0.1.2
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- watsy0007
|
|
@@ -141,10 +141,16 @@ files:
|
|
|
141
141
|
- lib/crawl_station/command/create.rb
|
|
142
142
|
- lib/crawl_station/command/generate.rb
|
|
143
143
|
- lib/crawl_station/concerns/adapter_concern.rb
|
|
144
|
+
- lib/crawl_station/concerns/crawl_station_class.rb
|
|
145
|
+
- lib/crawl_station/concerns/parser_class_concern.rb
|
|
144
146
|
- lib/crawl_station/configuration.rb
|
|
147
|
+
- lib/crawl_station/db/migrate/001_create_crawl_station_schedules.rb
|
|
148
|
+
- lib/crawl_station/db/migrate/002_create_crawl_station_caches.rb
|
|
145
149
|
- lib/crawl_station/fundation/parse_struct.rb
|
|
146
150
|
- lib/crawl_station/launcher.rb
|
|
147
151
|
- lib/crawl_station/logger.rb
|
|
152
|
+
- lib/crawl_station/model/cache.rb
|
|
153
|
+
- lib/crawl_station/model/schedule.rb
|
|
148
154
|
- lib/crawl_station/producer.rb
|
|
149
155
|
- lib/crawl_station/ruby_version_check.rb
|
|
150
156
|
- lib/crawl_station/schedule.rb
|
|
@@ -155,12 +161,13 @@ files:
|
|
|
155
161
|
- lib/crawl_station/tasks/db.rake
|
|
156
162
|
- lib/crawl_station/tasks/launcher.rake
|
|
157
163
|
- lib/crawl_station/tasks/module.rake
|
|
158
|
-
- lib/crawl_station/templates/create/
|
|
159
|
-
- lib/crawl_station/templates/create/
|
|
160
|
-
- lib/crawl_station/templates/create/
|
|
161
|
-
- lib/crawl_station/templates/create/config/
|
|
162
|
-
- lib/crawl_station/templates/create/
|
|
163
|
-
- lib/crawl_station/templates/create/
|
|
164
|
+
- lib/crawl_station/templates/create/database.erb.yml
|
|
165
|
+
- lib/crawl_station/templates/create/dirs/Gemfile
|
|
166
|
+
- lib/crawl_station/templates/create/dirs/Rakefile
|
|
167
|
+
- lib/crawl_station/templates/create/dirs/config/boot.rb
|
|
168
|
+
- lib/crawl_station/templates/create/dirs/config/initializers/station_config.rb
|
|
169
|
+
- lib/crawl_station/templates/create/dirs/lib/tasks/.gitkeep
|
|
170
|
+
- lib/crawl_station/templates/create/dirs/module/.gitkeep
|
|
164
171
|
- lib/crawl_station/templates/generate/item.erb
|
|
165
172
|
- lib/crawl_station/templates/generate/migration.erb
|
|
166
173
|
- lib/crawl_station/templates/generate/module/config/.gitkeep
|