dca 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/.rspec +2 -0
- data/Gemfile +48 -0
- data/Gemfile.lock +126 -0
- data/Guardfile +8 -0
- data/LICENSE.txt +20 -0
- data/README.rdoc +19 -0
- data/Rakefile +48 -0
- data/VERSION +1 -0
- data/bin/dca +5 -0
- data/dca.gemspec +160 -0
- data/lib/dca.rb +64 -0
- data/lib/dca/cli.rb +32 -0
- data/lib/dca/commands/area.rb +133 -0
- data/lib/dca/commands/templates/area/analyzer.rb.erb +34 -0
- data/lib/dca/commands/templates/area/area.rb.erb +2 -0
- data/lib/dca/commands/templates/area/models.rb.erb +2 -0
- data/lib/dca/commands/templates/area/page.rb.erb +17 -0
- data/lib/dca/commands/templates/area/position.rb.erb +8 -0
- data/lib/dca/commands/templates/config.yml.erb +38 -0
- data/lib/dca/commands/templates/spec/analyzer_spec.rb.erb +15 -0
- data/lib/dca/commands/templates/spec/spec_helper.rb.erb +2 -0
- data/lib/dca/config.rb +20 -0
- data/lib/dca/helpers.rb +2 -0
- data/lib/dca/helpers/logger.rb +50 -0
- data/lib/dca/jobs.rb +3 -0
- data/lib/dca/jobs/analyzer_job.rb +119 -0
- data/lib/dca/jobs/job.rb +62 -0
- data/lib/dca/models.rb +5 -0
- data/lib/dca/models/base_model.rb +73 -0
- data/lib/dca/models/binder.rb +68 -0
- data/lib/dca/models/binder_helper.rb +48 -0
- data/lib/dca/models/nokogiri_binder.rb +43 -0
- data/lib/dca/models/position.rb +15 -0
- data/lib/dca/net.rb +1 -0
- data/lib/dca/net/browser_helper.rb +20 -0
- data/lib/dca/notifier.rb +2 -0
- data/lib/dca/notifier/notifier.rb +11 -0
- data/lib/dca/notifier/redis/models/analyze_notify.rb +12 -0
- data/lib/dca/notifier/redis/models/failure_notify.rb +8 -0
- data/lib/dca/notifier/redis/models/fetch_notify.rb +15 -0
- data/lib/dca/notifier/redis/models/session.rb +52 -0
- data/lib/dca/notifier/redis/notifier.rb +25 -0
- data/lib/dca/notifier/redis_notifier.rb +9 -0
- data/lib/dca/storage.rb +3 -0
- data/lib/dca/storage/elasticsearch_storage.rb +80 -0
- data/lib/dca/storage/mongo_storage.rb +51 -0
- data/lib/dca/storage/storage.rb +55 -0
- data/spec/analyzer_spec.rb +64 -0
- data/spec/area_task_spec.rb +45 -0
- data/spec/base_model_spec.rb +34 -0
- data/spec/binder_spec.rb +69 -0
- data/spec/config.yml +18 -0
- data/spec/elasticsearch_storage_spec.rb +28 -0
- data/spec/fixtures/page.html +12 -0
- data/spec/fixtures/positions.yml +13 -0
- data/spec/fixtures/positions_with_error.yml +14 -0
- data/spec/fixtures/states.yml +3 -0
- data/spec/job_spec.rb +31 -0
- data/spec/mock/analyzer_job.rb +30 -0
- data/spec/mock/file_storage.rb +28 -0
- data/spec/mock/notify_object.rb +13 -0
- data/spec/mock/page.rb +13 -0
- data/spec/mock/position.rb +40 -0
- data/spec/mock/web_notifier.rb +30 -0
- data/spec/mongo_storage_spec.rb +20 -0
- data/spec/redis_notifier_spec.rb +98 -0
- data/spec/spec_helper.rb +27 -0
- data/spec/support/storage_examples.rb +103 -0
- metadata +408 -0
data/lib/dca/jobs/job.rb
ADDED
@@ -0,0 +1,62 @@
|
|
1
|
+
module DCA
  module Jobs
    # Base class for background jobs that are enqueued through Resque.
    #
    # Subclasses implement #perform and may optionally define #on_success
    # and #on_failure(exception) hooks, which #safe_perform! invokes when
    # they are publicly available on the instance.
    class Job
      include Helpers::Logger
      logger_name :queue

      # Options hash the job instance was built with.
      attr_reader :options

      class << self
        # Queue name: the second-to-last "::"-separated segment of the
        # class name (i.e. the name of the module enclosing the job class).
        def queue
          to_s.split("::")[-2]
        end

        # Add a job to queue. Queue name is a class module name.
        #
        # @param options [Hash] payload handed to Resque together with the class
        def create(options={})
          Resque.enqueue(self, options)
        end

        # Builds a job from the (symbolized, in place) options, runs it
        # through #safe_perform! and returns the job instance.
        #
        # @param options [Hash] job options; keys are symbolized destructively
        # @return [Job] the instance that was executed
        def perform(options={})
          job = new(options.symbolize_keys!)
          job.safe_perform!
          job
        end
      end

      # Stores the options and installs a QUIT signal handler that flags
      # this instance as shutting down (see #shutdown?).
      #
      # @param options [Hash] job options
      def initialize(options = {})
        @options = options

        trap('QUIT') { shutdown }
      end

      # Runs #perform, then #on_success when the instance responds to it.
      # Any raised exception is handed to #on_failure when the instance
      # responds to it, otherwise it is re-raised. #destroy always runs.
      #
      # NOTE(review): Exception (not StandardError) is rescued, so signal and
      # exit exceptions also reach on_failure - confirm this is intended.
      def safe_perform!
        begin
          perform
          on_success if respond_to?(:on_success)
        rescue Exception => error
          raise error unless respond_to?(:on_failure)

          on_failure(error)
        ensure
          destroy
        end
      end

      # Work to be done by the job; must be overridden.
      #
      # @raise [NotImplementedError] always, in this base class
      def perform
        raise NotImplementedError
      end

      # Cleanup hook called after every run by #safe_perform!; a no-op here.
      def destroy
      end

      # @return [true, nil] true once the QUIT handler has fired
      def shutdown?
        @shutdown
      end

      private

      # Marks the job as shutting down.
      def shutdown
        @shutdown = true
      end
    end
  end
end
|
data/lib/dca/models.rb
ADDED
@@ -0,0 +1,5 @@
|
|
1
|
+
require File.expand_path('../models/binder', __FILE__)
|
2
|
+
require File.expand_path('../models/binder_helper', __FILE__)
|
3
|
+
require File.expand_path('../models/nokogiri_binder', __FILE__)
|
4
|
+
require File.expand_path('../models/base_model', __FILE__)
|
5
|
+
require File.expand_path('../models/position', __FILE__)
|
@@ -0,0 +1,73 @@
|
|
1
|
+
module DCA
  module Models
    # Common base for models: ActiveModel validations/serialization plus the
    # Binder field-declaration DSL and the DCA::Storage mixin.
    class BaseModel
      extend ActiveModel::Naming
      extend ActiveModel::Translation
      include ActiveModel::Validations
      include ActiveModel::Conversion
      include ActiveModel::Serialization
      include Binder
      include DCA::Storage

      # presumably provided by DCA::Storage - defined outside this file
      establish_connection

      # Validate declared associations before the model's own validations.
      set_callback :validate, :before, :validate_associations

      attr_accessor :id, :base_id, :created_at, :updated_at

      # Copies every key/value pair of +params+ into an instance variable
      # named after the key.
      #
      # @param params [Hash, nil] initial attribute values; nil is ignored
      def initialize(params={})
        return unless params

        params.each { |name, value| instance_variable_set("@#{name}", value) }
      end

      # Always reports the model as persisted.
      def persisted?
        true
      end

      # Serializable hash of the model, including every association whose
      # type is not one of Binder::COMPLEX_TYPE.
      #
      # @return [Hash]
      def to_hash
        nested = self.class.associations(true).map { |field, _options| field.to_s }
        serializable_hash(include: nested)
      end

      # Hash of instance variable names (without "@") to their values,
      # excluding the validation bookkeeping variables.
      #
      # NOTE(review): the hash is built once and cached, so later attribute
      # changes are not reflected - confirm callers rely on that.
      #
      # @return [Hash{String => Object}]
      def attributes
        @attributes ||= begin
          snapshot = {}
          instance_variables.each do |ivar|
            snapshot[ivar.to_s.delete('@')] = instance_variable_get(ivar)
          end
          snapshot.delete 'errors'
          snapshot.delete 'validation_context'
          snapshot
        end
      end

      # Stamps the modification time (UTC).
      def before_update
        self.updated_at = Time.now.utc
      end

      # Stamps the creation time (UTC).
      def before_create
        self.created_at = Time.now.utc
      end

      # Validates every non-nil associated object (or each element when the
      # association holds an Array) and copies its errors onto this model.
      def validate_associations
        self.class.associations.each do |field, _options|
          target = send(field)
          next if target.nil?

          children = target.is_a?(Array) ? target : [target]
          children.each { |child| validate_child child, field }
        end
      end

      private

      # Adds the child's full error messages under +field+ when the child
      # supports validation and is invalid.
      def validate_child(object, field)
        return unless object.respond_to?(:invalid?) && object.invalid?

        errors.add field, object.errors.full_messages
      end
    end
  end
end
|
@@ -0,0 +1,68 @@
|
|
1
|
+
module DCA
  module Models
    # Mixin providing a small DSL (has_one / has_many) for declaring model
    # fields, plus #bind, which fills those fields from parsed content using
    # the class's binder.
    module Binder
      extend ActiveSupport::Concern

      # Field types that `associations(true)` filters OUT, i.e. the types
      # listed here are the scalar ones; everything else is kept.
      COMPLEX_TYPE = [:integer, :float, :string, :symbol, :datetime]

      module ClassMethods
        # Binder class used to parse content for this model. Resolved from
        # +name+ on the first call and memoized; later calls ignore +name+.
        #
        # @param name [Symbol] binder prefix, e.g. :nokogiri => NokogiriBinder
        # @return [Class]
        def binder(name = :nokogiri)
          @binder ||= "DCA::Models::#{name.to_s.camelize}Binder".constantize
        end

        # Declares a single-valued field.
        #
        # @param field [Symbol] field name
        # @param args [Array] optional type followed by an options hash
        def has_one(field, *args)
          options = args.extract_options!
          add_association field, :one, args.first, options
        end

        # Declares a collection field.
        #
        # @param field [Symbol] field name
        # @param args [Array] optional type followed by an options hash
        def has_many(field, *args)
          options = args.extract_options!
          add_association field, :many, args.first, options
        end

        # Declared associations keyed by field name.
        #
        # @param complex [Boolean] when true, return only the associations
        #   whose type is not listed in COMPLEX_TYPE
        # @return [Hash]
        def associations(complex = false)
          @associations ||= {}

          return @associations.reject { |_field, options| COMPLEX_TYPE.include?(options[:type]) } if complex

          @associations
        end

        # Copies the parent's associations to the subclass.
        #
        # `super` is called so that `inherited` hooks contributed by other
        # mixins on the same class keep running (ActiveModel::Validations,
        # for instance, uses this hook to give each subclass its own copy of
        # the validators); overriding without `super` silently disables them.
        def inherited(child)
          super
          associations.each { |field, options| child.associations[field] = options }
        end

        private

        # Registers the association and defines an accessor for the field.
        def add_association(field, association, type, options = {})
          associations[field] = { :association => association, :field => field, :type => type, :options => options }
          # NOTE(review): the guard below checks a class-level instance
          # variable, not whether the accessor already exists - confirm intent.
          instance_eval do
            attr_accessor field.to_sym unless instance_variable_defined? "@#{field}"
          end
        end
      end

      # Parses +content+ with the class's binder for every declared
      # association and stores each result in the matching instance variable.
      #
      # @param content [Object] content handed to the binder's `parse`
      # @return [self]
      def bind(content)
        self.class.associations.each do |field, options|
          update field, self.class.binder.parse(self, content, options), options[:options][:append]
        end
        self
      end

      private

      # Stores +value+ in the field's instance variable. With +append+ the
      # value is added (via +) to whatever is already stored; a nil +value+
      # leaves the existing content untouched instead of raising on `+ nil`.
      def update(field, value, append = false)
        ivar = "@#{field}"

        if append
          current = instance_variable_get(ivar)
          value = value.nil? ? current : current + value if current
        end

        instance_variable_set ivar, value
      end
    end
  end
end
|
68
|
+
|
@@ -0,0 +1,48 @@
|
|
1
|
+
module DCA
  module Models
    # Class-level helpers shared by binder implementations: value
    # conversion, option post-processing and model type lookup.
    module BinderHelper
      extend ActiveSupport::Concern

      module ClassMethods
        # Converts a raw value to the requested scalar type. Unknown types
        # return the value unchanged.
        #
        # Numeric types keep only digits, dots and commas, treat commas as
        # dots, and then use to_i / to_f.
        #
        # @param value [Object] raw value
        # @param type [Symbol, Object] :integer, :float, :string, :symbol,
        #   :datetime or anything else (pass-through)
        def convert(value, type)
          case type
          when :integer then value.to_s.gsub(/[^\d.,]/,'').gsub(/,/,'.').to_i
          when :float   then value.to_s.gsub(/[^\d.,]/,'').gsub(/,/,'.').to_f
          when :string  then value.to_s.strip
          when :symbol  then value.to_s.to_sym
          when :datetime
            value.nil? ? nil : DateTime.parse(value).to_time.utc
          else
            value
          end
        end

        # Applies the :default, :regex and :parser options to a raw value.
        #
        # * nil value      -> options[:default] (which may itself be nil)
        # * non-nil value  -> first capture group of options[:regex], if given
        # * options[:parser] names a method on +object+ that receives the
        #   intermediate result and returns the final one
        def parse_options(object, value, options)
          intermediate =
            if value.nil?
              options[:default]
            elsif options[:regex].nil?
              value
            else
              value[options[:regex], 1]
            end

          parser = options[:parser]
          return intermediate if parser.nil?

          object.send(parser, intermediate)
        end

        # Resolves the model class for a field. The class name is the
        # singularized, camelized field name, optionally prefixed with the
        # camelized value of the +polymorphic+ attribute of +object+. It is
        # looked up globally first, then inside the namespace of
        # +object+'s class.
        def find_type(object, field, polymorphic = nil)
          type_name = field.to_s.singularize.camelize
          if polymorphic
            type_name = "#{object.send(polymorphic).to_s.camelize}#{type_name}"
          end

          resolved = type_name.safe_constantize
          return resolved unless resolved.nil?

          "#{object.class.to_s.deconstantize}::#{type_name}".constantize
        end
      end
    end
  end
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
module DCA
  module Models
    # Binder that extracts field values from content using CSS selectors
    # (`at_css` / `css`).
    class NokogiriBinder
      include BinderHelper

      class << self
        # Dispatches on the association kind; returns nil for any kind
        # other than :one or :many.
        #
        # @param object [Object] model being bound
        # @param content [Object] node to read from
        # @param params [Hash] association description (:association,
        #   :field, :type, :options)
        def parse(object, content, params)
          case params[:association]
          when :one  then parse_one(object, content, params)
          when :many then parse_many(object, content, params)
          end
        end

        # Reads a single value: the element matched by :selector (or the
        # content itself when no selector is given), taking its :attribute
        # or its text content, then applying parse_options and convert.
        def parse_one(object, content, params)
          options = params[:options] || {}
          selector = options[:selector]
          attribute = options[:attribute]

          element = selector.nil? ? content : content.at_css(selector)

          raw = nil
          unless element.nil?
            raw = attribute.nil? ? element.content : element[attribute]
          end

          convert parse_options(object, raw, options), params[:type]
        end

        # Builds one model per node matched by :selector and binds it to
        # that node. The model type is looked up with find_type when no type
        # was declared or the association is polymorphic. Returns nil when
        # no selector is given.
        def parse_many(object, content, params)
          options = params[:options] || {}
          selector = options[:selector]

          type = params[:type]
          if type.nil? || options[:polymorphic]
            type = find_type(object, params[:field], options[:polymorphic])
          end

          return nil if selector.nil?

          content.css(selector).map { |node| type.new.bind node }
        end
      end
    end
  end
end
|
43
|
+
|
@@ -0,0 +1,15 @@
|
|
1
|
+
module DCA
  module Models
    # Model with a checksum and publication date on top of the BaseModel
    # fields; base_id and checksum are mandatory.
    class Position < BaseModel
      attr_accessor :checksum
      attr_accessor :published_at

      validates_presence_of :base_id, :checksum

      # Defines @id up front so that "id" appears in the attributes hash
      # even when no id was supplied, then defers to BaseModel.
      def initialize(*args)
        @id = nil
        super(*args)
      end
    end
  end
end
|
data/lib/dca/net.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require File.expand_path('../net/browser_helper', __FILE__)
|
@@ -0,0 +1,20 @@
|
|
1
|
+
module DCA
  module Net
    # Mixin that gives the including object a lazily created, memoized
    # Watir browser and a way to close it.
    module BrowserHelper
      # Returns the memoized browser, creating it on first use. The
      # arguments only matter for the call that actually creates it.
      #
      # @param name [Symbol] browser name passed to Watir::Browser.new
      # @param profile [String] browser profile name
      # @return [Watir::Browser]
      def browser(name = :ff, profile = 'default')
        @browser ||= Watir::Browser.new name, :profile => profile
      end

      # Closes the browser if one was opened and forgets it, so that the
      # next #browser call starts a fresh instance instead of handing back
      # the closed one. Does nothing when no browser is open.
      #
      # @return [Object, nil] whatever the browser's `close` returned, or nil
      def browser_close
        return unless @browser

        result = @browser.close
        @browser = nil
        result
      end
    end
  end
end
|
data/lib/dca/notifier.rb
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
module DCA
  module Redis
    # Ohm model describing one session, with per-state counters for
    # analyzed and fetched items and a set of recorded failures.
    class Session < Ohm::Model
      attribute :uid
      attribute :created
      attribute :project
      attribute :area

      index :uid
      index :project
      index :area

      set :analyzed, DCA::Redis::AnalyzeNotify
      set :fetched,  DCA::Redis::FetchNotify
      set :failures, DCA::Redis::FailureNotify

      # A session requires a uid.
      def validate
        assert_present :uid
      end

      # First analyze notification recorded for +state+, or nil.
      def analyze_state(state)
        analyzed.find(state: state).first
      end

      # First fetch notification recorded for +state+, or nil.
      def fetch_state(state)
        fetched.find(state: state).first
      end

      # Increments the :count counter of the analyze notification for
      # +state+, creating and attaching the notification first if needed.
      def inc_analyze(state)
        counter = analyze_state(state)
        if counter.nil?
          counter = AnalyzeNotify.create(state: state)
          analyzed.add counter
        end
        counter.incr :count
      end

      # Increments the counter named by +result+ on the fetch notification
      # for +state+, creating and attaching the notification first if needed.
      def inc_fetch(state, result)
        counter = fetch_state(state)
        if counter.nil?
          counter = FetchNotify.create(state: state)
          fetched.add counter
        end
        counter.incr result
      end

      # Records the exception's message and backtrace as a failure.
      def add_failure(exception)
        failure = FailureNotify.create(message: exception.message, stack: exception.backtrace)
        failures.add failure
      end
    end
  end
end
|