dca 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/.rspec +2 -0
- data/Gemfile +48 -0
- data/Gemfile.lock +126 -0
- data/Guardfile +8 -0
- data/LICENSE.txt +20 -0
- data/README.rdoc +19 -0
- data/Rakefile +48 -0
- data/VERSION +1 -0
- data/bin/dca +5 -0
- data/dca.gemspec +160 -0
- data/lib/dca.rb +64 -0
- data/lib/dca/cli.rb +32 -0
- data/lib/dca/commands/area.rb +133 -0
- data/lib/dca/commands/templates/area/analyzer.rb.erb +34 -0
- data/lib/dca/commands/templates/area/area.rb.erb +2 -0
- data/lib/dca/commands/templates/area/models.rb.erb +2 -0
- data/lib/dca/commands/templates/area/page.rb.erb +17 -0
- data/lib/dca/commands/templates/area/position.rb.erb +8 -0
- data/lib/dca/commands/templates/config.yml.erb +38 -0
- data/lib/dca/commands/templates/spec/analyzer_spec.rb.erb +15 -0
- data/lib/dca/commands/templates/spec/spec_helper.rb.erb +2 -0
- data/lib/dca/config.rb +20 -0
- data/lib/dca/helpers.rb +2 -0
- data/lib/dca/helpers/logger.rb +50 -0
- data/lib/dca/jobs.rb +3 -0
- data/lib/dca/jobs/analyzer_job.rb +119 -0
- data/lib/dca/jobs/job.rb +62 -0
- data/lib/dca/models.rb +5 -0
- data/lib/dca/models/base_model.rb +73 -0
- data/lib/dca/models/binder.rb +68 -0
- data/lib/dca/models/binder_helper.rb +48 -0
- data/lib/dca/models/nokogiri_binder.rb +43 -0
- data/lib/dca/models/position.rb +15 -0
- data/lib/dca/net.rb +1 -0
- data/lib/dca/net/browser_helper.rb +20 -0
- data/lib/dca/notifier.rb +2 -0
- data/lib/dca/notifier/notifier.rb +11 -0
- data/lib/dca/notifier/redis/models/analyze_notify.rb +12 -0
- data/lib/dca/notifier/redis/models/failure_notify.rb +8 -0
- data/lib/dca/notifier/redis/models/fetch_notify.rb +15 -0
- data/lib/dca/notifier/redis/models/session.rb +52 -0
- data/lib/dca/notifier/redis/notifier.rb +25 -0
- data/lib/dca/notifier/redis_notifier.rb +9 -0
- data/lib/dca/storage.rb +3 -0
- data/lib/dca/storage/elasticsearch_storage.rb +80 -0
- data/lib/dca/storage/mongo_storage.rb +51 -0
- data/lib/dca/storage/storage.rb +55 -0
- data/spec/analyzer_spec.rb +64 -0
- data/spec/area_task_spec.rb +45 -0
- data/spec/base_model_spec.rb +34 -0
- data/spec/binder_spec.rb +69 -0
- data/spec/config.yml +18 -0
- data/spec/elasticsearch_storage_spec.rb +28 -0
- data/spec/fixtures/page.html +12 -0
- data/spec/fixtures/positions.yml +13 -0
- data/spec/fixtures/positions_with_error.yml +14 -0
- data/spec/fixtures/states.yml +3 -0
- data/spec/job_spec.rb +31 -0
- data/spec/mock/analyzer_job.rb +30 -0
- data/spec/mock/file_storage.rb +28 -0
- data/spec/mock/notify_object.rb +13 -0
- data/spec/mock/page.rb +13 -0
- data/spec/mock/position.rb +40 -0
- data/spec/mock/web_notifier.rb +30 -0
- data/spec/mongo_storage_spec.rb +20 -0
- data/spec/redis_notifier_spec.rb +98 -0
- data/spec/spec_helper.rb +27 -0
- data/spec/support/storage_examples.rb +103 -0
- metadata +408 -0
data/lib/dca/jobs/job.rb
ADDED
@@ -0,0 +1,62 @@
|
|
1
|
+
module DCA
  module Jobs
    # Base class for queue jobs. Subclasses implement #perform and may define
    # the optional public hooks #on_success and #on_failure(exception).
    class Job
      include Helpers::Logger
      logger_name :queue

      # Options hash the job was built with.
      attr_reader :options

      class << self
        # Queue name: the second-to-last "::"-separated segment of the class
        # name, i.e. the module that directly encloses the job class.
        def queue
          to_s.split("::")[-2]
        end

        # Add a job to queue. Queue name is a class module name
        def create(options={})
          Resque.enqueue(self, options)
        end

        # Builds a job from +options+ (keys are symbolized in place), runs it
        # through #safe_perform! and returns the job instance.
        def perform(options={})
          new(options.symbolize_keys!).tap { |job| job.safe_perform! }
        end
      end

      def initialize(options = {})
        @options = options

        # A QUIT signal only raises the shutdown flag; see #shutdown?.
        trap('QUIT') { shutdown }
      end

      # Runs #perform, then #on_success when that hook is defined. Any raised
      # exception is handed to #on_failure when that hook is defined and is
      # re-raised otherwise. #destroy runs in every case.
      #
      # Exception (not only StandardError) is rescued, which also covers the
      # NotImplementedError raised by the default #perform.
      def safe_perform!
        begin
          perform
          on_success if respond_to?(:on_success)
        rescue Exception => error
          raise error unless respond_to?(:on_failure)
          on_failure(error)
        ensure
          destroy
        end
      end

      # The job's work. Must be overridden by subclasses.
      def perform
        raise NotImplementedError
      end

      # Cleanup hook invoked at the end of #safe_perform!; a no-op here.
      def destroy
      end

      # Truthy once a QUIT signal has been received by this process.
      def shutdown?
        @shutdown
      end

      private

      # Raises the flag reported by #shutdown?.
      def shutdown
        @shutdown = true
      end
    end
  end
end
|
data/lib/dca/models.rb
ADDED
@@ -0,0 +1,5 @@
|
|
1
|
+
require File.expand_path('../models/binder', __FILE__)
|
2
|
+
require File.expand_path('../models/binder_helper', __FILE__)
|
3
|
+
require File.expand_path('../models/nokogiri_binder', __FILE__)
|
4
|
+
require File.expand_path('../models/base_model', __FILE__)
|
5
|
+
require File.expand_path('../models/position', __FILE__)
|
@@ -0,0 +1,73 @@
|
|
1
|
+
module DCA
  module Models
    # Common base for models: ActiveModel naming, translation, validations,
    # conversion and serialization, plus the Binder and DCA::Storage mixins.
    class BaseModel
      extend ActiveModel::Naming
      extend ActiveModel::Translation
      include ActiveModel::Validations
      include ActiveModel::Conversion
      include ActiveModel::Serialization
      include Binder
      include DCA::Storage

      # NOTE(review): presumably provided by DCA::Storage - confirm.
      establish_connection

      # Associated objects are checked before the model's own validations run.
      set_callback :validate, :before, :validate_associations

      attr_accessor :id, :base_id, :created_at, :updated_at

      # Copies every key/value pair of +params+ into the instance variable of
      # the same name. A nil +params+ is accepted and ignored.
      def initialize(params={})
        return unless params

        params.each { |name, val| instance_variable_set "@#{name}", val }
      end

      # Always true.
      def persisted?
        true
      end

      # Serializable hash of the model that also includes every association
      # returned by associations(true).
      def to_hash
        nested = self.class.associations(true).map { |field, _options| field.to_s }
        serializable_hash include: nested
      end

      # Hash of instance variable names (without the "@") to their values,
      # minus 'errors' and 'validation_context'. The hash is built on the first
      # call and the same object is returned afterwards.
      def attributes
        if @attributes.nil?
          snapshot = {}
          instance_variables.each do |ivar|
            snapshot[ivar.to_s.delete('@')] = instance_variable_get(ivar)
          end
          snapshot.delete 'errors'
          snapshot.delete 'validation_context'
          @attributes = snapshot
        end

        @attributes
      end

      # Stamps updated_at with the current UTC time.
      def before_update
        self.updated_at = Time.now.utc
      end

      # Stamps created_at with the current UTC time.
      def before_create
        self.created_at = Time.now.utc
      end

      # Validates each associated object (or each element, when the association
      # holds an Array) and copies its error messages onto this model.
      def validate_associations
        self.class.associations.each do |field, _options|
          target = send(field)
          next if target.nil?

          children = target.is_a?(Array) ? target : [target]
          children.each { |child| validate_child child, field }
        end
      end

      private

      # Adds the child's full error messages under +field+ when the child
      # supports validation and is invalid.
      def validate_child object, field
        return unless object.respond_to?(:invalid?) && object.invalid?

        errors.add field, object.errors.full_messages
      end
    end
  end
end
|
@@ -0,0 +1,68 @@
|
|
1
|
+
module DCA
  module Models
    # Mixin that lets a model declare fields with has_one / has_many and fill
    # them from parsed content through a binder class (see #bind).
    module Binder
      extend ActiveSupport::Concern

      # Scalar field types. Associations whose :type is NOT in this list are
      # the ones returned by associations(true). Frozen so the shared constant
      # cannot be modified at runtime.
      COMPLEX_TYPE = [:integer, :float, :string, :symbol, :datetime].freeze

      module ClassMethods
        # Binder class used to parse content, resolved as
        # DCA::Models::<Name>Binder. The first resolved class is memoized, so
        # +name+ is ignored on later calls.
        def binder name = :nokogiri
          @binder ||= "DCA::Models::#{name.to_s.camelize}Binder".constantize
        end

        # Declares a single-valued field. The optional first positional
        # argument is the type; a trailing hash carries the binder options.
        def has_one field, *args
          options = args.extract_options!
          type = args.first
          add_association field, :one, type, options
        end

        # Declares a multi-valued field. Arguments are as for has_one.
        def has_many field, *args
          options = args.extract_options!
          type = args.first
          add_association field, :many, type, options
        end

        # All declared associations, keyed by field. With +complex+ set, only
        # those whose :type is not listed in COMPLEX_TYPE.
        def associations complex = false
          @associations ||= {}

          return associations.select { |field, options| !COMPLEX_TYPE.include?(options[:type]) } if complex

          @associations
        end

        # Copies the parent's associations into the new subclass, then calls
        # super so inherited hooks defined further up the ancestor chain (by
        # other mixins extended into the class) still run.
        def inherited(child)
          associations.each { |field, options| child.associations[field] = options }
          super
        end

        private

        # Records the association and defines an accessor for the field.
        def add_association field, association, type, options = {}
          associations[field] = { :association => association, :field => field, :type => type, :options => options }
          instance_eval do
            # NOTE(review): this tests an instance variable of the class object
            # itself, not whether an accessor already exists - confirm intent.
            attr_accessor field.to_sym unless instance_variable_defined? "@#{field}"
          end
        end
      end

      # Parses +content+ with the class's binder for every declared association
      # and stores each result on the matching instance variable.
      # Returns self.
      def bind content
        self.class.associations.each do |field, options|
          update field, self.class.binder.parse(self, content, options), options[:options][:append]
        end
        self
      end

      private

      # Sets @<field> to +value+. With +append+, an existing truthy value is
      # combined with the new one using +.
      def update field, value, append = false
        ivar = "@#{field}"
        if append
          current = instance_variable_get(ivar)
          value = current + value if current
        end
        instance_variable_set ivar, value
      end
    end
  end
end
|
68
|
+
|
@@ -0,0 +1,48 @@
|
|
1
|
+
module DCA
  module Models
    # Helpers shared by binder classes: value conversion, option
    # post-processing and model class lookup. They become class methods of the
    # including class.
    module BinderHelper
      extend ActiveSupport::Concern

      module ClassMethods
        # Converts +value+ according to +type+.
        #
        # :integer / :float - every character except digits, '.' and ',' is
        #                     dropped, commas become dots, then to_i / to_f
        # :string           - to_s.strip
        # :symbol           - to_s.to_sym
        # :datetime         - parsed with DateTime.parse and converted to a UTC
        #                     Time; nil stays nil
        # anything else     - +value+ is returned untouched
        def convert value, type
          numeric_text = lambda { value.to_s.gsub(/[^\d.,]/,'').gsub(/,/,'.') }

          case type
          when :integer  then numeric_text.call.to_i
          when :float    then numeric_text.call.to_f
          when :string   then value.to_s.strip
          when :symbol   then value.to_s.to_sym
          when :datetime then value.nil? ? nil : DateTime.parse(value).to_time.utc
          else value
          end
        end

        # Applies the :default, :regex and :parser options to +value+.
        #
        # A nil value is replaced by options[:default]. A non-nil value is
        # reduced to the first capture group of options[:regex] when a regex is
        # given. Finally, when options[:parser] is set, the method it names is
        # called on +object+ with the intermediate result.
        def parse_options object, value, options
          extracted =
            if value.nil?
              options[:default]
            elsif options[:regex].nil?
              value
            else
              value[options[:regex], 1]
            end

          parser = options[:parser]
          parser.nil? ? extracted : object.send(parser, extracted)
        end

        # Resolves the class for +field+: the singularized, camelized field
        # name, prefixed with the camelized value of object.<polymorphic> when
        # +polymorphic+ is given. The name is looked up as-is first and, when
        # that yields nil, inside the namespace of +object+'s class.
        def find_type object, field, polymorphic = nil
          class_name = field.to_s.singularize.camelize
          if polymorphic
            class_name = "#{object.send(polymorphic).to_s.camelize}#{class_name}"
          end

          resolved = class_name.safe_constantize
          return resolved unless resolved.nil?

          "#{object.class.to_s.deconstantize}::#{class_name}".constantize
        end
      end
    end
  end
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
module DCA
  module Models
    # Binder that extracts field values from a node using CSS selectors
    # (at_css / css).
    class NokogiriBinder
      include BinderHelper

      class << self
        # Dispatches on params[:association]: :one goes to parse_one, :many to
        # parse_many. Any other value yields nil.
        def parse object, content, params
          case params[:association]
          when :one  then parse_one object, content, params
          when :many then parse_many object, content, params
          end
        end

        # Reads a single value. The element is +content+ itself, or the first
        # match of the :selector option when one is given. The raw value is the
        # element's :attribute when that option is set, otherwise its text
        # content. It is then run through parse_options and converted to
        # params[:type].
        def parse_one object, content, params
          opts = params[:options] || {}
          css = opts[:selector]
          node = css.nil? ? content : content.at_css(css)

          raw = nil
          unless node.nil?
            attr_name = opts[:attribute]
            raw = attr_name.nil? ? node.content : node[attr_name]
          end

          convert parse_options(object, raw, opts), params[:type]
        end

        # Builds one bound model per node matching the :selector option.
        # The model class is params[:type]; it is looked up with find_type when
        # no type was declared or the :polymorphic option is set. Returns nil
        # when there is no selector.
        def parse_many object, content, params
          opts = params[:options] || {}
          css = opts[:selector]

          klass = params[:type]
          if klass.nil? || opts[:polymorphic]
            klass = find_type object, params[:field], opts[:polymorphic]
          end

          return nil if css.nil?

          content.css(css).map { |node| klass.new.bind node }
        end
      end
    end
  end
end
|
43
|
+
|
@@ -0,0 +1,15 @@
|
|
1
|
+
module DCA
  module Models
    # Model carrying a checksum and a publication time. Both base_id and
    # checksum must be present for the model to be valid.
    class Position < BaseModel
      attr_accessor :checksum, :published_at

      validates_presence_of :base_id, :checksum

      def initialize(*args)
        # Define @id before anything else so the instance variable exists, and
        # is therefore part of the attribute hash, even when no id is given.
        @id = nil
        super(*args)
      end
    end
  end
end
|
data/lib/dca/net.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require File.expand_path('../net/browser_helper', __FILE__)
|
@@ -0,0 +1,20 @@
|
|
1
|
+
module DCA
  module Net
    # Mixin giving the including object one lazily created Watir browser.
    module BrowserHelper
      #def included(base)
      #  base.extend ClassMethods
      #end

      #module ClassMethods
      #end

      # Returns the browser, creating it on first use with the given driver
      # +name+ and +profile+. Once a browser exists the arguments are ignored.
      def browser(name = :ff, profile = 'default')
        return @browser if @browser

        @browser = Watir::Browser.new(name, :profile => profile)
      end

      # Closes the browser if one was created; otherwise does nothing and
      # returns nil.
      def browser_close
        return unless @browser

        @browser.close
      end
    end
  end
end
|
data/lib/dca/notifier.rb
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
module DCA
  module Redis
    # Ohm model for one session. It groups the analyze counters, fetch
    # counters and failures recorded under a uid.
    class Session < Ohm::Model
      attribute :uid
      attribute :created
      attribute :project
      attribute :area

      index :uid
      index :project
      index :area

      set :analyzed, DCA::Redis::AnalyzeNotify
      set :fetched, DCA::Redis::FetchNotify
      set :failures, DCA::Redis::FailureNotify

      # A session must have a uid.
      def validate
        assert_present :uid
      end

      # First analyze counter recorded for +state+, or nil.
      def analyze_state state
        analyzed.find(:state => state).first
      end

      # First fetch counter recorded for +state+, or nil.
      def fetch_state state
        fetched.find(:state => state).first
      end

      # Increments the :count counter of the analyze record for +state+,
      # creating the record and adding it to the session when missing.
      def inc_analyze state
        records = analyzed
        counter = records.find(:state => state).first

        if counter.nil?
          counter = AnalyzeNotify.create(:state => state)
          records.add counter
        end

        counter.incr :count
      end

      # Increments the counter named +result+ on the fetch record for +state+,
      # creating the record and adding it to the session when missing.
      def inc_fetch state, result
        records = fetched
        counter = records.find(:state => state).first

        if counter.nil?
          counter = FetchNotify.create(:state => state)
          records.add counter
        end

        counter.incr result
      end

      # Stores the exception's message and backtrace as a new failure record.
      def add_failure exception
        failure = FailureNotify.create(:message => exception.message, :stack => exception.backtrace)
        failures.add failure
      end
    end
  end
end
|