dca 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70):
  1. data/.document +5 -0
  2. data/.rspec +2 -0
  3. data/Gemfile +48 -0
  4. data/Gemfile.lock +126 -0
  5. data/Guardfile +8 -0
  6. data/LICENSE.txt +20 -0
  7. data/README.rdoc +19 -0
  8. data/Rakefile +48 -0
  9. data/VERSION +1 -0
  10. data/bin/dca +5 -0
  11. data/dca.gemspec +160 -0
  12. data/lib/dca.rb +64 -0
  13. data/lib/dca/cli.rb +32 -0
  14. data/lib/dca/commands/area.rb +133 -0
  15. data/lib/dca/commands/templates/area/analyzer.rb.erb +34 -0
  16. data/lib/dca/commands/templates/area/area.rb.erb +2 -0
  17. data/lib/dca/commands/templates/area/models.rb.erb +2 -0
  18. data/lib/dca/commands/templates/area/page.rb.erb +17 -0
  19. data/lib/dca/commands/templates/area/position.rb.erb +8 -0
  20. data/lib/dca/commands/templates/config.yml.erb +38 -0
  21. data/lib/dca/commands/templates/spec/analyzer_spec.rb.erb +15 -0
  22. data/lib/dca/commands/templates/spec/spec_helper.rb.erb +2 -0
  23. data/lib/dca/config.rb +20 -0
  24. data/lib/dca/helpers.rb +2 -0
  25. data/lib/dca/helpers/logger.rb +50 -0
  26. data/lib/dca/jobs.rb +3 -0
  27. data/lib/dca/jobs/analyzer_job.rb +119 -0
  28. data/lib/dca/jobs/job.rb +62 -0
  29. data/lib/dca/models.rb +5 -0
  30. data/lib/dca/models/base_model.rb +73 -0
  31. data/lib/dca/models/binder.rb +68 -0
  32. data/lib/dca/models/binder_helper.rb +48 -0
  33. data/lib/dca/models/nokogiri_binder.rb +43 -0
  34. data/lib/dca/models/position.rb +15 -0
  35. data/lib/dca/net.rb +1 -0
  36. data/lib/dca/net/browser_helper.rb +20 -0
  37. data/lib/dca/notifier.rb +2 -0
  38. data/lib/dca/notifier/notifier.rb +11 -0
  39. data/lib/dca/notifier/redis/models/analyze_notify.rb +12 -0
  40. data/lib/dca/notifier/redis/models/failure_notify.rb +8 -0
  41. data/lib/dca/notifier/redis/models/fetch_notify.rb +15 -0
  42. data/lib/dca/notifier/redis/models/session.rb +52 -0
  43. data/lib/dca/notifier/redis/notifier.rb +25 -0
  44. data/lib/dca/notifier/redis_notifier.rb +9 -0
  45. data/lib/dca/storage.rb +3 -0
  46. data/lib/dca/storage/elasticsearch_storage.rb +80 -0
  47. data/lib/dca/storage/mongo_storage.rb +51 -0
  48. data/lib/dca/storage/storage.rb +55 -0
  49. data/spec/analyzer_spec.rb +64 -0
  50. data/spec/area_task_spec.rb +45 -0
  51. data/spec/base_model_spec.rb +34 -0
  52. data/spec/binder_spec.rb +69 -0
  53. data/spec/config.yml +18 -0
  54. data/spec/elasticsearch_storage_spec.rb +28 -0
  55. data/spec/fixtures/page.html +12 -0
  56. data/spec/fixtures/positions.yml +13 -0
  57. data/spec/fixtures/positions_with_error.yml +14 -0
  58. data/spec/fixtures/states.yml +3 -0
  59. data/spec/job_spec.rb +31 -0
  60. data/spec/mock/analyzer_job.rb +30 -0
  61. data/spec/mock/file_storage.rb +28 -0
  62. data/spec/mock/notify_object.rb +13 -0
  63. data/spec/mock/page.rb +13 -0
  64. data/spec/mock/position.rb +40 -0
  65. data/spec/mock/web_notifier.rb +30 -0
  66. data/spec/mongo_storage_spec.rb +20 -0
  67. data/spec/redis_notifier_spec.rb +98 -0
  68. data/spec/spec_helper.rb +27 -0
  69. data/spec/support/storage_examples.rb +103 -0
  70. metadata +408 -0
@@ -0,0 +1,62 @@
1
module DCA
  module Jobs
    # Base class for jobs enqueued through Resque.
    #
    # Subclasses are expected to implement #perform. They may additionally
    # define #on_success and #on_failure(exception) hooks and override
    # #destroy, which always runs once the job has finished.
    class Job
      include Helpers::Logger
      logger_name :queue

      # Options hash the job was instantiated with.
      attr_reader :options

      class << self
        # Queue name: the second-to-last segment of the fully qualified
        # class name, i.e. the name of the module that encloses the job class.
        def queue
          to_s.split("::")[-2]
        end

        # Add a job to the queue, passing +options+ along as job arguments.
        def create(options={})
          Resque.enqueue(self, options)
        end

        # Build a job from +options+ (keys are symbolized in place), run it
        # through #safe_perform! and return the job instance.
        def perform(options={})
          job = new(options.symbolize_keys!)
          job.safe_perform!
          job
        end
      end

      # Stores the options and installs a QUIT signal handler that flags the
      # job as shutting down (see #shutdown?).
      def initialize(options = {})
        @options = options

        trap('QUIT') { shutdown }
      end

      # Runs #perform followed by the optional #on_success hook. Any raised
      # exception is handed to #on_failure when the job responds to it and is
      # re-raised otherwise. #destroy is invoked in every case.
      def safe_perform!
        perform
        on_success if respond_to?(:on_success)
      rescue Exception => error
        raise error unless respond_to?(:on_failure)

        on_failure(error)
      ensure
        destroy
      end

      # The actual work of the job; must be provided by subclasses.
      def perform
        raise NotImplementedError
      end

      # Cleanup hook called after every run; does nothing by default.
      def destroy
      end

      # Truthy once a QUIT signal has been received, nil before that.
      def shutdown?
        @shutdown
      end

      private

      # Marks the job as shutting down.
      def shutdown
        @shutdown = true
      end
    end
  end
end
@@ -0,0 +1,5 @@
1
+ require File.expand_path('../models/binder', __FILE__)
2
+ require File.expand_path('../models/binder_helper', __FILE__)
3
+ require File.expand_path('../models/nokogiri_binder', __FILE__)
4
+ require File.expand_path('../models/base_model', __FILE__)
5
+ require File.expand_path('../models/position', __FILE__)
@@ -0,0 +1,73 @@
1
module DCA
  module Models
    # Common base for models: mixes in ActiveModel naming, translation,
    # validation, conversion and serialization together with the Binder and
    # DCA::Storage modules.
    class BaseModel
      extend ActiveModel::Naming
      extend ActiveModel::Translation
      include ActiveModel::Validations
      include ActiveModel::Conversion
      include ActiveModel::Serialization
      include Binder
      include DCA::Storage

      establish_connection

      # Associated objects are validated before the model's own validations.
      set_callback :validate, :before, :validate_associations

      attr_accessor :id, :base_id, :created_at, :updated_at

      # Copies every key/value pair of +params+ into an instance variable
      # named after the key. A nil +params+ is ignored.
      def initialize(params={})
        return unless params

        params.each { |attr, value| self.instance_variable_set "@#{attr}", value }
      end

      # Always reports the model as persisted.
      def persisted?
        true
      end

      # Serializable hash of the model that also includes every association
      # whose type is not listed in Binder::COMPLEX_TYPE.
      def to_hash
        nested_fields = self.class.associations(true).map { |field, _options| field.to_s }
        serializable_hash include: nested_fields
      end

      # Hash of instance variable names (without the leading "@") to their
      # values, excluding the validation bookkeeping variables. The hash is
      # built on the first call and reused on every later call.
      def attributes
        if @attributes.nil?
          pairs = instance_variables.map { |ivar| [ivar.to_s.delete('@'), instance_variable_get(ivar)] }
          collected = Hash[pairs]
          %w[errors validation_context].each { |key| collected.delete key }
          @attributes = collected
        end

        @attributes
      end

      # Stamps the modification time (UTC).
      def before_update
        self.updated_at = Time.now.utc
      end

      # Stamps the creation time (UTC).
      def before_create
        self.created_at = Time.now.utc
      end

      # Validates every non-nil associated object (or each element when the
      # association holds an Array) and copies its error messages onto this
      # model.
      def validate_associations
        self.class.associations.each_key do |field|
          target = send(field)
          next if target.nil?

          children = target.is_a?(Array) ? target : [target]
          children.each { |child| validate_child child, field }
        end
      end

      private

      # Adds the full error messages of +object+ under +field+ when the object
      # supports validation and is invalid.
      def validate_child object, field
        return unless object.respond_to?(:invalid?) && object.invalid?

        errors.add field, object.errors.full_messages
      end

    end
  end
end
@@ -0,0 +1,68 @@
1
module DCA
  module Models
    # Mixin that lets a model declare fields (+has_one+ / +has_many+) and fill
    # them from parsed content through a binder class.
    module Binder
      extend ActiveSupport::Concern

      # Types that are converted from plain values. Despite the constant's
      # name, +associations(true)+ treats every type NOT listed here as complex.
      COMPLEX_TYPE = [:integer, :float, :string, :symbol, :datetime]

      module ClassMethods
        # Binder class used to parse content, looked up as
        # DCA::Models::<Name>Binder. The result is memoized, so +name+ only
        # has an effect on the first call.
        def binder name = :nokogiri
          @binder ||= "DCA::Models::#{name.to_s.camelize}Binder".constantize
        end

        # Declares a single-valued field. The first positional argument after
        # +field+ is the type; a trailing hash carries the binder options.
        def has_one field, *args
          options = args.extract_options!
          type = args.first
          add_association field, :one, type, options
        end

        # Declares a collection field. Arguments as for +has_one+.
        def has_many field, *args
          options = args.extract_options!
          type = args.first
          add_association field, :many, type, options
        end

        # All declared associations keyed by field name. With +complex+ set,
        # only those whose type is not in COMPLEX_TYPE are returned.
        def associations complex = false
          @associations ||= {}

          return associations.select { |field, options| !COMPLEX_TYPE.include?(options[:type]) } if complex

          @associations
        end

        # Copies the parent's associations to the subclass.
        #
        # +super+ is required so that other +inherited+ hooks further up the
        # ancestor chain (for example those installed by frameworks mixed into
        # the same class) still run.
        def inherited(child)
          super
          associations.each { |field, options| child.associations[field] = options }
        end

        private

        # Registers the association and defines an accessor for the field.
        def add_association field, association, type, options = {}
          associations[field] = { :association => association, :field => field, :type => type, :options => options }
          instance_eval do
            # NOTE(review): this checks an instance variable on the class
            # itself, not whether an accessor already exists - confirm intent.
            attr_accessor field.to_sym unless instance_variable_defined? "@#{field}"
          end
        end
      end

      # Parses +content+ with the class binder for every declared association
      # and stores the results on the receiver. Returns self.
      def bind content
        self.class.associations.each do |field, options|
          update field, self.class.binder.parse(self, content, options), options[:options][:append]
        end
        self
      end

      private

      # Stores +value+ in the field's instance variable. With +append+ the new
      # value is added (via +) to an already stored one; a nil +value+ leaves
      # the stored value untouched instead of raising on "stored + nil".
      def update field, value, append = false
        ivar = "@#{field}"
        if append
          current = instance_variable_get(ivar)
          if current
            value = value.nil? ? current : current + value
          end
        end
        instance_variable_set ivar, value
      end
    end
  end
end
68
+
@@ -0,0 +1,48 @@
1
module DCA
  module Models
    # Class-level helpers shared by binder implementations: value conversion,
    # option post-processing and type lookup.
    module BinderHelper
      extend ActiveSupport::Concern

      module ClassMethods
        # Converts +value+ according to +type+. Numeric types keep only
        # digits, dots and commas and treat a comma as the decimal point.
        # Unknown types return the value unchanged.
        def convert value, type
          case type
          when :integer, :float
            numeric_text = value.to_s.gsub(/[^\d.,]/,'').gsub(/,/,'.')
            type == :integer ? numeric_text.to_i : numeric_text.to_f
          when :string then value.to_s.strip
          when :symbol then value.to_s.to_sym
          when :datetime
            value.nil? ? nil : DateTime.parse(value).to_time.utc
          else
            value
          end
        end

        # Applies the :default, :regex and :parser options to +value+:
        # * a nil value is replaced by options[:default];
        # * otherwise options[:regex], when given, extracts its first capture group;
        # * finally options[:parser], when given, names a method on +object+
        #   that receives the intermediate result.
        def parse_options object, value, options
          result =
            if value.nil?
              options[:default]
            elsif options[:regex].nil?
              value
            else
              value[options[:regex], 1]
            end

          parser = options[:parser]
          result = object.send(parser, result) unless parser.nil?

          result
        end

        # Resolves the class for +field+: the singularized, camelized field
        # name, optionally prefixed with the camelized value of the
        # +polymorphic+ attribute of +object+. The name is looked up globally
        # first and then inside the namespace of the object's class.
        def find_type object, field, polymorphic = nil
          type_name = field.to_s.singularize.camelize
          type_name = "#{object.send(polymorphic).to_s.camelize}#{type_name}" if polymorphic

          resolved = type_name.safe_constantize
          return resolved unless resolved.nil?

          "#{object.class.to_s.deconstantize}::#{type_name}".constantize
        end
      end
    end
  end
end
@@ -0,0 +1,43 @@
1
module DCA
  module Models
    # Binder that extracts field values from content queried with CSS
    # selectors (+at_css+ / +css+).
    class NokogiriBinder
      include BinderHelper

      # Dispatches on the association kind (:one or :many). Any other kind
      # yields nil.
      def self.parse object, content, params
        case params[:association]
        when :one then parse_one object, content, params
        when :many then parse_many object, content, params
        end
      end

      # Reads a single value: the element matched by options[:selector] (or
      # +content+ itself without a selector), taking either its text content
      # or the attribute named by options[:attribute]. The raw value then goes
      # through parse_options and is converted to params[:type].
      def self.parse_one object, content, params
        options = params[:options] || {}
        selector = options[:selector]
        element = selector.nil? ? content : content.at_css(selector)

        raw = nil
        unless element.nil?
          attribute = options[:attribute]
          raw = attribute.nil? ? element.content : element[attribute]
        end

        convert parse_options(object, raw, options), params[:type]
      end

      # Builds one bound object per node matched by options[:selector].
      # The item type comes from params[:type], or is looked up from the field
      # name when no type is given or the association is polymorphic.
      # Returns nil when no selector is configured.
      def self.parse_many object, content, params
        options = params[:options] || {}
        selector = options[:selector]
        type = params[:type]
        type = find_type object, params[:field], options[:polymorphic] if type.nil? || options[:polymorphic]

        return nil if selector.nil?

        content.css(selector).map { |node| type.new.bind node }
      end

    end
  end
end
43
+
@@ -0,0 +1,15 @@
1
module DCA
  module Models
    # Model with a checksum and publication time; base_id and checksum are
    # mandatory.
    class Position < BaseModel
      attr_accessor :checksum
      attr_accessor :published_at

      validates_presence_of :base_id, :checksum

      def initialize(*args)
        # Define @id up front so that the id attribute is part of the
        # attributes hash even when no id has been assigned.
        @id = nil
        super
      end
    end
  end
end
@@ -0,0 +1 @@
1
+ require File.expand_path('../net/browser_helper', __FILE__)
@@ -0,0 +1,20 @@
1
module DCA
  module Net
    # Mixin that gives the including object a lazily created, memoized
    # Watir browser.
    module BrowserHelper
      # Returns the browser, creating it on first use.
      #
      # +name+ and +profile+ are only used when a new browser has to be
      # created; while one is memoized they are ignored.
      def browser(name = :ff, profile = 'default')
        @browser ||= Watir::Browser.new name, :profile => profile
      end

      # Closes the browser if one was created and returns the result of
      # +close+ (nil when there was nothing to close).
      #
      # The memoized reference is always cleared afterwards so that the next
      # call to #browser starts a fresh browser instead of handing back a
      # closed one.
      def browser_close
        return unless @browser

        @browser.close
      ensure
        @browser = nil
      end
    end
  end
end
@@ -0,0 +1,2 @@
1
+ require File.expand_path('../notifier/notifier', __FILE__)
2
+ require File.expand_path('../notifier/redis_notifier', __FILE__)
@@ -0,0 +1,11 @@
1
module DCA
  # Facade in front of a single notifier driver held at class level.
  class Notifier
    class << self
      # Instantiates the driver class DCA::<config[:driver]>Notifier with
      # +config+ and keeps it for later #push calls. Returns the driver.
      def create config
        driver_class = "DCA::#{config[:driver]}Notifier".constantize
        @driver = driver_class.new config
      end

      # Forwards the notification to the driver. Does nothing (returns nil)
      # when no driver has been created.
      def push object, event, options = {}
        return if @driver.nil?

        @driver.push object, event, options
      end
    end
  end
end
@@ -0,0 +1,12 @@
1
module DCA
  module Redis
    # Ohm model holding a counter for one analyze state.
    class AnalyzeNotify < Ohm::Model
      # State label: create, update, unchange, failed
      attribute :state
      index :state

      # Counter stored under the name :count
      counter :count

      # Disabled back-reference to the owning session:
      #reference :session, Session
    end
  end
end
@@ -0,0 +1,8 @@
1
module DCA
  module Redis
    # Ohm model describing a single failure.
    class FailureNotify < Ohm::Model
      # Failure message
      attribute :message

      # Stack trace belonging to the failure
      attribute :stack
    end
  end
end
@@ -0,0 +1,15 @@
1
module DCA
  module Redis
    # Ohm model holding success and failure counters for one fetch state.
    class FetchNotify < Ohm::Model
      # State label; indexed so records can be found by state
      attribute :state
      index :state

      counter :success
      counter :failure

      # Disabled fields, kept for reference:
      #attribute :message
      #attribute :stack
      #reference :session, Session
    end
  end
end
@@ -0,0 +1,52 @@
1
module DCA
  module Redis
    # Ohm model of a session: identifies a project/area pair and collects the
    # analyze, fetch and failure notifications recorded for it.
    class Session < Ohm::Model
      attribute :uid
      attribute :created
      attribute :project
      attribute :area

      index :uid
      index :project
      index :area

      set :analyzed, DCA::Redis::AnalyzeNotify
      set :fetched, DCA::Redis::FetchNotify
      set :failures, DCA::Redis::FailureNotify

      # A session requires a uid.
      def validate
        assert_present :uid
      end

      # First analyze notification recorded for +state+, or nil.
      def analyze_state state
        analyzed.find(:state => state).first
      end

      # First fetch notification recorded for +state+, or nil.
      def fetch_state state
        fetched.find(:state => state).first
      end

      # Increments the :count counter of the analyze notification for
      # +state+, creating the notification first when none exists.
      def inc_analyze state
        notify_for(analyzed, AnalyzeNotify, state).incr :count
      end

      # Increments the counter named by +result+ on the fetch notification
      # for +state+, creating the notification first when none exists.
      def inc_fetch state, result
        notify_for(fetched, FetchNotify, state).incr result
      end

      # Records the message and backtrace of +exception+ as a failure.
      def add_failure exception
        failures.add FailureNotify.create(:message => exception.message, :stack => exception.backtrace)
      end

      private

      # Finds the notification with the given +state+ in +collection+; when
      # there is none, creates one of class +model+ and adds it to the set.
      def notify_for collection, model, state
        existing = collection.find(:state => state).first
        return existing unless existing.nil?

        created = model.create(:state => state)
        collection.add created
        created
      end
    end
  end
end