dca 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (70) hide show
  1. data/.document +5 -0
  2. data/.rspec +2 -0
  3. data/Gemfile +48 -0
  4. data/Gemfile.lock +126 -0
  5. data/Guardfile +8 -0
  6. data/LICENSE.txt +20 -0
  7. data/README.rdoc +19 -0
  8. data/Rakefile +48 -0
  9. data/VERSION +1 -0
  10. data/bin/dca +5 -0
  11. data/dca.gemspec +160 -0
  12. data/lib/dca.rb +64 -0
  13. data/lib/dca/cli.rb +32 -0
  14. data/lib/dca/commands/area.rb +133 -0
  15. data/lib/dca/commands/templates/area/analyzer.rb.erb +34 -0
  16. data/lib/dca/commands/templates/area/area.rb.erb +2 -0
  17. data/lib/dca/commands/templates/area/models.rb.erb +2 -0
  18. data/lib/dca/commands/templates/area/page.rb.erb +17 -0
  19. data/lib/dca/commands/templates/area/position.rb.erb +8 -0
  20. data/lib/dca/commands/templates/config.yml.erb +38 -0
  21. data/lib/dca/commands/templates/spec/analyzer_spec.rb.erb +15 -0
  22. data/lib/dca/commands/templates/spec/spec_helper.rb.erb +2 -0
  23. data/lib/dca/config.rb +20 -0
  24. data/lib/dca/helpers.rb +2 -0
  25. data/lib/dca/helpers/logger.rb +50 -0
  26. data/lib/dca/jobs.rb +3 -0
  27. data/lib/dca/jobs/analyzer_job.rb +119 -0
  28. data/lib/dca/jobs/job.rb +62 -0
  29. data/lib/dca/models.rb +5 -0
  30. data/lib/dca/models/base_model.rb +73 -0
  31. data/lib/dca/models/binder.rb +68 -0
  32. data/lib/dca/models/binder_helper.rb +48 -0
  33. data/lib/dca/models/nokogiri_binder.rb +43 -0
  34. data/lib/dca/models/position.rb +15 -0
  35. data/lib/dca/net.rb +1 -0
  36. data/lib/dca/net/browser_helper.rb +20 -0
  37. data/lib/dca/notifier.rb +2 -0
  38. data/lib/dca/notifier/notifier.rb +11 -0
  39. data/lib/dca/notifier/redis/models/analyze_notify.rb +12 -0
  40. data/lib/dca/notifier/redis/models/failure_notify.rb +8 -0
  41. data/lib/dca/notifier/redis/models/fetch_notify.rb +15 -0
  42. data/lib/dca/notifier/redis/models/session.rb +52 -0
  43. data/lib/dca/notifier/redis/notifier.rb +25 -0
  44. data/lib/dca/notifier/redis_notifier.rb +9 -0
  45. data/lib/dca/storage.rb +3 -0
  46. data/lib/dca/storage/elasticsearch_storage.rb +80 -0
  47. data/lib/dca/storage/mongo_storage.rb +51 -0
  48. data/lib/dca/storage/storage.rb +55 -0
  49. data/spec/analyzer_spec.rb +64 -0
  50. data/spec/area_task_spec.rb +45 -0
  51. data/spec/base_model_spec.rb +34 -0
  52. data/spec/binder_spec.rb +69 -0
  53. data/spec/config.yml +18 -0
  54. data/spec/elasticsearch_storage_spec.rb +28 -0
  55. data/spec/fixtures/page.html +12 -0
  56. data/spec/fixtures/positions.yml +13 -0
  57. data/spec/fixtures/positions_with_error.yml +14 -0
  58. data/spec/fixtures/states.yml +3 -0
  59. data/spec/job_spec.rb +31 -0
  60. data/spec/mock/analyzer_job.rb +30 -0
  61. data/spec/mock/file_storage.rb +28 -0
  62. data/spec/mock/notify_object.rb +13 -0
  63. data/spec/mock/page.rb +13 -0
  64. data/spec/mock/position.rb +40 -0
  65. data/spec/mock/web_notifier.rb +30 -0
  66. data/spec/mongo_storage_spec.rb +20 -0
  67. data/spec/redis_notifier_spec.rb +98 -0
  68. data/spec/spec_helper.rb +27 -0
  69. data/spec/support/storage_examples.rb +103 -0
  70. metadata +408 -0
@@ -0,0 +1,64 @@
1
+ require 'yaml'
2
+ require 'active_model'
3
+ require 'rake'
4
+ require 'resque'
5
+ require 'resque/tasks'
6
+ require 'ohm'
7
+ require 'uuid'
8
+ require 'yajl/json_gem'
9
+ require 'tire'
10
+ require 'logger'
11
+ require 'thor'
12
+ require 'thor/group'
13
+ require 'nokogiri'
14
+ require 'mongo'
15
+
16
module DCA
  # Base error class for DCA.
  # NOTE(review): inherits from Exception rather than StandardError, so a bare
  # `rescue` does not catch it. Left unchanged so existing rescue clauses keep
  # behaving the same way.
  class ApplicationError < Exception; end

  # Root directory of the project: the current working directory.
  def self.root
    Dir.pwd
  end

  # True when the working directory is not named "dca" (case-insensitive),
  # i.e. the library is being used from a host project.
  def self.used?
    File.basename(root).downcase != 'dca'
  end

  # Name of the host project's top-level constant, derived from the gem name in
  # the first *.gemspec found in the root directory.
  #
  # The camelized gem name is used when it resolves to a constant; otherwise the
  # top-level constants are searched case-insensitively. The result is memoized
  # only after a successful lookup, so a failed call can be retried.
  #
  # Raises RuntimeError when there is no gemspec, the gemspec has no name, or no
  # matching constant exists.
  def self.project_name
    return @project if @project.present?

    gemspec = Dir[File.join(root, '*.gemspec')].first
    raise 'Generate gemspec file' if gemspec.blank?

    gem = Gem::Specification.load gemspec
    # Gem::Specification.load returns nil when the file cannot be evaluated.
    raise 'Set gem name in gemspec' if gem.nil? || gem.name.blank?

    name = gem.name.camelize
    if name.safe_constantize.nil?
      match = Object.constants.detect { |const| const.to_s.downcase == name.downcase }
      # Check the lookup result itself: converting nil with to_s would yield ""
      # and hide the failure.
      raise "Unknown project name" if match.nil?
      name = match.to_s
    end

    @project = name
  end

  # Path (without extension) of the project's main library file:
  # <root>/lib/<basename of root>.
  def self.project_path
    @project_path ||= File.join(root, 'lib', File.basename(root))
  end

  # Same as project_path with the ".rb" extension appended. Memoized in its own
  # variable so that it never overwrites the value returned by project_path.
  def self.project_file
    @project_file ||= "#{project_path}.rb"
  end
end
54
+
55
+ require File.expand_path('../dca/config', __FILE__)
56
+ require File.expand_path('../dca/helpers', __FILE__)
57
+ require File.expand_path('../dca/storage', __FILE__)
58
+ require File.expand_path('../dca/jobs', __FILE__)
59
+ require File.expand_path('../dca/net', __FILE__)
60
+ require File.expand_path('../dca/notifier', __FILE__)
61
+ require File.expand_path('../dca/models', __FILE__)
62
+ require File.expand_path('../dca/cli', __FILE__)
63
+
64
+ require DCA.project_path if DCA.used? && File.exist?(DCA.project_file)
@@ -0,0 +1,32 @@
1
+ require File.expand_path('../commands/area', __FILE__)
2
+
3
module DCA
  # Top-level Thor command line interface: config, area, install, uninstall.
  class CLI < Thor
    include Thor::Actions

    # Directory Thor::Actions reads templates from.
    def self.source_root
      File.expand_path('commands/templates', File.dirname(__FILE__))
    end

    desc('config', 'Create default config file')
    # Creates the config directory and renders the default config file into it.
    def config
      empty_directory('config')
      template('config.yml.erb', 'config/config.yml')
    end

    desc('area SUBCOMMAND ...ARGS', 'Manage project areas')
    subcommand('area', Commands::Area)

    desc('install', 'Install dca project')
    # Calls +install+ on the host project's Project constant.
    def install
      "#{DCA.project_name}::Project".constantize.install
    end

    desc('uninstall', 'Uninstall dca project')
    # Calls +remove+ on the host project's Project constant.
    def uninstall
      "#{DCA.project_name}::Project".constantize.remove
    end
  end
end
@@ -0,0 +1,133 @@
1
module DCA
  module Commands
    # Thor sub-commands for managing areas: generating the files of a new area
    # and starting/stopping the Resque workers that analyze it.
    class Area < Thor
      include Thor::Actions

      # Directory Thor::Actions reads templates from.
      def self.source_root
        File.expand_path('../templates', __FILE__)
      end

      desc 'create NAME TITLE DESCRIPTION URL', 'Generate and add new area to project'
      long_desc <<-LONGDESC
        Generate and add a new area named <name> with to project. Parameters <name>, <title>, <description> and <url>
        are required
      LONGDESC
      # Renders the area templates and records the area in config/areas.yml.
      # An existing entry with different values is only overwritten after the
      # shell's file-collision prompt confirms it.
      def create(name, title, description, url)
        # The templates interpolate @name into require paths, and the files are
        # generated under the lower-cased name, so both must be the same string.
        @name = name.downcase
        @class_name = name.camelize

        template 'area/analyzer.rb.erb', "lib/areas/#{@name}/analyzer.rb"
        template 'area/position.rb.erb', "lib/areas/#{@name}/models/position.rb"
        template 'area/page.rb.erb', "lib/areas/#{@name}/models/page.rb"
        template 'area/models.rb.erb', "lib/areas/#{@name}/models.rb"
        template 'area/area.rb.erb', "lib/areas/#{@name}.rb"

        template 'spec/analyzer_spec.rb.erb', "spec/areas/#{@name}/analyzer_spec.rb"
        template 'spec/spec_helper.rb.erb', "spec/areas/#{@name}/spec_helper.rb"

        empty_directory 'config'
        area_file = 'config/areas.yml'
        # YAML.load_file yields a falsy value for an empty file; fall back to {}.
        areas = (File.exist?(area_file) && YAML.load_file(area_file)) || {}
        area_hash = {'title' => title, 'description' => description, 'url' => url}

        if areas.has_key? name
          if areas[name] == area_hash
            shell.say_status :identical, area_file, :blue
          else
            areas[name] = area_hash
            shell.say_status :conflict, area_file, :red
            if shell.file_collision(area_file) { areas.to_yaml }
              write_areas area_file, areas
              shell.say_status :force, area_file, :yellow
            end
          end
        else
          status = areas.empty? ? :create : :update
          areas[name] = area_hash
          write_areas area_file, areas
          shell.say_status status, area_file, :green
        end
      end

      desc 'start NAME', 'Start area to analyze'
      # Creates the area's analyzer job and starts the configured number of
      # workers (one by default, in the background by default).
      def start(name)
        shell.say "Starting analyze area #{name}"
        config = area_config name.to_sym

        job_ops = {}
        job_ops[:distributed] = true if config[:distributed]
        job = "#{DCA.project_name}::Areas::#{name}::AnalyzerJob".constantize
        job.create job_ops

        background = config[:background]
        background = true if background.nil?
        run_worker name, config[:workers] || 1, background
      end

      desc 'stop NAME', 'Stop area to analyze'
      method_option :force, :type => :boolean, :aliases => '-f', :desc => 'force stop area analyzing process'
      # Signals the area's workers on this host (TERM with --force, QUIT
      # otherwise), waits until none is registered and removes the queue.
      def stop(name)
        shell.say "Stopping analyze area #{name}"

        pids = workers_pids name
        unless pids.empty?
          system("kill -s #{options[:force] ? 'TERM' : 'QUIT'} #{pids.join(' ')}")
        end

        wait_worker name

        Resque.remove_queue name
      end

      private

      # Settings of one area from APP_CONFIG[:areas]. Always returns a hash, so
      # an area without a configuration entry runs with the defaults.
      def area_config(area_name)
        areas = APP_CONFIG[:areas] || {}
        areas[area_name] || {}
      end

      # Writes the areas hash to +path+ as UTF-8 encoded YAML.
      def write_areas(path, areas)
        File.open(path, 'w:utf-8') { |file| file.write areas.to_yaml }
      end

      # Starts +count+ Resque workers for +queue+. In the foreground the
      # resque:work rake task is invoked in this process; in the background each
      # worker is spawned in its own process group and detached.
      def run_worker(queue, count = 1, background = true)
        puts "Starting #{count} worker(s) with QUEUE: #{queue}"
        if background
          log_dir = File.join DCA.root, 'log'
          Dir.mkdir log_dir unless Dir.exist? log_dir
          ops = { :pgroup => true }
          if APP_CONFIG[:logger]
            # Append both output streams of the worker to one debug file.
            debug_file = [File.join(DCA.root, "log/#{queue.underscore}.debug"), 'a']
            ops[:err] = debug_file
            ops[:out] = debug_file
          end
          env_vars = {'QUEUE' => queue}

          count.times do
            ## Using Kernel.spawn and Process.detach because regular system() call would
            ## cause the processes to quit when capistrano finishes
            pid = spawn(env_vars, "rake resque:work", ops)
            Process.detach(pid)
          end
        else
          ENV['QUEUE'] = queue
          ENV['VERBOSE'] = '1'
          Rake::Task['resque:work'].invoke
        end
      end

      # Pids (as strings) of the workers registered on this host that listen on
      # queue +name+. Worker ids are split as "host:pid:queue1,queue2".
      def workers_pids(name)
        pids = []
        Resque.workers.each do |worker|
          host, pid, queues = worker.id.split(':')
          next unless host == worker.hostname
          next unless queues.to_s.split(',').include? name

          # Use the pid taken from this worker's own id. Resque's
          # Worker#worker_pids lists every Resque process on the machine, which
          # would make `stop` signal the workers of unrelated areas too.
          pids << pid
        end
        pids.uniq
      end

      # Blocks, polling once per second, until no worker for +name+ is left.
      def wait_worker(name)
        sleep 1 until workers_pids(name).empty?
      end
    end
  end
end
@@ -0,0 +1,34 @@
1
+ module <%= DCA.project_name %>
2
+ module Areas
3
+ module <%= @class_name %>
4
+ # Base analyze jobs class for <%=@class_name%> area.
5
+ class AnalyzerJob < DCA::Jobs::AnalyzerJob
6
+ def change
7
+ false
8
+ end
9
+
10
+ def positions &block
11
+ page = Page.find options[:category]
12
+ until page.nil? do
13
+ logger.debug "Page number #{page.number}"
14
+
15
+ page.positions.each do |position|
16
+ begin
17
+ block.call position
18
+ rescue Exception => e
19
+ logger.exception e
20
+ end
21
+ end
22
+
23
+ break if shutdown?
24
+ break unless page.next
25
+ end
26
+ end
27
+
28
+ def fetch position
29
+ position
30
+ end
31
+ end
32
+ end
33
+ end
34
+ end
@@ -0,0 +1,2 @@
1
+ require File.expand_path('../<%= @name %>/models', __FILE__)
2
+ require File.expand_path('../<%= @name %>/analyzer', __FILE__)
@@ -0,0 +1,2 @@
1
+ require File.expand_path('../models/position', __FILE__)
2
+ require File.expand_path('../models/page', __FILE__)
@@ -0,0 +1,17 @@
1
+ module <%= DCA.project_name %>
2
+ module Areas
3
+ module <%= @class_name %>
4
+ class Page < DCA::Models::BaseModel
5
+ attr_reader :url, :number
6
+
7
+ # need to set the right selector
8
+ # has_many :positions, :selector => 'div'
9
+
10
+ def next
11
+ @number += 1
12
+ # get the next page if it is possible, or return nil
13
+ end
14
+ end
15
+ end
16
+ end
17
+ end
@@ -0,0 +1,8 @@
1
+ module <%= DCA.project_name %>
2
+ module Areas
3
+ module <%= @class_name %>
4
+ class Position < DCA::Models::Position
5
+ end
6
+ end
7
+ end
8
+ end
@@ -0,0 +1,38 @@
1
+ production:
2
+ dca_db:
3
+ driver: ElasticSearch
4
+ host: localhost
5
+ port: 27017
6
+ redis:
7
+ host: localhost
8
+ port: 6379
9
+ notifier:
10
+ driver: Redis
11
+ host: localhost
12
+ port: 6379
13
+
14
+ development:
15
+ dca_db:
16
+ driver: ElasticSearch
17
+ host: localhost
18
+ port: 27017
19
+ redis:
20
+ host: localhost
21
+ port: 6379
22
+ notifier:
23
+ driver: Redis
24
+ host: localhost
25
+ port: 6379
26
+
27
+ test:
28
+ dca_db:
29
+ driver: ElasticSearch
30
+ host: localhost
31
+ port: 27017
32
+ redis:
33
+ host: localhost
34
+ port: 6379
35
+ notifier:
36
+ driver: Redis
37
+ host: localhost
38
+ port: 6379
@@ -0,0 +1,15 @@
1
+ require File.expand_path('../spec_helper', __FILE__)
2
+
3
+ include <%= DCA.project_name%>::Areas::<%= @class_name %>
4
+
5
+ describe '<%= @class_name%> analyzer' do
6
+ before :all do
7
+ <%= DCA.project_name %>::Project.remove
8
+ <%= DCA.project_name %>::Project.install
9
+ DCA::Notifier.create APP_CONFIG[:notifier]
10
+ end
11
+
12
+ after :all do
13
+ <%= DCA.project_name %>::Project.remove
14
+ end
15
+ end
@@ -0,0 +1,2 @@
1
+ require File.expand_path('../../../spec_helper', __FILE__)
2
+ require './lib/areas/<%= @name %>'
@@ -0,0 +1,20 @@
1
# Name of the config.yml section to load; 'development' unless SYS_ENV was
# already defined before this file is loaded.
SYS_ENV = 'development' unless defined? SYS_ENV

# APP_CONFIG: settings of the current environment with symbolized keys.
unless defined? APP_CONFIG
  if File.exist? './config/config.yml'
    # An empty file makes YAML.load_file return a falsy value, and a file
    # without a section for SYS_ENV yields nil; both fall back to an empty hash
    # instead of failing with NoMethodError.
    loaded = YAML.load_file('./config/config.yml') || {}
    APP_CONFIG = (loaded[SYS_ENV] || {}).deep_symbolize_keys
  else
    APP_CONFIG = {}
    puts 'WARNING! Missing config file. Use rake system:config to create default config file.' if DCA.used?
  end
end

# AREAS_CONFIG: the :areas part of APP_CONFIG, or an empty hash when absent.
unless defined? AREAS_CONFIG
  AREAS_CONFIG = APP_CONFIG[:areas] || {}
end
19
+
20
+
@@ -0,0 +1,2 @@
1
+ require File.expand_path('../helpers/logger', __FILE__)
2
+
@@ -0,0 +1,50 @@
1
+ module DCA
2
+ module Helpers
3
# Logger that writes every record to its regular device and, when created with
# +verbose+, writes the same record to STDOUT as well.
class VerboseLogger < ::Logger

  # logdev, shift_age and shift_size are passed on to ::Logger unchanged.
  # verbose:: when truthy, a second log device on STDOUT is created.
  def initialize(logdev, shift_age = 0, shift_size = 1048576, verbose = false)
    super logdev, shift_age, shift_size
    @verbose_logdev = LogDevice.new(STDOUT, :shift_age => shift_age, :shift_size => shift_size) if verbose
    @default_logdev = @logdev
  end

  # Logs the record to the regular device and then, in verbose mode, once more
  # to STDOUT by temporarily swapping the device. Returns the result of the
  # regular ::Logger#add call.
  def add(severity, message = nil, progname = nil, &block)
    result = super severity, message, progname, &block
    return result unless @verbose_logdev

    begin
      @logdev = @verbose_logdev
      super severity, message, progname, &block
    ensure
      # Always switch back, even if writing to STDOUT raised.
      @logdev = @default_logdev
    end
    result
  end

  # Logs +error+ with FATAL severity: its message followed by the backtrace,
  # one frame per line. An exception that was never raised has no backtrace
  # (nil); in that case only the message is logged.
  def exception(error, progname = nil, &block)
    lines = [error.message, *Array(error.backtrace)]
    add(FATAL, lines.join("\n"), progname, &block)
  end
end
25
+
26
# Mixin that gives including classes a +logger+.
module Logger
  extend ActiveSupport::Concern

  module ClassMethods
    # Defines an instance method +logger+ on the class. +name+ is the name of a
    # class method whose underscored result becomes the log file name:
    # <root>/log/<name>.log. Without APP_CONFIG[:logger] no log file is used.
    # The logger is created on first use and cached per instance.
    def logger_name(name)
      define_method :logger do
        @logger ||= begin
          # Plain nil: the NIL constant no longer exists in current Ruby versions.
          out = APP_CONFIG[:logger] ? File.join(DCA.root, 'log', "#{(self.class.send name).underscore}.log") : nil
          VerboseLogger.new out, 0, 1048576, APP_CONFIG[:verbose]
        end
      end
    end

    # Stores a logger in the class-level @logger variable.
    def logger=(value)
      @logger = value
    end
  end

  # Default instance-level logger: delegates to the class.
  # NOTE(review): this module defines only a class-level writer; a class-level
  # +logger+ reader has to come from elsewhere, otherwise this call fails for
  # classes that never called logger_name - confirm.
  def logger
    self.class.logger
  end

end
49
+ end
50
+ end
@@ -0,0 +1,3 @@
1
+ require File.expand_path('../jobs/job', __FILE__)
2
+ require File.expand_path('../jobs/analyzer_job', __FILE__)
3
+
@@ -0,0 +1,119 @@
1
module DCA
  module Jobs
    # Base job for analyzing the positions of an area. Subclasses implement
    # +positions+ and +fetch+; +perform+ drives the analysis either in this job
    # or, in distributed mode, through one follow-up job per position.
    class AnalyzerJob < Job
      # Session identifier: taken from the job options or generated once.
      def session
        @session ||= options[:session] || UUID.generate(:compact)
      end

      # Entry point of the job.
      # * When +change+ is truthy only +on_change+ is run.
      # * A distributed job that carries a :position analyzes just that position.
      # * Otherwise every position yielded by +positions+ is analyzed (or
      #   distributed) until options[:limit] is reached or shutdown is requested.
      def perform
        return on_change if change

        if options[:distributed] && options[:position]
          analyze position(options[:position])
          return
        end

        index = 0
        # get list of positions and add to cache
        positions do |position|
          if options[:distributed]
            distribute position
          else
            analyze position
          end

          index += 1
          break if options[:limit] == index || shutdown?
        end
      end

      # Hook for subclasses; the default is false.
      def change
        false
      end

      # Creates a follow-up job of the same class for a single position. The
      # session is passed under the :session key, which is the key +session+
      # reads, so the follow-up job shares this job's session.
      def distribute position
        self.class.create :distributed => true, :position => position.to_hash, :session => session
      end

      # Return all positions or newly created or modified if possible. Some cases not possible to get newly created or
      # modified positions. In this case cache will be used to identify only newly created or modified positions.
      # Position must be a hash and should contain unique key :id and checksum for compare with cached positions and
      # identify newly created or modified
      def positions(&block)
        raise NotImplementedError
      end

      # Return position model from hash
      def position hash
        Models::Position.new hash
      end

      # Fetch newly created or modified positions
      def fetch position
        raise NotImplementedError
      end

      # Notifies listeners about a change.
      def on_change
        notify(:change)
      end

      # Logs and notifies that +position+ is being analyzed in +state+.
      def on_analyze(position, state)
        logger.debug "[#{position.class}] Analyze position base_id:#{position.base_id} state:#{state}"
        notify(:analyze, :position => position, :state => state)
      end

      # Logs and notifies the outcome of fetching +position+; +result+ tells
      # whether a valid position was fetched. Validation errors are logged for
      # an invalid one.
      def on_fetch(position, state, result)
        if result
          logger.debug "[#{position.class}] Fetch valid position id:#{position.id} base_id:#{position.base_id} state:#{state}"
        else
          logger.debug "[#{position.class}] Fetch invalid position base_id:#{position.base_id} state:#{state}"
          logger.debug "  Validation errors:\n    #{position.errors.full_messages.join("\n    ")}"
        end
        notify(:fetch, :position => position, :state => state, :result => result )
      end

      # Logs the error and notifies listeners about the failure.
      def on_failure(error)
        logger.exception error
        notify(:failure, :exception => error)
      end

      # Notifies listeners about success.
      def on_success
        notify(:success)
      end

      protected

      # Pushes +event+ with +options+ for this job to the notifier.
      def notify(event, options={})
        Notifier.push self, event, options
      end

      # Analyzes one position: reports its state and, unless it is :unmodified,
      # fetches it, saves it when valid and reports the fetch result. Any error
      # is reported through +on_failure+ instead of being raised.
      def analyze position
        state = position.state
        on_analyze position, state

        unless state == :unmodified
          new_position = fetch_safe! position

          if new_position
            position = new_position

            valid = new_position.valid?
            state = position.save if valid
            on_fetch position, state, valid
          else
            on_fetch position, state, false
          end
        end
      # Exception (not StandardError) is rescued on purpose: the default
      # +fetch+ raises NotImplementedError, which is not a StandardError.
      rescue Exception => exception
        on_failure exception
      end

      # Calls +fetch+; on any error reports it through +on_failure+ and returns
      # false. Exception is rescued for the same reason as in +analyze+.
      def fetch_safe!(position)
        fetch position
      rescue Exception => exception
        on_failure exception
        false
      end
    end
  end
end