dca 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. data/.document +5 -0
  2. data/.rspec +2 -0
  3. data/Gemfile +48 -0
  4. data/Gemfile.lock +126 -0
  5. data/Guardfile +8 -0
  6. data/LICENSE.txt +20 -0
  7. data/README.rdoc +19 -0
  8. data/Rakefile +48 -0
  9. data/VERSION +1 -0
  10. data/bin/dca +5 -0
  11. data/dca.gemspec +160 -0
  12. data/lib/dca.rb +64 -0
  13. data/lib/dca/cli.rb +32 -0
  14. data/lib/dca/commands/area.rb +133 -0
  15. data/lib/dca/commands/templates/area/analyzer.rb.erb +34 -0
  16. data/lib/dca/commands/templates/area/area.rb.erb +2 -0
  17. data/lib/dca/commands/templates/area/models.rb.erb +2 -0
  18. data/lib/dca/commands/templates/area/page.rb.erb +17 -0
  19. data/lib/dca/commands/templates/area/position.rb.erb +8 -0
  20. data/lib/dca/commands/templates/config.yml.erb +38 -0
  21. data/lib/dca/commands/templates/spec/analyzer_spec.rb.erb +15 -0
  22. data/lib/dca/commands/templates/spec/spec_helper.rb.erb +2 -0
  23. data/lib/dca/config.rb +20 -0
  24. data/lib/dca/helpers.rb +2 -0
  25. data/lib/dca/helpers/logger.rb +50 -0
  26. data/lib/dca/jobs.rb +3 -0
  27. data/lib/dca/jobs/analyzer_job.rb +119 -0
  28. data/lib/dca/jobs/job.rb +62 -0
  29. data/lib/dca/models.rb +5 -0
  30. data/lib/dca/models/base_model.rb +73 -0
  31. data/lib/dca/models/binder.rb +68 -0
  32. data/lib/dca/models/binder_helper.rb +48 -0
  33. data/lib/dca/models/nokogiri_binder.rb +43 -0
  34. data/lib/dca/models/position.rb +15 -0
  35. data/lib/dca/net.rb +1 -0
  36. data/lib/dca/net/browser_helper.rb +20 -0
  37. data/lib/dca/notifier.rb +2 -0
  38. data/lib/dca/notifier/notifier.rb +11 -0
  39. data/lib/dca/notifier/redis/models/analyze_notify.rb +12 -0
  40. data/lib/dca/notifier/redis/models/failure_notify.rb +8 -0
  41. data/lib/dca/notifier/redis/models/fetch_notify.rb +15 -0
  42. data/lib/dca/notifier/redis/models/session.rb +52 -0
  43. data/lib/dca/notifier/redis/notifier.rb +25 -0
  44. data/lib/dca/notifier/redis_notifier.rb +9 -0
  45. data/lib/dca/storage.rb +3 -0
  46. data/lib/dca/storage/elasticsearch_storage.rb +80 -0
  47. data/lib/dca/storage/mongo_storage.rb +51 -0
  48. data/lib/dca/storage/storage.rb +55 -0
  49. data/spec/analyzer_spec.rb +64 -0
  50. data/spec/area_task_spec.rb +45 -0
  51. data/spec/base_model_spec.rb +34 -0
  52. data/spec/binder_spec.rb +69 -0
  53. data/spec/config.yml +18 -0
  54. data/spec/elasticsearch_storage_spec.rb +28 -0
  55. data/spec/fixtures/page.html +12 -0
  56. data/spec/fixtures/positions.yml +13 -0
  57. data/spec/fixtures/positions_with_error.yml +14 -0
  58. data/spec/fixtures/states.yml +3 -0
  59. data/spec/job_spec.rb +31 -0
  60. data/spec/mock/analyzer_job.rb +30 -0
  61. data/spec/mock/file_storage.rb +28 -0
  62. data/spec/mock/notify_object.rb +13 -0
  63. data/spec/mock/page.rb +13 -0
  64. data/spec/mock/position.rb +40 -0
  65. data/spec/mock/web_notifier.rb +30 -0
  66. data/spec/mongo_storage_spec.rb +20 -0
  67. data/spec/redis_notifier_spec.rb +98 -0
  68. data/spec/spec_helper.rb +27 -0
  69. data/spec/support/storage_examples.rb +103 -0
  70. metadata +408 -0
@@ -0,0 +1,64 @@
1
+ require 'yaml'
2
+ require 'active_model'
3
+ require 'rake'
4
+ require 'resque'
5
+ require 'resque/tasks'
6
+ require 'ohm'
7
+ require 'uuid'
8
+ require 'yajl/json_gem'
9
+ require 'tire'
10
+ require 'logger'
11
+ require 'thor'
12
+ require 'thor/group'
13
+ require 'nokogiri'
14
+ require 'mongo'
15
+
16
# Top-level namespace of the dca gem. The module-level helpers below locate
# the host project (the project run from the current working directory) and
# derive its name and main library file.
module DCA
  # Base error class of the gem.
  # NOTE(review): it inherits from Exception, so a bare `rescue` (which only
  # catches StandardError) will not catch it. Left unchanged because callers
  # may depend on that.
  class ApplicationError < Exception; end

  # Root directory of the running project: the current working directory.
  def self.root
    Dir.pwd
  end

  # True unless the working directory is named "dca" (case-insensitive).
  def self.used?
    File.basename(root).downcase != 'dca'
  end

  # Name of the host project's top-level constant, derived from the first
  # *.gemspec found in the project root. The result is memoized only once it
  # has been resolved successfully.
  #
  # Raises RuntimeError when no gemspec exists, when the gemspec has no name,
  # or when no matching top-level constant can be found.
  def self.project_name
    return @project if @project.present?

    gemspec = Dir[File.join(root, '*.gemspec')].first
    raise 'Generate gemspec file' if gemspec.blank?

    gem = Gem::Specification.load gemspec
    # Gem::Specification.load returns nil when the file cannot be evaluated.
    raise 'Set gem name in gemspec' if gem.nil? || gem.name.blank?

    name = gem.name.camelize
    if name.safe_constantize.nil?
      # Fall back to a case-insensitive match among the top-level constants
      # (e.g. gem "dca" -> constant DCA).
      match = Object.constants.detect { |const| const.to_s.downcase == name.downcase }
      raise 'Unknown project name' if match.nil?

      name = match.to_s
    end

    @project = name
  end

  # Path (without extension) of the project's main library file:
  # <root>/lib/<basename of root>.
  def self.project_path
    @project_path ||= File.join(root, 'lib', File.basename(root))
  end

  # Path of the project's main library file, i.e. project_path + ".rb".
  # Memoized separately from project_path so neither overwrites the other.
  def self.project_file
    @project_file ||= project_path + '.rb'
  end
end
54
+
55
+ require File.expand_path('../dca/config', __FILE__)
56
+ require File.expand_path('../dca/helpers', __FILE__)
57
+ require File.expand_path('../dca/storage', __FILE__)
58
+ require File.expand_path('../dca/jobs', __FILE__)
59
+ require File.expand_path('../dca/net', __FILE__)
60
+ require File.expand_path('../dca/notifier', __FILE__)
61
+ require File.expand_path('../dca/models', __FILE__)
62
+ require File.expand_path('../dca/cli', __FILE__)
63
+
64
+ require DCA.project_path if DCA.used? && File.exist?(DCA.project_file)
@@ -0,0 +1,32 @@
1
+ require File.expand_path('../commands/area', __FILE__)
2
+
3
module DCA
  # Thor command-line interface of the gem: creates the default config file,
  # exposes the `area` subcommands and installs/uninstalls the host project.
  class CLI < Thor
    include Thor::Actions

    # Directory that template names passed to `template` are resolved against.
    def self.source_root
      File.expand_path('commands/templates', File.dirname(__FILE__))
    end

    desc 'config', 'Create default config file'
    def config
      empty_directory('config')
      template('config.yml.erb', 'config/config.yml')
    end

    desc 'area SUBCOMMAND ...ARGS', 'Manage project areas'
    subcommand 'area', Commands::Area

    desc 'install', 'Install dca project'
    def install
      # Resolves <ProjectName>::Project and delegates to its `install`.
      "#{DCA.project_name}::Project".constantize.install
    end

    desc 'uninstall', 'Uninstall dca project'
    def uninstall
      # Resolves <ProjectName>::Project and delegates to its `remove`.
      "#{DCA.project_name}::Project".constantize.remove
    end
  end
end
@@ -0,0 +1,133 @@
1
module DCA
  module Commands
    # Thor subcommands that manage project areas: generating the area
    # skeleton, and starting / stopping the Resque workers of an area.
    class Area < Thor
      include Thor::Actions

      # Directory that template names passed to `template` are resolved against.
      def self.source_root
        File.expand_path('../templates', __FILE__)
      end

      desc 'create NAME TITLE DESCRIPTION URL', 'Generate and add new area to project'
      long_desc <<-LONGDESC
        Generate and add a new area named <name> with to project. Parameters <name>, <title>, <description> and <url>
        are required
      LONGDESC
      # Generates the area sources and specs from the templates and records
      # the area (title, description, url) in config/areas.yml.
      def create name, title, description, url
        @name = name
        @class_name = name.camelize
        area_dir = name.downcase

        template 'area/analyzer.rb.erb', "lib/areas/#{area_dir}/analyzer.rb"
        template 'area/position.rb.erb', "lib/areas/#{area_dir}/models/position.rb"
        template 'area/page.rb.erb', "lib/areas/#{area_dir}/models/page.rb"
        template 'area/models.rb.erb', "lib/areas/#{area_dir}/models.rb"
        template 'area/area.rb.erb', "lib/areas/#{area_dir}.rb"

        template 'spec/analyzer_spec.rb.erb', "spec/areas/#{area_dir}/analyzer_spec.rb"
        template 'spec/spec_helper.rb.erb', "spec/areas/#{area_dir}/spec_helper.rb"

        empty_directory 'config'
        register_area name, 'title' => title, 'description' => description, 'url' => url
      end

      desc 'start NAME', 'Start area to analyze'
      # Enqueues the area's AnalyzerJob and starts the configured number of
      # workers (one by default, in background by default).
      def start name
        shell.say "Starting analyze area #{name}"
        config = area_config name.to_sym

        job_ops = {}
        job_ops[:distributed] = true if config[:distributed]
        # `create` generates the area module as name.camelize, so resolve the
        # job class the same way (camelize leaves CamelCase names untouched).
        job = "#{DCA.project_name}::Areas::#{name.camelize}::AnalyzerJob".constantize
        job.create job_ops

        background = config[:background].nil? ? true : config[:background]
        run_worker name, config[:workers] || 1, background
      end

      desc 'stop NAME', 'Stop area to analyze'
      method_option :force, :type => :boolean, :aliases => '-f', :desc => 'force stop area analyzing process'
      # Signals the workers of the area (TERM with --force, QUIT otherwise),
      # waits until none is reported any more and removes the area's queue.
      def stop name
        shell.say "Stopping analyze area #{name}"

        pids = workers_pids name
        unless pids.empty?
          signal = options[:force] ? 'TERM' : 'QUIT'
          # Argument-list form: no shell is involved in running kill.
          system('kill', '-s', signal, *pids.map(&:to_s))
        end

        wait_worker name

        Resque.remove_queue name
      end

      private

      # Adds or updates the entry for +name+ in config/areas.yml and reports
      # the outcome through the Thor shell (create / update / identical /
      # conflict + force).
      def register_area(name, area_hash)
        area_file = 'config/areas.yml'
        # YAML.load_file returns a falsy value for an empty document.
        areas = (File.exist?(area_file) && YAML.load_file(area_file)) || {}

        unless areas.has_key? name
          status = areas.empty? ? :create : :update
          areas[name] = area_hash
          write_areas area_file, areas
          shell.say_status status, area_file, :green
          return
        end

        if areas[name] == area_hash
          shell.say_status :identical, area_file, :blue
          return
        end

        areas[name] = area_hash
        shell.say_status :conflict, area_file, :red
        if shell.file_collision(area_file) { areas.to_yaml }
          write_areas area_file, areas
          shell.say_status :force, area_file, :yellow
        end
      end

      # Serializes +areas+ as YAML into +path+ (UTF-8).
      def write_areas(path, areas)
        File.open(path, 'w:utf-8') { |file| file.write areas.to_yaml }
      end

      # Configuration hash of the given area from APP_CONFIG[:areas]; an
      # empty hash when there is no areas section or no entry for the area.
      def area_config area_name
        areas = APP_CONFIG[:areas]
        (areas && areas[area_name]) || {}
      end

      # Starts +count+ Resque workers for +queue+. In foreground mode the
      # resque:work rake task is invoked in this process; in background mode
      # each worker is spawned as a detached `rake resque:work` process.
      def run_worker(queue, count = 1, background = true)
        puts "Starting #{count} worker(s) with QUEUE: #{queue}"
        if background
          log_dir = File.join DCA.root, 'log'
          Dir.mkdir log_dir unless Dir.exist? log_dir
          ops = { :pgroup => true }
          if APP_CONFIG[:logger]
            # Append both stdout and stderr of the worker to one debug file.
            debug_file = [File.join(DCA.root, "log/#{queue.underscore}.debug"), 'a']
            ops[:err] = debug_file
            ops[:out] = debug_file
          end
          env_vars = {'QUEUE' => queue}

          count.times do
            ## Using Kernel.spawn and Process.detach because regular system() call would
            ## cause the processes to quit when capistrano finishes
            pid = spawn(env_vars, "rake resque:work", ops)
            Process.detach(pid)
          end
        else
          ENV['QUEUE'] = queue
          ENV['VERBOSE'] = '1'
          Rake::Task['resque:work'].invoke
        end
      end

      # Pids collected for the workers registered on this host whose queue
      # list includes +name+.
      # NOTE(review): this concatenates worker.worker_pids rather than the pid
      # parsed from worker.id; confirm worker_pids is limited to the matching
      # worker, otherwise `stop` may signal workers of other queues.
      def workers_pids name
        pids = []
        Resque.workers.each do |worker|
          host, _pid, queues = worker.id.split(':')
          next unless host == worker.hostname
          next unless queues.to_s.split(',').include? name

          pids.concat(worker.worker_pids)
        end
        pids.uniq
      end

      # Blocks, polling once per second, until no worker of the area is reported.
      def wait_worker name
        sleep 1 until workers_pids(name).empty?
      end
    end
  end
end
@@ -0,0 +1,34 @@
1
+ module <%= DCA.project_name %>
2
+ module Areas
3
+ module <%= @class_name %>
4
+ # Base analyze jobs class for <%=@class_name%> area.
5
+ class AnalyzerJob < DCA::Jobs::AnalyzerJob
6
+ def change
7
+ false
8
+ end
9
+
10
+ def positions &block
11
+ page = Page.find options[:category]
12
+ until page.nil? do
13
+ logger.debug "Page number #{page.number}"
14
+
15
+ page.positions.each do |position|
16
+ begin
17
+ block.call position
18
+ rescue Exception => e
19
+ logger.exception e
20
+ end
21
+ end
22
+
23
+ break if shutdown?
24
+ break unless page.next
25
+ end
26
+ end
27
+
28
+ def fetch position
29
+ position
30
+ end
31
+ end
32
+ end
33
+ end
34
+ end
@@ -0,0 +1,2 @@
1
+ require File.expand_path('../<%= @name %>/models', __FILE__)
2
+ require File.expand_path('../<%= @name %>/analyzer', __FILE__)
@@ -0,0 +1,2 @@
1
+ require File.expand_path('../models/position', __FILE__)
2
+ require File.expand_path('../models/page', __FILE__)
@@ -0,0 +1,17 @@
1
+ module <%= DCA.project_name %>
2
+ module Areas
3
+ module <%= @class_name %>
4
+ class Page < DCA::Models::BaseModel
5
+ attr_reader :url, :number
6
+
7
+ # need to set the right selector
8
+ # has_many :positions, :selector => 'div'
9
+
10
+ def next
11
+ @number += 1
12
+ # get the next page if possible, or return nil
13
+ end
14
+ end
15
+ end
16
+ end
17
+ end
@@ -0,0 +1,8 @@
1
+ module <%= DCA.project_name %>
2
+ module Areas
3
+ module <%= @class_name %>
4
+ class Position < DCA::Models::Position
5
+ end
6
+ end
7
+ end
8
+ end
@@ -0,0 +1,38 @@
1
+ production:
2
+ dca_db:
3
+ driver: ElasticSearch
4
+ host: localhost
5
+ port: 27017
6
+ redis:
7
+ host: localhost
8
+ port: 6379
9
+ notifier:
10
+ driver: Redis
11
+ host: localhost
12
+ port: 6379
13
+
14
+ development:
15
+ dca_db:
16
+ driver: ElasticSearch
17
+ host: localhost
18
+ port: 27017
19
+ redis:
20
+ host: localhost
21
+ port: 6379
22
+ notifier:
23
+ driver: Redis
24
+ host: localhost
25
+ port: 6379
26
+
27
+ test:
28
+ dca_db:
29
+ driver: ElasticSearch
30
+ host: localhost
31
+ port: 27017
32
+ redis:
33
+ host: localhost
34
+ port: 6379
35
+ notifier:
36
+ driver: Redis
37
+ host: localhost
38
+ port: 6379
@@ -0,0 +1,15 @@
1
+ require File.expand_path('../spec_helper', __FILE__)
2
+
3
+ include <%= DCA.project_name%>::Areas::<%= @class_name %>
4
+
5
+ describe '<%= @class_name%> analyzer' do
6
+ before :all do
7
+ <%= DCA.project_name %>::Project.remove
8
+ <%= DCA.project_name %>::Project.install
9
+ DCA::Notifier.create APP_CONFIG[:notifier]
10
+ end
11
+
12
+ after :all do
13
+ <%= DCA.project_name %>::Project.remove
14
+ end
15
+ end
@@ -0,0 +1,2 @@
1
+ require File.expand_path('../../../spec_helper', __FILE__)
2
+ require './lib/areas/<%= @name %>'
@@ -0,0 +1,20 @@
1
# Environment whose section of config/config.yml is loaded; defaults to
# 'development' unless the host application defined SYS_ENV beforehand.
SYS_ENV = 'development' unless defined? SYS_ENV

# APP_CONFIG: symbol-keyed settings of the current environment. Falls back to
# an empty hash when the config file is missing, is empty, or has no section
# for SYS_ENV.
unless defined? APP_CONFIG
  if File.exist? './config/config.yml'
    # YAML.load_file yields a falsy value for an empty file, and the section
    # lookup yields nil for an unknown environment; both fall back to {}.
    settings = YAML.load_file('./config/config.yml') || {}
    APP_CONFIG = (settings[SYS_ENV] || {}).deep_symbolize_keys
  else
    APP_CONFIG = {}
    puts 'WARNING! Missing config file. Use rake system:config to create default config file.' if DCA.used?
  end
end

# AREAS_CONFIG: the :areas section of APP_CONFIG, or an empty hash.
unless defined? AREAS_CONFIG
  AREAS_CONFIG = APP_CONFIG[:areas] || {}
end
19
+
20
+
@@ -0,0 +1,2 @@
1
+ require File.expand_path('../helpers/logger', __FILE__)
2
+
@@ -0,0 +1,50 @@
1
+ module DCA
2
+ module Helpers
3
# Logger that writes every record to its regular log device and, when created
# with verbose = true, additionally echoes it to STDOUT.
class VerboseLogger < ::Logger

  # logdev, shift_age and shift_size are passed on to ::Logger#initialize.
  # verbose - when truthy, a second log device on STDOUT is created.
  def initialize(logdev, shift_age = 0, shift_size = 1048576, verbose = false)
    super logdev, shift_age, shift_size
    @verbose_logdev = LogDevice.new(STDOUT, :shift_age => shift_age, :shift_size => shift_size) if verbose
    @default_logdev = @logdev
  end

  # Same contract as ::Logger#add. In verbose mode the record is written a
  # second time with @logdev temporarily pointing at the STDOUT device.
  # Returns the result of the regular ::Logger#add call.
  def add(severity, message = nil, progname = nil, &block)
    result = super severity, message, progname, &block
    return result unless @verbose_logdev

    begin
      @logdev = @verbose_logdev
      super severity, message, progname, &block
    ensure
      # Always restore the regular device, even if the verbose write raised.
      @logdev = @default_logdev
    end
    result
  end

  # Logs +error+ at FATAL level as its message followed by its backtrace.
  # An exception that was never raised has a nil backtrace; it is logged
  # with an empty trace instead of failing.
  def exception(error, progname = nil, &block)
    trace = Array(error.backtrace).join("\n")
    add(FATAL, "#{error.message}\n#{trace}", progname, &block)
  end
end
25
+
26
# Concern that gives including classes a `logger`.
#
# A class calls `logger_name :some_class_method`; the generated instance
# method `logger` then lazily builds a VerboseLogger. When APP_CONFIG[:logger]
# is set it logs to <DCA.root>/log/<underscored result of that class method>.log,
# otherwise it has no log device. APP_CONFIG[:verbose] enables the STDOUT echo.
module Logger
  extend ActiveSupport::Concern

  module ClassMethods
    # Defines the instance method `logger`; +name+ is the class method whose
    # result names the log file.
    def logger_name name
      define_method :logger do
        @logger ||= begin
          # nil log device: VerboseLogger is built without a file target.
          out = APP_CONFIG[:logger] ? File.join(DCA.root, 'log', "#{(self.class.send name).underscore}.log") : nil
          VerboseLogger.new out, 0, 1048576, APP_CONFIG[:verbose]
        end
      end
    end

    # Class-level logger, as assigned through `logger=` (nil when unset).
    # Needed by the instance-level fallback below, which calls
    # `self.class.logger`.
    def logger
      @logger
    end

    # Assigns the class-level logger.
    def logger= value
      @logger = value
    end
  end

  # Fallback used when the class did not call `logger_name`: returns the
  # class-level logger.
  def logger
    self.class.logger
  end

end
49
+ end
50
+ end
@@ -0,0 +1,3 @@
1
+ require File.expand_path('../jobs/job', __FILE__)
2
+ require File.expand_path('../jobs/analyzer_job', __FILE__)
3
+
@@ -0,0 +1,119 @@
1
module DCA
  module Jobs
    # Base job that walks the positions of an area, analyzes each one and
    # reports every step through Notifier. Subclasses implement `positions`
    # and `fetch`, and may override `change` and `position`.
    class AnalyzerJob < Job
      # Session id of this run: taken from options[:session] or generated once.
      def session
        @session ||= options[:session] || UUID.generate(:compact)
      end

      # Entry point of the job.
      # - When `change` is truthy, only the :change notification is sent.
      # - A distributed job that carries a position analyzes just that position.
      # - Otherwise every yielded position is analyzed (or re-enqueued as its
      #   own job when options[:distributed] is set) until options[:limit]
      #   positions were handled or a shutdown was requested.
      def perform
        return on_change if change

        if options[:distributed] && options[:position]
          analyze(position(options[:position]))
          return
        end

        index = 0
        # get list of positions and add to cache
        positions do |item|
          if options[:distributed]
            distribute item
          else
            analyze item
          end

          index += 1
          break if options[:limit] == index || shutdown?
        end
      end

      # Hook for subclasses; the base implementation always returns false.
      def change
        false
      end

      # Enqueues a separate job for a single position, sharing this run's
      # session id under the :session option (the key `session` reads).
      def distribute position
        self.class.create :distributed => true, :position => position.to_hash, :session => session
      end

      # Return all positions or newly created or modified if possible. Some cases not possible to get newly created or
      # modified positions. In this case cache will be used to identify only newly created or modified positions.
      # Position must be a hash and should contain unique key :id and checksum for compare with cached positions and
      # identify newly created or modified
      def positions(&block)
        raise NotImplementedError
      end

      # Return position model from hash
      def position hash
        Models::Position.new hash
      end

      # Fetch newly created or modified positions
      def fetch position
        raise NotImplementedError
      end

      # Sends the :change notification.
      def on_change
        notify(:change)
      end

      # Logs and notifies that +position+ was analyzed with the given state.
      def on_analyze(position, state)
        logger.debug "[#{position.class}] Analyze position base_id:#{position.base_id} state:#{state}"
        notify(:analyze, :position => position, :state => state)
      end

      # Logs and notifies the outcome of fetching +position+; +result+ is
      # truthy for a valid position. Validation errors are logged otherwise.
      def on_fetch(position, state, result)
        if result
          logger.debug "[#{position.class}] Fetch valid position id:#{position.id} base_id:#{position.base_id} state:#{state}"
        else
          logger.debug "[#{position.class}] Fetch invalid position base_id:#{position.base_id} state:#{state}"
          logger.debug " Validation errors:\n #{position.errors.full_messages.join("\n ")}"
        end
        notify(:fetch, :position => position, :state => state, :result => result )
      end

      # Logs +error+ and sends the :failure notification.
      def on_failure(error)
        logger.exception error
        notify(:failure, :exception => error)
      end

      # Sends the :success notification.
      def on_success
        notify(:success)
      end

      protected

      # Pushes +event+ for this job to the Notifier.
      def notify(event, options={})
        Notifier.push self, event, options
      end

      # Analyzes one position: reports its state and, unless it is
      # :unmodified, fetches it, saves it when valid and reports the fetch
      # result. Any exception is reported through on_failure.
      def analyze position
        state = position.state
        on_analyze position, state
        return if state == :unmodified

        new_position = fetch_safe! position
        if new_position
          position = new_position

          valid = new_position.valid?
          state = position.save if valid
          on_fetch position, state, valid
        else
          on_fetch position, state, false
        end
      # Exception (not StandardError) is rescued on purpose here: the default
      # `fetch` raises NotImplementedError, which is a ScriptError.
      rescue Exception => exception
        on_failure exception
      end

      # Calls `fetch`; on any exception reports the failure and returns false.
      def fetch_safe!(position)
        fetch position
      rescue Exception => exception
        on_failure exception
        false
      end
    end
  end
end