dca 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. data/.document +5 -0
  2. data/.rspec +2 -0
  3. data/Gemfile +48 -0
  4. data/Gemfile.lock +126 -0
  5. data/Guardfile +8 -0
  6. data/LICENSE.txt +20 -0
  7. data/README.rdoc +19 -0
  8. data/Rakefile +48 -0
  9. data/VERSION +1 -0
  10. data/bin/dca +5 -0
  11. data/dca.gemspec +160 -0
  12. data/lib/dca.rb +64 -0
  13. data/lib/dca/cli.rb +32 -0
  14. data/lib/dca/commands/area.rb +133 -0
  15. data/lib/dca/commands/templates/area/analyzer.rb.erb +34 -0
  16. data/lib/dca/commands/templates/area/area.rb.erb +2 -0
  17. data/lib/dca/commands/templates/area/models.rb.erb +2 -0
  18. data/lib/dca/commands/templates/area/page.rb.erb +17 -0
  19. data/lib/dca/commands/templates/area/position.rb.erb +8 -0
  20. data/lib/dca/commands/templates/config.yml.erb +38 -0
  21. data/lib/dca/commands/templates/spec/analyzer_spec.rb.erb +15 -0
  22. data/lib/dca/commands/templates/spec/spec_helper.rb.erb +2 -0
  23. data/lib/dca/config.rb +20 -0
  24. data/lib/dca/helpers.rb +2 -0
  25. data/lib/dca/helpers/logger.rb +50 -0
  26. data/lib/dca/jobs.rb +3 -0
  27. data/lib/dca/jobs/analyzer_job.rb +119 -0
  28. data/lib/dca/jobs/job.rb +62 -0
  29. data/lib/dca/models.rb +5 -0
  30. data/lib/dca/models/base_model.rb +73 -0
  31. data/lib/dca/models/binder.rb +68 -0
  32. data/lib/dca/models/binder_helper.rb +48 -0
  33. data/lib/dca/models/nokogiri_binder.rb +43 -0
  34. data/lib/dca/models/position.rb +15 -0
  35. data/lib/dca/net.rb +1 -0
  36. data/lib/dca/net/browser_helper.rb +20 -0
  37. data/lib/dca/notifier.rb +2 -0
  38. data/lib/dca/notifier/notifier.rb +11 -0
  39. data/lib/dca/notifier/redis/models/analyze_notify.rb +12 -0
  40. data/lib/dca/notifier/redis/models/failure_notify.rb +8 -0
  41. data/lib/dca/notifier/redis/models/fetch_notify.rb +15 -0
  42. data/lib/dca/notifier/redis/models/session.rb +52 -0
  43. data/lib/dca/notifier/redis/notifier.rb +25 -0
  44. data/lib/dca/notifier/redis_notifier.rb +9 -0
  45. data/lib/dca/storage.rb +3 -0
  46. data/lib/dca/storage/elasticsearch_storage.rb +80 -0
  47. data/lib/dca/storage/mongo_storage.rb +51 -0
  48. data/lib/dca/storage/storage.rb +55 -0
  49. data/spec/analyzer_spec.rb +64 -0
  50. data/spec/area_task_spec.rb +45 -0
  51. data/spec/base_model_spec.rb +34 -0
  52. data/spec/binder_spec.rb +69 -0
  53. data/spec/config.yml +18 -0
  54. data/spec/elasticsearch_storage_spec.rb +28 -0
  55. data/spec/fixtures/page.html +12 -0
  56. data/spec/fixtures/positions.yml +13 -0
  57. data/spec/fixtures/positions_with_error.yml +14 -0
  58. data/spec/fixtures/states.yml +3 -0
  59. data/spec/job_spec.rb +31 -0
  60. data/spec/mock/analyzer_job.rb +30 -0
  61. data/spec/mock/file_storage.rb +28 -0
  62. data/spec/mock/notify_object.rb +13 -0
  63. data/spec/mock/page.rb +13 -0
  64. data/spec/mock/position.rb +40 -0
  65. data/spec/mock/web_notifier.rb +30 -0
  66. data/spec/mongo_storage_spec.rb +20 -0
  67. data/spec/redis_notifier_spec.rb +98 -0
  68. data/spec/spec_helper.rb +27 -0
  69. data/spec/support/storage_examples.rb +103 -0
  70. metadata +408 -0
@@ -0,0 +1,18 @@
1
+ test:
2
+ elascticseach_db:
3
+ driver: ElasticSearch
4
+ host: localhost
5
+ port: 9200
6
+ index: test
7
+ mongo_db:
8
+ driver: Mongo
9
+ host: localhost
10
+ port: 27017
11
+ collection: test
12
+ db:
13
+ driver: Mock::File
14
+ notifier:
15
+ driver: Mock::Web
16
+ logger: false
17
+
18
+
@@ -0,0 +1,28 @@
1
+ require File.expand_path('../spec_helper', __FILE__)
2
+ require File.expand_path('../mock/position', __FILE__)
3
+
4
+ include DCA::Mock
5
+
6
+ describe 'ElasticSearch storage' do
7
+ let(:connection) { @connection ||= DCA::ElasticSearchStorage.establish_connection APP_CONFIG[:elascticseach_db] }
8
+ let(:position) { ElasticSearchPosition.new :base_id => '0', :checksum => '0'}
9
+ let(:storage) { @storage ||= DCA::ElasticSearchStorage.new connection, position.class, :index => 'test' }
10
+
11
+ before :all do
12
+ connection
13
+ storage.index do
14
+ create
15
+ store :type => 'position', :base_id => '1', :checksum => '1'
16
+ refresh
17
+ end
18
+ end
19
+
20
+ after :all do
21
+ storage.index do
22
+ delete
23
+ refresh
24
+ end
25
+ end
26
+
27
+ it_behaves_like 'storage'
28
+ end
@@ -0,0 +1,12 @@
1
+ <html>
2
+ <head>
3
+ <title>Page</title>
4
+ </head>
5
+ <body>
6
+ <ul>
7
+ <li><a href="/positions/1">Position 1</a><span class="description">Description 1</span><span class="date">12.10.2012</span></li>
8
+ <li><a href="/positions/2">Position 2</a><span class="description">Description 2</span><span class="date">13.10.2012</span></li>
9
+ <li><a href="/positions/3">Position 3</a><span class="description">Description 3</span><span class="date">14.10.2012</span></li>
10
+ </ul>
11
+ </body>
12
+ </html>
@@ -0,0 +1,13 @@
1
+ ---
2
+ - id: 1
3
+ checksum: '1'
4
+ failed: false
5
+ base_id: 1
6
+ - id: 2
7
+ checksum: '2'
8
+ failed: true
9
+ base_id: 2
10
+ - id: 3
11
+ checksum: '2'
12
+ failed: false
13
+ base_id: 3
@@ -0,0 +1,14 @@
1
+ ---
2
+ - id: 1
3
+ checksum: '1'
4
+ failed: false
5
+ base_id: 1
6
+ - id: 2
7
+ checksum: '2'
8
+ failed: true
9
+ raise: true
10
+ base_id: 2
11
+ - id: 3
12
+ checksum: '2'
13
+ failed: false
14
+ base_id: 3
@@ -0,0 +1,3 @@
1
+ 1: create
2
+ 2: create
3
+ 3: unmodified
@@ -0,0 +1,31 @@
1
+ require 'timeout'
2
+ require File.expand_path('../spec_helper', __FILE__)
3
+
4
+ module TestModule
5
+ class TestJob < DCA::Jobs::Job
6
+ end
7
+
8
+ class LoopJob < DCA::Jobs::Job
9
+ def perform
10
+ loop do
11
+ sleep 1
12
+ break if shutdown?
13
+ end
14
+ end
15
+ end
16
+ end
17
+
18
+ describe 'Job' do
19
+ it 'should get queue name from module name' do
20
+ TestModule::TestJob.queue.should == 'TestModule'
21
+ end
22
+
23
+ it 'should shutdown when QUIT signal is happened' do
24
+ job_pid = Process.fork { TestModule::LoopJob.create }
25
+ sleep 1
26
+ Process.kill 'QUIT', job_pid
27
+ Timeout::timeout(1) {
28
+ Process.waitpid job_pid
29
+ }
30
+ end
31
+ end
@@ -0,0 +1,30 @@
1
+ module DCA
2
+ module Mock
3
+ class AnalyzerJob < DCA::Jobs::AnalyzerJob
4
+
5
+ def positions(&block)
6
+ positions = YAML.load_file(options[:fixture])
7
+ positions.each { |position| block.call Position.new position.symbolize_keys }
8
+ end
9
+
10
+ def fetch(position)
11
+ raise Exception if position.raise
12
+ position.failed ? nil : position
13
+ end
14
+ end
15
+ end
16
+
17
+ module Areas
18
+ module Mock
19
+ class AnalyzerJob < DCA::Jobs::AnalyzerJob
20
+ def perform
21
+ loop do
22
+ break if shutdown?
23
+ end
24
+ end
25
+ end
26
+ end
27
+ end
28
+ end
29
+
30
+
@@ -0,0 +1,28 @@
1
+ module DCA
2
+ module Mock
3
+ class FileStorage
4
+
5
+ attr_reader :collection
6
+
7
+ def initialize(connection, context, options = {})
8
+ @collection = DCA.project_name
9
+ @states = YAML.load_file('./spec/fixtures/states.yml')
10
+ end
11
+
12
+ def self.establish_connection(config)
13
+ end
14
+
15
+ def state(position)
16
+ @states[position.id].to_sym
17
+ end
18
+
19
+ def refresh(position, state)
20
+
21
+ end
22
+
23
+ def context object
24
+ self.clone
25
+ end
26
+ end
27
+ end
28
+ end
@@ -0,0 +1,13 @@
1
+ module DCA
2
+ module Mock
3
+ class NotifyObject
4
+ def session
5
+ 'test_session'
6
+ end
7
+
8
+ def self.queue
9
+ 'test_queue'
10
+ end
11
+ end
12
+ end
13
+ end
@@ -0,0 +1,13 @@
1
+ module DCA
2
+ module Mock
3
+ class Page < DCA::Models::BaseModel
4
+ has_many :positions, ExtPosition, :selector => 'li'
5
+ end
6
+
7
+
8
+ class PageExt < DCA::Models::BaseModel
9
+ attr_accessor :category
10
+ has_many :positions, :selector => 'li', :polymorphic => :category, :append => true
11
+ end
12
+ end
13
+ end
@@ -0,0 +1,40 @@
1
+ module DCA
2
+ module Mock
3
+ class MongoSearchPosition < DCA::Models::Position
4
+ establish_connection :mongo_db
5
+ end
6
+
7
+ class ElasticSearchPosition < DCA::Models::Position
8
+ establish_connection :elascticseach_db
9
+ end
10
+
11
+ class Position < DCA::Models::Position
12
+ attr_accessor :raise, :failed, :title
13
+
14
+ has_one :base_id, :integer, :selector => 'a', :attribute => :href, :regex => /(\d+)$/
15
+ has_one :title, :string, :selector => 'a'
16
+ end
17
+
18
+ class ExtPosition < Position
19
+ has_one :description, :string, :selector => 'span.description'
20
+ has_one :date, :datetime, :selector => 'span.date'
21
+ end
22
+
23
+ class FullPosition < Position
24
+ has_one :base_id, :string, :selector => 'a'
25
+ end
26
+
27
+ class ChildPosition < DCA::Models::Position
28
+ attr_reader :name, :test
29
+ end
30
+
31
+ class RootPosition < DCA::Models::Position
32
+ has_one :one_child, :child_position
33
+ has_many :child_position
34
+ end
35
+
36
+ class PositionWithoutState < DCA::Models::BaseModel
37
+ attr_reader :name
38
+ end
39
+ end
40
+ end
@@ -0,0 +1,30 @@
1
+ module DCA
2
+ module Mock
3
+ class WebNotifier
4
+ def initialize config
5
+
6
+ end
7
+
8
+ def self.queue
9
+ @queue ||= {}
10
+ end
11
+
12
+ def self.clean
13
+ @queue = {}
14
+ end
15
+
16
+ def push(object, event, options)
17
+ if event == :fetch && options[:result] == false
18
+ failed_queue = Mock::WebNotifier.queue[:failed] ||= {}
19
+ failed_queue[options[:state]] ||= 0
20
+ failed_queue[options[:state]] += 1
21
+ end
22
+ if [:analyze, :fetch].include? event
23
+ queue = Mock::WebNotifier.queue[event] ||= {}
24
+ queue[options[:state]] ||= 0
25
+ queue[options[:state]] += 1
26
+ end
27
+ end
28
+ end
29
+ end
30
+ end
@@ -0,0 +1,20 @@
1
+ require File.expand_path('../spec_helper', __FILE__)
2
+ require File.expand_path('../mock/position', __FILE__)
3
+
4
+ include DCA::Mock
5
+
6
+ describe DCA::MongoStorage do
7
+ let(:connection) { @connection ||= DCA::MongoStorage.establish_connection APP_CONFIG[:mongo_db] }
8
+ let(:position) { MongoSearchPosition.new :base_id => '0', :checksum => '0'}
9
+ let(:storage) { @storage ||= DCA::MongoStorage.new connection, position.class, :collection => 'test' }
10
+
11
+ before :all do
12
+ connection
13
+ end
14
+
15
+ after :all do
16
+ connection.drop_database storage.database.name
17
+ end
18
+
19
+ it_behaves_like 'storage'
20
+ end
@@ -0,0 +1,98 @@
1
+ require File.expand_path('../spec_helper', __FILE__)
2
+ require File.expand_path('../mock/notify_object', __FILE__)
3
+ require File.expand_path('../mock/position', __FILE__)
4
+ require File.expand_path('../mock/file_storage', __FILE__)
5
+ require File.expand_path('../mock/analyzer_job', __FILE__)
6
+
7
+ include DCA
8
+
9
+ describe 'Redis Notifier' do
10
+
11
+ it 'should connect to redis' do
12
+ DCA::Notifier.create :driver => 'Redis', :host => 'localhost', :port => '6379'
13
+ Ohm.redis.info
14
+ end
15
+
16
+ describe 'Instance' do
17
+ before :all do
18
+ DCA::Notifier.create :driver => 'Redis', :host => 'localhost', :port => '6379'
19
+ end
20
+
21
+ before :each do
22
+ DCA::Redis::Session.all.each { |session| session.delete }
23
+ end
24
+
25
+ it 'should push analyze notify' do
26
+ notify_object = Mock::NotifyObject.new
27
+ DCA::Notifier.push notify_object, :analyze, :state => :create
28
+ session = DCA::Redis::Session.find(:project => 'DCA', :area => 'test_queue', :uid => 'test_session').first
29
+ session.analyze_state(:create).count.should equal 1
30
+
31
+ DCA::Notifier.push notify_object, :analyze, :state => :create
32
+ session.analyze_state(:create).count.should equal 2
33
+ end
34
+
35
+ it 'should push analyze notify with different state' do
36
+ notify_object = Mock::NotifyObject.new
37
+ DCA::Notifier.push notify_object, :analyze, :state => :create
38
+ DCA::Notifier.push notify_object, :analyze, :state => :update
39
+ DCA::Notifier.push notify_object, :analyze, :state => :remove
40
+ DCA::Notifier.push notify_object, :analyze, :state => :unmodified
41
+
42
+ session = DCA::Redis::Session.find(:project => 'DCA', :area => 'test_queue', :uid => 'test_session').first
43
+ session.analyze_state(:create).count.should equal 1
44
+ session.analyze_state(:update).count.should equal 1
45
+ session.analyze_state(:remove).count.should equal 1
46
+ session.analyze_state(:unmodified).count.should equal 1
47
+ end
48
+
49
+ it 'should push fetch notify' do
50
+ notify_object = Mock::NotifyObject.new
51
+ DCA::Notifier.push notify_object, :fetch, :state => :create, :result => true
52
+ session = DCA::Redis::Session.find(:project => 'DCA', :area => 'test_queue', :uid => 'test_session').first
53
+ session.fetch_state(:create).success.should equal 1
54
+
55
+ DCA::Notifier.push notify_object, :fetch, :state => :create, :result => true
56
+ session.fetch_state(:create).success.should equal 2
57
+
58
+ DCA::Notifier.push notify_object, :fetch, :state => :create, :result => false
59
+ session.fetch_state(:create).failure.should equal 1
60
+
61
+ DCA::Notifier.push notify_object, :fetch, :state => :create, :result => false
62
+ session.fetch_state(:create).failure.should equal 2
63
+ end
64
+
65
+ it 'should push analyze notify with different state' do
66
+ notify_object = Mock::NotifyObject.new
67
+ DCA::Notifier.push notify_object, :fetch, :state => :create, :result => true
68
+ DCA::Notifier.push notify_object, :fetch, :state => :update, :result => true
69
+
70
+ session = DCA::Redis::Session.find(:project => 'DCA', :area => 'test_queue', :uid => 'test_session').first
71
+ session.fetch_state(:create).success.should equal 1
72
+ session.fetch_state(:update).success.should equal 1
73
+ end
74
+
75
+ it 'should notify failure' do
76
+ notify_object = Mock::NotifyObject.new
77
+
78
+ begin
79
+ raise "Test exception"
80
+ rescue Exception => exception
81
+ DCA::Notifier.push notify_object, :failure, :exception => exception
82
+ end
83
+
84
+ session = DCA::Redis::Session.find(:project => 'DCA', :area => 'test_queue', :uid => 'test_session').first
85
+ session.failures.count.should equal 1
86
+ end
87
+
88
+ it 'should work with analyze job' do
89
+ Mock::AnalyzerJob.create :fixture => './spec/fixtures/positions.yml'
90
+ session = DCA::Redis::Session.all.first
91
+ session.analyze_state(:create).count.should equal 2
92
+ session.analyze_state(:unmodified).count.should equal 1
93
+
94
+ session.fetch_state(:create).success.should equal 1
95
+ session.fetch_state(:create).failure.should equal 1
96
+ end
97
+ end
98
+ end
@@ -0,0 +1,27 @@
1
+ require 'simplecov'
2
+ SimpleCov.start
3
+
4
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
5
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
6
+
7
+ require 'yaml'
8
+ require 'rake'
9
+ require 'rspec'
10
+ require 'hashr'
11
+
12
+ SYS_ENV = 'test'
13
+ APP_CONFIG = YAML.load_file('./spec/config.yml')[SYS_ENV].deep_symbolize_keys
14
+
15
+ require './lib/dca'
16
+
17
+
18
+ # Set resque call perform method inline, without putting into redis queue
19
+ Resque.inline = true
20
+
21
+ # Requires supporting files with custom matchers and macros, etc,
22
+ # in ./support/ and its subdirectories.
23
+ Dir["#{File.dirname(__FILE__)}/support/**/*.rb"].each {|f| require f}
24
+
25
+ RSpec.configure do |config|
26
+
27
+ end
@@ -0,0 +1,103 @@
1
+ shared_examples_for 'storage' do
2
+ context '.establish_connection' do
3
+ it 'should connect to storage' do
4
+ storage.should_not be_nil
5
+ end
6
+ end
7
+
8
+ describe '#state' do
9
+ context 'when new position' do
10
+ subject { position.state }
11
+ it { should equal :create}
12
+ end
13
+
14
+ context 'when modify position' do
15
+ before do
16
+ position.save
17
+ position.checksum = 1
18
+ end
19
+
20
+ after { position.destroy }
21
+
22
+ subject { position.state }
23
+ it { should equal :update}
24
+ end
25
+
26
+ context 'when exist position' do
27
+ before { position.save }
28
+ after { position.destroy }
29
+ subject { position.state }
30
+ it { should equal :unmodified}
31
+ end
32
+
33
+ context 'when position without state' do
34
+ let(:position) { PositionWithoutState.new name: 'test'}
35
+ subject { position.state }
36
+ it { should equal :create}
37
+ end
38
+ end
39
+
40
+ describe '#refresh' do
41
+ def refresh state
42
+ storage.should_receive(state).with(position)
43
+ storage.refresh(position, state)
44
+ end
45
+
46
+ context 'when new position' do
47
+ it 'then create it' do
48
+ refresh :create
49
+ end
50
+ end
51
+
52
+ context 'when modify position' do
53
+ it 'then update it' do
54
+ refresh :update
55
+ end
56
+ end
57
+
58
+ context 'when old position' do
59
+ it 'then delete it' do
60
+ refresh :remove
61
+ end
62
+ end
63
+ end
64
+
65
+ describe '#create' do
66
+ before { position.save }
67
+ after { position.destroy }
68
+
69
+ it 'sould create position' do
70
+ storage.find(position).should_not be_nil
71
+ end
72
+
73
+ it 'set position id' do
74
+ position.id.should_not be_nil
75
+ end
76
+ end
77
+
78
+ describe '#update' do
79
+ before do
80
+ position.save
81
+ position.checksum = '1'
82
+ position.save
83
+ end
84
+ after { position.destroy }
85
+
86
+ it 'should update position' do
87
+ storage.find(position)['checksum'].should eql '1'
88
+ end
89
+ end
90
+
91
+ describe '#remove' do
92
+ before do
93
+ position.save
94
+ position.destroy
95
+ end
96
+
97
+ after { position.destroy }
98
+
99
+ it 'should remove position' do
100
+ storage.find(position).should be_nil
101
+ end
102
+ end
103
+ end