elasticrawl 1.0.0 → 1.1.0

@@ -4,5 +4,35 @@ module Elasticrawl
  class CrawlSegment < ActiveRecord::Base
    belongs_to :crawl
    has_many :job_steps
+
+   # Description shows name and number of files in the segment.
+   def segment_desc
+     "Segment: #{segment_name} Files: #{file_count}"
+   end
+
+   # Creates a crawl segment based on its S3 path if it does not exist.
+   def self.create_segment(crawl, segment_name, file_count)
+     s3_uri = build_s3_uri(crawl.crawl_name, segment_name)
+
+     segment = CrawlSegment.where(:crawl_id => crawl.id,
+                                  :segment_name => segment_name,
+                                  :segment_s3_uri => s3_uri,
+                                  :file_count => file_count).first_or_create
+   end
+
+   private
+   # Generates the S3 location where this segment is stored.
+   def self.build_s3_uri(crawl_name, segment_name)
+     s3_path = ['',
+                Elasticrawl::COMMON_CRAWL_PATH,
+                crawl_name,
+                Elasticrawl::SEGMENTS_PATH,
+                segment_name,
+                '']
+
+     URI::Generic.build(:scheme => 's3',
+                        :host => Elasticrawl::COMMON_CRAWL_BUCKET,
+                        :path => s3_path.join('/'))
+   end
  end
  end
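
For context, a minimal usage sketch of the new class method (not part of the diff; the values mirror the specs further down):

# Illustrative only: create or find a segment record and print its description.
require 'elasticrawl'

crawl = Elasticrawl::Crawl.create(:crawl_name => 'CC-MAIN-2014-49')
segment = Elasticrawl::CrawlSegment.create_segment(crawl, '1416400372202.67', 3)

puts segment.segment_desc
# => Segment: 1416400372202.67 Files: 3
puts segment.segment_s3_uri
# => s3://aws-publicdatasets/common-crawl/crawl-data/CC-MAIN-2014-49/segments/1416400372202.67/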
@@ -7,19 +7,26 @@ module Elasticrawl
  # Elastic MapReduce job flow and cluster.
  def confirm_message
    cluster = Cluster.new
-   message = []

-   message[0] = 'Job configuration'
-   message[1] = self.job_desc
-   message[2] = ''
-   message[3] = cluster.cluster_desc
+   case self.type
+   when 'Elasticrawl::ParseJob'
+     message = segment_list
+   else
+     message = []
+   end
+
+   message.push('Job configuration')
+   message.push(self.job_desc)
+   message.push('')
+   message.push(cluster.cluster_desc)
+
    message.join("\n")
  end

  # Displays the Job Name and Elastic MapReduce Job Flow ID if the job was
  # launched successfully.
  def result_message
-   "\nJob Name: #{self.job_name} Job Flow ID: #{self.job_flow_id}"
+   "\nJob: #{self.job_name} Job Flow ID: #{self.job_flow_id}"
  end

  # Displays the history of the current job. Called by the status command.
@@ -13,7 +13,7 @@ module Elasticrawl
  # data files to process before the job exits.
  def job_flow_step(job_config)
    jar = job_config['jar']
-   max_files = self.job.max_files
+   max_files = self.job.max_files

    step_args = []
    step_args[0] = job_config['class']
@@ -35,8 +35,10 @@ module Elasticrawl
  def set_step_name
    case self.job.type
    when 'Elasticrawl::ParseJob'
-     segment = self.crawl_segment.segment_name if self.crawl_segment.present?
-     "Segment: #{segment}"
+     if self.crawl_segment.present?
+       max_files = self.job.max_files || 'all'
+       "#{self.crawl_segment.segment_desc} Parsing: #{max_files}"
+     end
    when 'Elasticrawl::CombineJob'
      paths = self.input_paths.split(',')
      "Combining #{paths.count} jobs"
@@ -41,6 +41,20 @@ module Elasticrawl
    build_s3_uri(s3_path)
  end

+ # Return list of segment descriptions.
+ def segment_list
+   segments = ['Segments']
+
+   job_steps.each do |job_step|
+     if job_step.crawl_segment.present?
+       segment = job_step.crawl_segment
+       segments.push(segment.segment_desc)
+     end
+   end
+
+   segments.push('')
+ end
+
  private
  # Creates a job step for the crawl segment.
  def create_job_step(segment)
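
Taken together with the confirm_message change above, the ParseJob confirmation now opens with the per-segment descriptions. A rough sketch of the output, using values that mirror the specs below (illustrative, not captured from a real run):

# Illustrative only: roughly what confirm_message returns for a ParseJob
# once two segments have been assigned.
require 'elasticrawl'

crawl = Elasticrawl::Crawl.create(:crawl_name => 'CC-MAIN-2014-49')
crawl.create_segments

job = Elasticrawl::ParseJob.new
job.set_segments(crawl.crawl_segments[0..1], 3)

puts job.confirm_message
# Segments
# Segment: 1416400372202.67 Files: 3
# Segment: 1416400372490.23 Files: 2
#
# Job configuration
# Crawl: CC-MAIN-2014-49 Segments: 2 Parsing: 3 files per segment
#
# Cluster configuration
# Master: 1 m1.medium (Spot: 0.12)
# Core: 2 m1.medium (Spot: 0.12)
# Task: --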
@@ -1,3 +1,3 @@
  module Elasticrawl
-   VERSION = "1.0.0"
+   VERSION = "1.1.0"
  end
@@ -0,0 +1,6 @@
+ common-crawl/crawl-data/CC-MAIN-2014-49/segments/1416400372202.67/warc/CC-MAIN-20141119123252-00001-ip-10-235-23-156.ec2.internal.warc.gz
+ common-crawl/crawl-data/CC-MAIN-2014-49/segments/1416400372202.67/warc/CC-MAIN-20141119123252-00002-ip-10-235-23-156.ec2.internal.warc.gz
+ common-crawl/crawl-data/CC-MAIN-2014-49/segments/1416400372202.67/warc/CC-MAIN-20141119123252-00003-ip-10-235-23-156.ec2.internal.warc.gz
+ common-crawl/crawl-data/CC-MAIN-2014-49/segments/1416400372490.23/warc/CC-MAIN-20141119123252-00000-ip-10-235-23-156.ec2.internal.warc.gz
+ common-crawl/crawl-data/CC-MAIN-2014-49/segments/1416400372490.23/warc/CC-MAIN-20141119123252-00002-ip-10-235-23-156.ec2.internal.warc.gz
+ common-crawl/crawl-data/CC-MAIN-2014-49/segments/1416400372542.20/warc/CC-MAIN-20141119123252-00000-ip-10-235-23-156.ec2.internal.warc.gz
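
This new fixture replaces the hard-coded segment paths the old spec_helper stubbed (see below). Its three distinct segment directories yield file counts of 3, 2 and 1. A short sketch of how such a path list could be grouped into segment names and counts; the gem's own parsing code is not part of this diff, so the helper below is only an illustration:

# Illustrative only: group WARC file paths by their segment directory.
warc_paths = IO.read(File.join('spec', 'fixtures', 'warc.paths'))

file_counts = Hash.new(0)
warc_paths.split.each do |path|
  segment_name = path[/segments\/([^\/]+)\//, 1]
  file_counts[segment_name] += 1
end

file_counts.each { |name, count| puts "Segment: #{name} Files: #{count}" }
# Segment: 1416400372202.67 Files: 3
# Segment: 1416400372490.23 Files: 2
# Segment: 1416400372542.20 Files: 1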
data/spec/spec_helper.rb CHANGED
@@ -1,31 +1,25 @@
  require 'elasticrawl'
  require 'rspec'
- require 'mocha'
  require 'database_cleaner'
  require 'shoulda-matchers'

  RSpec.configure do |config|
-   config.before(:suite) do
-     # Return S3 paths that are used to create a crawl object with 3 crawl segments.
-     segment_paths = []
-     segment_paths[0] = 'common-crawl/crawl-data/CC-MAIN-2013-20/segments/1368696381249/'
-     segment_paths[1] = 'common-crawl/crawl-data/CC-MAIN-2013-20/segments/1368696381630/'
-     segment_paths[2] = 'common-crawl/crawl-data/CC-MAIN-2013-20/segments/1368696382185/'
-     Elasticrawl::Crawl.any_instance.stubs(:s3_segment_paths).returns(segment_paths)
+   # Run each test in a transaction and rollback data on completion.
+   DatabaseCleaner.strategy = :transaction
+
+   config.before(:each) do
+     # Stub S3 call to get WARC file paths
+     warc_paths = IO.read(File.join(File.dirname(__FILE__), 'fixtures', 'warc.paths'))
+     allow_any_instance_of(Elasticrawl::Crawl).to receive(:warc_paths).and_return(warc_paths)

      # Load config from spec/fixtures/ rather than ~/.elasticrawl/
      config_dir = File.join(File.dirname(__FILE__), 'fixtures')
-     Elasticrawl::Config.any_instance.stubs(:config_dir).returns(config_dir)
+     allow_any_instance_of(Elasticrawl::Config).to receive(:config_dir).and_return(config_dir)

      # Load sqlite database. For testing this is stored at db/elasticrawl.sqlite3
      config = Elasticrawl::Config.new
      config.load_database
-   end

-   # Run each test in a transaction and rollback data on completion.
-   DatabaseCleaner.strategy = :transaction
-
-   config.before(:each) do
      DatabaseCleaner.start
    end

@@ -2,7 +2,7 @@ require 'spec_helper'

  describe Elasticrawl::Cluster do
    describe '#create_job_flow' do
-     let(:crawl) { Elasticrawl::Crawl.create(:crawl_name => 'CC-MAIN-2013-20') }
+     let(:crawl) { Elasticrawl::Crawl.create(:crawl_name => 'CC-MAIN-2014-49') }
      let(:job) { Elasticrawl::ParseJob.new }
      let(:cluster) { Elasticrawl::Cluster.new }
      subject { cluster.create_job_flow(job) }
@@ -16,7 +16,7 @@ describe Elasticrawl::Cluster do
    end

    it 'should have a job flow name' do
-     expect(subject.name).to eq "Job Name: #{job.job_name} #{job.job_desc}"
+     expect(subject.name).to eq "Job: #{job.job_name} #{job.job_desc}"
    end

    it 'should have a log uri' do
@@ -3,7 +3,7 @@ require 'spec_helper'

  describe Elasticrawl::CombineJob do
    describe '#set_input_jobs' do
      let(:job_name) { (Time.now.to_f * 1000).to_i.to_s }
-     let(:crawl) { Elasticrawl::Crawl.create(:crawl_name => 'CC-MAIN-2013-20') }
+     let(:crawl) { Elasticrawl::Crawl.create(:crawl_name => 'CC-MAIN-2014-49') }
      let(:segment_list_1) { crawl.crawl_segments[0..1] }
      let(:segment_list_2) { [crawl.crawl_segments[2]]}

@@ -55,7 +55,7 @@ describe Elasticrawl::CombineJob do
    end

    describe '#run' do
-     let(:crawl) { Elasticrawl::Crawl.create(:crawl_name => 'CC-MAIN-2013-20') }
+     let(:crawl) { Elasticrawl::Crawl.create(:crawl_name => 'CC-MAIN-2014-49') }
      let(:parse_job_1) { Elasticrawl::ParseJob.new }
      let(:parse_job_2) { Elasticrawl::ParseJob.new }
      let(:combine_job) { Elasticrawl::CombineJob.new }
@@ -71,7 +71,7 @@ describe Elasticrawl::CombineJob do
      end

      it 'should set a job flow id' do
-       Elasticity::JobFlow.any_instance.stubs(:run).returns(job_flow_id)
+       allow_any_instance_of(Elasticity::JobFlow).to receive(:run).and_return(job_flow_id)
        combine_job.run

        expect(combine_job.job_flow_id).to eq job_flow_id
@@ -79,7 +79,7 @@ describe Elasticrawl::CombineJob do
    end

    describe '#log_uri' do
-     let(:crawl) { Elasticrawl::Crawl.create(:crawl_name => 'CC-MAIN-2013-20') }
+     let(:crawl) { Elasticrawl::Crawl.create(:crawl_name => 'CC-MAIN-2014-49') }
      let(:parse_job) { Elasticrawl::ParseJob.new }
      let(:job) { Elasticrawl::CombineJob.new }

@@ -6,22 +6,31 @@ describe Elasticrawl::CrawlSegment do
  it { should have_db_column(:segment_name).of_type(:string) }
  it { should have_db_column(:segment_s3_uri).of_type(:string) }
  it { should have_db_column(:parse_time).of_type(:datetime) }
+ it { should have_db_column(:file_count).of_type(:integer) }

- describe '#initialize' do
-   let(:crawl) { Elasticrawl::Crawl.create(:crawl_name => 'CC-MAIN-2013-20') }
-   subject { crawl.crawl_segments[0]}
-
-   before do
-     crawl.create_segments
-   end
-
+ describe '.create_segment' do
+   let(:crawl) { Elasticrawl::Crawl.create(:crawl_name => 'CC-MAIN-2014-49') }
+   let(:segment_name) { '1416400372202.67' }
+   let(:file_count) { 3 }
+   let(:segment_desc) { 'Segment: 1416400372202.67 Files: 3' }
+   subject { Elasticrawl::CrawlSegment.create_segment(crawl,
+                                                      segment_name,
+                                                      file_count) }
    it 'should have a segment name' do
-     expect(subject.segment_name).to eq '1368696381249'
+     expect(subject.segment_name).to eq segment_name
    end

    it 'should have an s3 uri' do
      expect(subject.segment_s3_uri).to eq \
-       's3://aws-publicdatasets/common-crawl/crawl-data/CC-MAIN-2013-20/segments/1368696381249/'
+       "s3://aws-publicdatasets/common-crawl/crawl-data/#{crawl.crawl_name}/segments/#{segment_name}/"
+   end
+
+   it 'should have a file count' do
+     expect(subject.file_count).to eq file_count
+   end
+
+   it 'should have a segment description' do
+     expect(subject.segment_desc).to eq segment_desc
    end
  end
  end
@@ -5,7 +5,7 @@ describe Elasticrawl::Crawl do
  it { should have_db_column(:crawl_name).of_type(:string) }

  describe '#has_segments?' do
-   let(:crawl_name) { 'CC-MAIN-2013-20' }
+   let(:crawl_name) { 'CC-MAIN-2014-49' }
    subject { Elasticrawl::Crawl.new(:crawl_name => crawl_name) }

    it 'should have segments' do
@@ -14,7 +14,7 @@ describe Elasticrawl::Crawl do
  end

  describe '#create_segments' do
-   let(:crawl_name) { 'CC-MAIN-2013-20' }
+   let(:crawl_name) { 'CC-MAIN-2014-49' }
    subject { Elasticrawl::Crawl.create(:crawl_name => crawl_name) }

    before do
@@ -30,17 +30,21 @@ describe Elasticrawl::Crawl do
    end

    it 'should create segment names' do
-     expect(subject.crawl_segments[0].segment_name).to eq '1368696381249'
+     expect(subject.crawl_segments[0].segment_name).to eq '1416400372202.67'
    end

    it 'should create segment s3 uris' do
      expect(subject.crawl_segments[0].segment_s3_uri).to eq \
-       's3://aws-publicdatasets/common-crawl/crawl-data/CC-MAIN-2013-20/segments/1368696381249/'
+       's3://aws-publicdatasets/common-crawl/crawl-data/CC-MAIN-2014-49/segments/1416400372202.67/'
+   end
+
+   it 'should set file counts' do
+     expect(subject.crawl_segments[0].file_count).to eq 3
    end
  end

  describe '#next_segments' do
-   let(:crawl_name) { 'CC-MAIN-2013-20' }
+   let(:crawl_name) { 'CC-MAIN-2014-49' }
    subject { Elasticrawl::Crawl.create(:crawl_name => crawl_name) }

    before do
@@ -52,7 +56,7 @@ describe Elasticrawl::Crawl do

      expect(crawl_segments.count).to eq 3
      expect(crawl_segments[0].crawl.crawl_name).to eq crawl_name
-     expect(crawl_segments[0].segment_name).to eq '1368696381249'
+     expect(crawl_segments[0].segment_name).to eq '1416400372202.67'
    end

    it 'should return first # segments' do
@@ -60,12 +64,12 @@ describe Elasticrawl::Crawl do

      expect(crawl_segments.count).to eq 2
      expect(crawl_segments[0].crawl.crawl_name).to eq crawl_name
-     expect(crawl_segments[0].segment_name).to eq '1368696381249'
+     expect(crawl_segments[0].segment_name).to eq '1416400372202.67'
    end
  end

  describe '#select_segments' do
-   let(:crawl_name) { 'CC-MAIN-2013-20' }
+   let(:crawl_name) { 'CC-MAIN-2014-49' }
    subject { Elasticrawl::Crawl.create(:crawl_name => crawl_name) }

    before do
@@ -80,7 +84,7 @@ describe Elasticrawl::Crawl do
    end

    it 'should select only segments in list' do
-     segments_list = ['1368696381249', '1368696382185']
+     segments_list = ['1416400372202.67', '1416400372490.23']
      crawl_segments = subject.select_segments(segments_list)

      expect(crawl_segments.count).to eq 2
@@ -88,7 +92,7 @@ describe Elasticrawl::Crawl do
  end

  describe '#reset' do
-   let(:crawl) { Elasticrawl::Crawl.create(:crawl_name => 'CC-MAIN-2013-20') }
+   let(:crawl) { Elasticrawl::Crawl.create(:crawl_name => 'CC-MAIN-2014-49') }
    let(:job) { Elasticrawl::ParseJob.new }
    let(:job_flow_id) { 'j-3QHDKKBT6VAIS' }

@@ -96,7 +100,7 @@ describe Elasticrawl::Crawl do
      crawl.create_segments
      job.set_segments(crawl.crawl_segments[0..1])

-     Elasticity::JobFlow.any_instance.stubs(:run).returns(job_flow_id)
+     allow_any_instance_of(Elasticity::JobFlow).to receive(:run).and_return(job_flow_id)
      job.run

      crawl.reset
@@ -109,9 +113,9 @@ describe Elasticrawl::Crawl do
  end

  describe '.status' do
-   let(:job_desc) { 'Crawl: CC-MAIN-2013-20 Segments: 2 Parsing: 5 files per segment' }
-   let(:crawl) { Elasticrawl::Crawl.create(:crawl_name => 'CC-MAIN-2013-20') }
-   let(:max_files) { 5 }
+   let(:job_desc) { 'Crawl: CC-MAIN-2014-49 Segments: 2 Parsing: 3 files per segment' }
+   let(:crawl) { Elasticrawl::Crawl.create(:crawl_name => 'CC-MAIN-2014-49') }
+   let(:max_files) { 3 }
    let(:job) { Elasticrawl::ParseJob.new }
    let(:job_flow_id) { 'j-3QHDKKBT6VAIS' }

@@ -119,17 +123,18 @@ describe Elasticrawl::Crawl do
      crawl.create_segments
      job.set_segments(crawl.crawl_segments[0..1], max_files)

-     Elasticity::JobFlow.any_instance.stubs(:run).returns(job_flow_id)
+     allow_any_instance_of(Elasticity::JobFlow).to receive(:run).and_return(job_flow_id)
      job.run
    end

    it 'should display status of crawl segments' do
      expect(Elasticrawl::Crawl.status.split("\n")[1]).to eq \
-       'CC-MAIN-2013-20 Segments: to parse 1, parsed 2, total 3'
+       'CC-MAIN-2014-49 Segments: to parse 1, parsed 2, total 3'
    end

    it 'should display parse job desc' do
      crawl_status = Elasticrawl::Crawl.status.split("\n")[4]
+
      expect(crawl_status.include?(job.job_name)).to eq true
      expect(crawl_status.include?(job.job_desc)).to eq true
    end
@@ -8,11 +8,11 @@ describe Elasticrawl::JobStep do

  describe '#job_flow_step' do
    let(:job) { Elasticrawl::ParseJob.create(:job_name => '1389789645620',
-                                            :max_files => 5) }
-   let(:crawl) { Elasticrawl::Crawl.create(:crawl_name => 'CC-MAIN-2013-20') }
+                                            :max_files => 3) }
+   let(:crawl) { Elasticrawl::Crawl.create(:crawl_name => 'CC-MAIN-2014-49') }
    let(:crawl_segment) { crawl.crawl_segments[0] }
    let(:input_paths) {
-     's3://aws-publicdatasets/common-crawl/crawl-data/CC-MAIN-2013-20/segments/1368696381249/wet/*.warc.wet.gz'
+     's3://aws-publicdatasets/common-crawl/crawl-data/CC-MAIN-2014-49/segments/1368696381249/wet/*.warc.wet.gz'
    }
    let(:output_path) {
      's3://elasticrawl/data/1-parse/1389789645620/segments/1368696381249/'
@@ -54,7 +54,7 @@ describe Elasticrawl::JobStep do
    end

    it 'should have a max files arg' do
-     expect(subject.arguments[3]).to eq '5'
+     expect(subject.arguments[3]).to eq '3'
    end
  end
  end
@@ -3,9 +3,9 @@ require 'spec_helper'
  describe Elasticrawl::ParseJob do
    describe '#set_segments' do
      let(:job_name) { (Time.now.to_f * 1000).to_i.to_s }
-     let(:job_desc) { 'Crawl: CC-MAIN-2013-20 Segments: 2 Parsing: 5 files per segment' }
-     let(:crawl) { Elasticrawl::Crawl.create(:crawl_name => 'CC-MAIN-2013-20') }
-     let(:max_files) { 5 }
+     let(:job_desc) { 'Crawl: CC-MAIN-2014-49 Segments: 2 Parsing: 3 files per segment' }
+     let(:crawl) { Elasticrawl::Crawl.create(:crawl_name => 'CC-MAIN-2014-49') }
+     let(:max_files) { 3 }
      let(:parse_job) { Elasticrawl::ParseJob.new }

      before do
@@ -42,34 +42,40 @@ describe Elasticrawl::ParseJob do
    end

    describe '#confirm_message' do
-     let(:crawl) { Elasticrawl::Crawl.create(:crawl_name => 'CC-MAIN-2013-20') }
+     let(:crawl) { Elasticrawl::Crawl.create(:crawl_name => 'CC-MAIN-2014-49') }
      let(:job) { Elasticrawl::ParseJob.new }
-     let(:job_desc) { 'Crawl: CC-MAIN-2013-20 Segments: 3 Parsing: 5 files per segment' }
+     let(:job_desc) { 'Crawl: CC-MAIN-2014-49 Segments: 3 Parsing: 3 files per segment' }
+     let(:segment_desc) { 'Segment: 1416400372202.67 Files: 3' }
+
      let(:cluster_desc) {
-       cluster_desc = <<-HERE
+       cluster_desc = <<-CLUSTER_DESC
  Cluster configuration
  Master: 1 m1.medium (Spot: 0.12)
  Core: 2 m1.medium (Spot: 0.12)
  Task: --
-       HERE
+       CLUSTER_DESC
      }

      before do
        crawl.create_segments
-       job.set_segments(crawl.crawl_segments[0..2], 5)
+       job.set_segments(crawl.crawl_segments[0..2], 3)
      end

      it 'should display message including job desc' do
        expect(job.confirm_message.include?(job_desc)).to eq true
      end

+     it 'should display message including segment desc' do
+       expect(job.confirm_message.include?(segment_desc)).to eq true
+     end
+
      it 'should display message including cluster desc' do
        expect(job.confirm_message.include?(cluster_desc)).to eq true
      end
    end

    describe '#run' do
-     let(:crawl_name) { 'CC-MAIN-2013-20' }
+     let(:crawl_name) { 'CC-MAIN-2014-49' }
      let(:crawl) { Elasticrawl::Crawl.create(:crawl_name => crawl_name) }
      let(:job) { Elasticrawl::ParseJob.new }
      let(:job_flow_id) { 'j-3QHDKKBT6VAIS' }
@@ -78,7 +84,7 @@ Task: --
      crawl.create_segments
      job.set_segments(crawl.crawl_segments[0..1], 5)

-     Elasticity::JobFlow.any_instance.stubs(:run).returns(job_flow_id)
+     allow_any_instance_of(Elasticity::JobFlow).to receive(:run).and_return(job_flow_id)
      job.run
    end

@@ -94,7 +100,7 @@ Task: --
    end

    describe '#log_uri' do
-     let(:crawl) { Elasticrawl::Crawl.create(:crawl_name => 'CC-MAIN-2013-20') }
+     let(:crawl) { Elasticrawl::Crawl.create(:crawl_name => 'CC-MAIN-2014-49') }
      let(:job) { Elasticrawl::ParseJob.new }

      before do
@@ -108,16 +114,16 @@ Task: --
    end

    describe '#history' do
-     let(:crawl) { Elasticrawl::Crawl.create(:crawl_name => 'CC-MAIN-2013-20') }
+     let(:crawl) { Elasticrawl::Crawl.create(:crawl_name => 'CC-MAIN-2014-49') }
      let(:job) { Elasticrawl::ParseJob.new }
-     let(:job_desc) { 'Crawl: CC-MAIN-2013-20 Segments: 3 Parsing: all files' }
+     let(:job_desc) { 'Crawl: CC-MAIN-2014-49 Segments: 3 Parsing: all files' }
      let(:job_flow_id) { 'j-3QHDKKBT6VAIS' }

      before do
        crawl.create_segments
        job.set_segments(crawl.crawl_segments)

-       Elasticity::JobFlow.any_instance.stubs(:run).returns(job_flow_id)
+       allow_any_instance_of(Elasticity::JobFlow).to receive(:run).and_return(job_flow_id)
        job.run
      end