elasticrawl 1.1.4 → 1.1.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2a1024132524ee242004a94b6d8d029f8d77a54e
4
- data.tar.gz: 11269b2244d5d7ecddb9a09b02341aae581afeb7
3
+ metadata.gz: a7a988f505c699d078fa4cc15981b77f7924e907
4
+ data.tar.gz: db7ee7ae05ccec51b0ffde16900656c859523001
5
5
  SHA512:
6
- metadata.gz: 1d14f20dd70d8e27a7c68b96c6fbe9e81505189a8dfc827997599ba2abb12095cab90abbca010ecafd81499b84fae1e4b9905218d15b9bf517c12e22241e6182
7
- data.tar.gz: 606e4ded5b041880d48049bb1de91182757a01bbc8bdb835df516d57e90b8155ac595b27177ea94cd7cad0730786045d1fc3fcf0b4fdd690e7d8f0e5d35ce4c4
6
+ metadata.gz: 12949f3230c3a0f7d08d4e02a9b296968616fa634072db882fe26809c58791d06ebf48d21fbc3c703ab1f858fdf12cfa9c324a6a97a7e896a5916679c7d6de06
7
+ data.tar.gz: ab9ec066ecb469707241751087ff6d25ff89e9013fdb0a8505bca904fcaa3fed2b331ef289fa0ffb71dd970261ec7c0028ca955d94642fad7093191723d8c4e3
@@ -1,5 +1,6 @@
1
1
  language: ruby
2
2
  rvm:
3
- - 2.0.0
4
- - 2.1.5
5
- - 2.2.0
3
+ - 2.0.0-p648
4
+ - 2.1.8
5
+ - 2.2.4
6
+ - 2.3.0
data/Cheffile CHANGED
@@ -1,14 +1,11 @@
1
1
  #!/usr/bin/env ruby
2
2
  #^syntax detection
3
3
 
4
- site "http://community.opscode.com/api/v1"
4
+ site "https://supermarket.getchef.com/api/v1"
5
5
 
6
- cookbook "apt",
7
- :version => "1.7.0"
8
- cookbook "build-essential"
9
- cookbook "git"
10
- cookbook "rbenv",
11
- :git => "https://github.com/fnichol/chef-rbenv.git",
12
- :ref => "v0.7.2"
13
- cookbook "ruby_build"
14
- cookbook "vim"
6
+ cookbook "apt", "2.9.2"
7
+ cookbook "build-essential", "2.2.4"
8
+ cookbook "git", "4.3.5"
9
+ cookbook "ruby_rbenv", "1.0.1"
10
+ cookbook "ruby_build", "0.8.0"
11
+ cookbook "vim", "2.0.0"
@@ -1,37 +1,30 @@
1
1
  SITE
2
- remote: http://community.opscode.com/api/v1
2
+ remote: https://supermarket.getchef.com/api/v1
3
3
  specs:
4
- apt (2.2.1)
5
- build-essential (1.4.2)
6
- chef_handler (1.1.4)
7
- dmg (2.0.4)
8
- git (2.7.0)
4
+ apt (2.9.2)
5
+ build-essential (2.2.4)
6
+ chef_handler (1.2.0)
7
+ dmg (2.3.0)
8
+ git (4.3.5)
9
9
  build-essential (>= 0.0.0)
10
10
  dmg (>= 0.0.0)
11
- runit (>= 1.0.0)
12
11
  windows (>= 0.0.0)
13
- yum (>= 0.0.0)
12
+ yum-epel (>= 0.0.0)
14
13
  ruby_build (0.8.0)
15
- runit (1.3.0)
16
- build-essential (>= 0.0.0)
17
- yum (>= 0.0.0)
18
- vim (1.0.2)
19
- windows (1.11.0)
14
+ ruby_rbenv (1.0.1)
15
+ ruby_build (>= 0.0.0)
16
+ vim (2.0.0)
17
+ windows (1.39.0)
20
18
  chef_handler (>= 0.0.0)
21
- yum (2.3.4)
22
-
23
- GIT
24
- remote: https://github.com/fnichol/chef-rbenv.git
25
- ref: v0.7.2
26
- sha: f2b53292e810dd2b43f6121f9958f5f29979dcb1
27
- specs:
28
- rbenv (0.7.2)
19
+ yum (3.8.2)
20
+ yum-epel (0.6.5)
21
+ yum (~> 3.2)
29
22
 
30
23
  DEPENDENCIES
31
- apt (>= 0)
32
- build-essential (>= 0)
33
- git (>= 0)
34
- rbenv (>= 0)
35
- ruby_build (>= 0)
36
- vim (>= 0)
24
+ apt (= 2.9.2)
25
+ build-essential (= 2.2.4)
26
+ git (= 4.3.5)
27
+ ruby_build (= 0.8.0)
28
+ ruby_rbenv (= 1.0.1)
29
+ vim (= 2.0.0)
37
30
 
data/README.md CHANGED
@@ -1,20 +1,11 @@
1
1
  # Elasticrawl
2
2
 
3
- Command line tool for launching Hadoop jobs using AWS EMR (Elastic MapReduce) to process Common Crawl data.
4
- Elasticrawl can be used with [crawl data](http://commoncrawl.org/the-data/get-started/) from April 2014 onwards.
3
+ * Command line tool for launching Hadoop jobs using AWS EMR (Elastic MapReduce) to process Common Crawl data.
4
+ * Elasticrawl can be used with [crawl data](http://commoncrawl.org/the-data/get-started/) from April 2014 onwards.
5
+ * A list of crawls released by Common Crawl is maintained on the [wiki](https://github.com/rossf7/elasticrawl/wiki).
6
+ * Common Crawl announce new crawls on their [blog](http://blog.commoncrawl.org/).
5
7
 
6
- | Crawl Name | Month | Web Pages | Segments
7
- | -------------- |:---------:|:----------:|:-------:
8
- | [CC-MAIN-2015-06](http://blog.commoncrawl.org/2015/03/january-2015-crawl-archive-available/) | January 2015 | ~ 1.82 billion | 98
9
- | [CC-MAIN-2014-52](http://blog.commoncrawl.org/2015/01/december-2014-crawl-archive-available/) | December 2014 | ~ 2.08 billion | 314
10
- | [CC-MAIN-2014-49](http://blog.commoncrawl.org/2014/12/november-2014-crawl-archive-available/) | November 2014 | ~ 1.95 billion | 136
11
- | [CC-MAIN-2014-35](http://blog.commoncrawl.org/2014/09/august-2014-crawl-data-available/) | August 2014 | ~ 2.8 billion | 111
12
- | [CC-MAIN-2014-23](http://blog.commoncrawl.org/2014/08/july-2014-crawl-data-available/) | July 2014 | ~ 3.6 billion | 253
13
- | [CC-MAIN-2014-15](http://blog.commoncrawl.org/2014/07/april-2014-crawl-data-available/) | April 2014 | ~ 2.3 billion | 70
14
-
15
- Common Crawl announce new crawls on their [blog](http://blog.commoncrawl.org/).
16
-
17
- Ships with a default configuration that launches the
8
+ * Ships with a default configuration that launches the
18
9
  [elasticrawl-examples](https://github.com/rossf7/elasticrawl-examples) jobs.
19
10
  This is an implementation of the standard Hadoop Word Count example.
20
11
 
@@ -25,18 +16,27 @@ This [blog post](https://rossfairbanks.com/2015/01/03/parsing-common-crawl-using
25
16
  Deployment packages are available for Linux and OS X; unfortunately, Windows isn't supported yet. Download the package, extract it and run the elasticrawl command from the package directory.
26
17
 
27
18
  ```bash
28
- # OS X https://d2ujrnticqzebc.cloudfront.net/elasticrawl-1.1.3-osx.tar.gz
29
- # Linux (64-bit) https://d2ujrnticqzebc.cloudfront.net/elasticrawl-1.1.3-linux-x86_64.tar.gz
30
- # Linux (32-bit) https://d2ujrnticqzebc.cloudfront.net/elasticrawl-1.1.3-linux-x86.tar.gz
19
+ # OS X https://d2ujrnticqzebc.cloudfront.net/elasticrawl-1.1.5-osx.tar.gz
20
+ # Linux (64-bit) https://d2ujrnticqzebc.cloudfront.net/elasticrawl-1.1.5-linux-x86_64.tar.gz
21
+ # Linux (32-bit) https://d2ujrnticqzebc.cloudfront.net/elasticrawl-1.1.5-linux-x86.tar.gz
31
22
 
32
23
  # e.g.
33
24
 
34
- curl -O https://d2ujrnticqzebc.cloudfront.net/elasticrawl-1.1.3-osx.tar.gz
35
- tar -xzf elasticrawl-1.1.3-osx.tar.gz
36
- cd elasticrawl-1.1.3-osx/
25
+ curl -O https://d2ujrnticqzebc.cloudfront.net/elasticrawl-1.1.5-osx.tar.gz
26
+ tar -xzf elasticrawl-1.1.5-osx.tar.gz
27
+ cd elasticrawl-1.1.5-osx/
37
28
  ./elasticrawl --help
38
29
  ```
39
30
 
31
+ ### Troubleshooting
32
+
33
+ If you get the error "EMR service role arn:aws:iam::156793023547:role/EMR_DefaultRole is invalid" when launching a cluster then you don't have the necessary IAM roles.
34
+ To fix this, install the [AWS CLI](https://aws.amazon.com/cli/) and run the command below.
35
+
36
+ ```
37
+ aws emr create-default-roles
38
+ ```
39
+
40
40
  ## Commands
41
41
 
42
42
  ### elasticrawl init
@@ -62,13 +62,13 @@ Config complete
62
62
  The parse command takes in the crawl name and an optional number of segments and files to parse.
63
63
 
64
64
  ```bash
65
- ~$ ./elasticrawl parse CC-MAIN-2014-49 --max-segments 2 --max-files 3
65
+ ~$ ./elasticrawl parse CC-MAIN-2015-48 --max-segments 2 --max-files 3
66
66
  Segments
67
67
  Segment: 1416400372202.67 Files: 150
68
68
  Segment: 1416400372490.23 Files: 124
69
69
 
70
70
  Job configuration
71
- Crawl: CC-MAIN-2014-49 Segments: 2 Parsing: 3 files per segment
71
+ Crawl: CC-MAIN-2015-48 Segments: 2 Parsing: 3 files per segment
72
72
 
73
73
  Cluster configuration
74
74
  Master: 1 m1.medium (Spot: 0.12)
@@ -106,10 +106,10 @@ The status command shows crawls and your job history.
106
106
  ```bash
107
107
  ~$ ./elasticrawl status
108
108
  Crawl Status
109
- CC-MAIN-2014-49 Segments: to parse 134, parsed 2, total 136
109
+ CC-MAIN-2015-48 Segments: to parse 98, parsed 2, total 100
110
110
 
111
111
  Job History (last 10)
112
- 1420124830792 Launched: 2015-01-01 15:07:10 Crawl: CC-MAIN-2014-49 Segments: 2 Parsing: 3 files per segment
112
+ 1420124830792 Launched: 2015-01-01 15:07:10 Crawl: CC-MAIN-2015-48 Segments: 2 Parsing: 3 files per segment
113
113
  ```
114
114
 
115
115
  ### elasticrawl reset
@@ -117,10 +117,10 @@ Job History (last 10)
117
117
  The reset command resets a crawl so it is parsed again.
118
118
 
119
119
  ```bash
120
- ~$ ./elasticrawl reset CC-MAIN-2014-49
120
+ ~$ ./elasticrawl reset CC-MAIN-2015-48
121
121
  Reset crawl? (y/n)
122
122
  y
123
- CC-MAIN-2014-49 Segments: to parse 136, parsed 0, total 136
123
+ CC-MAIN-2015-48 Segments: to parse 100, parsed 0, total 100
124
124
  ```
125
125
 
126
126
  ### elasticrawl destroy
@@ -158,13 +158,13 @@ stores your S3 bucket name and the config for the parse and combine jobs
158
158
 
159
159
  ## Development
160
160
 
161
- Elasticrawl is developed in Ruby and requires Ruby 2.0.0 or later (Ruby 2.1 is recommended). The sqlite3 and nokogiri gems have C extensions which mean you may need to install development headers.
161
+ Elasticrawl is developed in Ruby and requires Ruby 2.0.0 or later (Ruby 2.2 is recommended). The sqlite3 and nokogiri gems have C extensions, which means you may need to install development headers.
162
162
 
163
163
  [![Gem Version](https://badge.fury.io/rb/elasticrawl.png)](http://badge.fury.io/rb/elasticrawl)
164
164
  [![Code Climate](https://codeclimate.com/github/rossf7/elasticrawl.png)](https://codeclimate.com/github/rossf7/elasticrawl)
165
- [![Build Status](https://travis-ci.org/rossf7/elasticrawl.png?branch=master)](https://travis-ci.org/rossf7/elasticrawl) 2.0.0, 2.1.5, 2.2.0
165
+ [![Build Status](https://travis-ci.org/rossf7/elasticrawl.png?branch=master)](https://travis-ci.org/rossf7/elasticrawl) 2.0.0, 2.1.8, 2.2.4, 2.3.0
166
166
 
167
- The deployment packages are created using [Traveling Ruby](http://phusion.github.io/traveling-ruby/). The deploy packages contain a Ruby 2.1 interpreter, Gems and the compiled C extensions. The [traveling-elasticrawl](https://github.com/rossf7/traveling-elasticrawl) repository has a Rake task that automates building the deployment packages.
167
+ The deployment packages are created using [Traveling Ruby](http://phusion.github.io/traveling-ruby/). The deploy packages contain a Ruby 2.2 interpreter, Gems and the compiled C extensions. The [traveling-elasticrawl](https://github.com/rossf7/traveling-elasticrawl) repository has a Rake task that automates building the deployment packages.
168
168
 
169
169
  ## TODO
170
170
 
@@ -6,17 +6,17 @@ Vagrant.configure("2") do |config|
6
6
  # options are documented and commented below. For a complete reference,
7
7
  # please see the online documentation at vagrantup.com.
8
8
 
9
- # Fix DNS issues with Ubuntu 12.04 by always using host's resolver
9
+ # Increase RAM to 1 GB
10
10
  config.vm.provider "virtualbox" do |vbox|
11
- vbox.customize ["modifyvm", :id, "--natdnshostresolver1", "on"]
11
+ vbox.customize ["modifyvm", :id, "--memory", 1024]
12
12
  end
13
13
 
14
14
  # Elasticrawl launches Hadoop jobs for the CommonCrawl dataset using the AWS EMR service.
15
15
  config.vm.define :elasticrawl do |elasticrawl|
16
16
  elasticrawl.vm.box = "elasticrawl"
17
17
 
18
- # Ubuntu Server 12.04 LTS
19
- elasticrawl.vm.box_url = "http://files.vagrantup.com/precise64.box"
18
+ # Ubuntu Server 14.04 LTS
19
+ elasticrawl.vm.box = "ubuntu/trusty64"
20
20
 
21
21
  # Network config
22
22
  elasticrawl.vm.network :public_network
@@ -30,7 +30,7 @@ Vagrant.configure("2") do |config|
30
30
  chef.add_recipe "apt"
31
31
  chef.add_recipe "build-essential"
32
32
  chef.add_recipe "ruby_build"
33
- chef.add_recipe "rbenv::user"
33
+ chef.add_recipe "ruby_rbenv::user"
34
34
  chef.add_recipe "git"
35
35
  chef.add_recipe "vim"
36
36
 
@@ -39,17 +39,24 @@ Vagrant.configure("2") do |config|
39
39
  "user_installs" => [
40
40
  {
41
41
  "user" => "vagrant",
42
- "rubies" => ["2.0.0-p643", "2.1.5", "2.2.0"],
43
- "global" => "2.1.5",
42
+ "rubies" => ["2.0.0-p648", "2.1.8", "2.2.4", "2.3.0"],
43
+ "global" => "2.2.4",
44
44
  "gems" => {
45
- "2.0.0-p643" => [
46
- { "name" => "bundler" }
45
+ "2.0.0-p648" => [
46
+ { "name" => "bundler",
47
+ "version" => "1.11.2" }
47
48
  ],
48
- "2.1.5" => [
49
- { "name" => "bundler" }
49
+ "2.1.8" => [
50
+ { "name" => "bundler",
51
+ "version" => "1.11.2" }
50
52
  ],
51
- "2.2.0" => [
52
- { "name" => "bundler" }
53
+ "2.2.4" => [
54
+ { "name" => "bundler",
55
+ "version" => "1.11.2" }
56
+ ],
57
+ "2.3.0" => [
58
+ { "name" => "bundler",
59
+ "version" => "1.11.2" }
53
60
  ]
54
61
  }
55
62
  }
@@ -18,17 +18,17 @@ Gem::Specification.new do |spec|
18
18
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
19
  spec.require_paths = ['lib']
20
20
 
21
- spec.add_dependency 'activerecord', '~> 4.2'
22
- spec.add_dependency 'activesupport', '~> 4.2'
23
- spec.add_dependency 'aws-sdk', '~> 1.60'
24
- spec.add_dependency 'elasticity', '~> 4.0'
25
- spec.add_dependency 'highline', '~> 1.6'
26
- spec.add_dependency 'sqlite3', '~> 1.3'
27
- spec.add_dependency 'thor', '~> 0.19'
21
+ spec.add_dependency 'activerecord', '~> 4.2.5'
22
+ spec.add_dependency 'activesupport', '~> 4.2.5'
23
+ spec.add_dependency 'aws-sdk', '~> 1.66.0'
24
+ spec.add_dependency 'elasticity', '~> 6.0.5'
25
+ spec.add_dependency 'highline', '~> 1.7.8'
26
+ spec.add_dependency 'sqlite3', '~> 1.3.11'
27
+ spec.add_dependency 'thor', '~> 0.19.1'
28
28
 
29
- spec.add_development_dependency 'rake'
30
- spec.add_development_dependency 'bundler', '~> 1.3'
31
- spec.add_development_dependency 'rspec', '~> 3.1'
32
- spec.add_development_dependency 'database_cleaner', '~> 1.3.0'
33
- spec.add_development_dependency 'shoulda-matchers', '~> 2.7.0'
29
+ spec.add_development_dependency 'rake', '~> 10.4.2'
30
+ spec.add_development_dependency 'bundler', '~> 1.11.2'
31
+ spec.add_development_dependency 'rspec', '~> 3.4.0'
32
+ spec.add_development_dependency 'database_cleaner', '~> 1.5.1'
33
+ spec.add_development_dependency 'shoulda-matchers', '~> 3.0.1'
34
34
  end
@@ -11,8 +11,13 @@ module Elasticrawl
11
11
  # Returns a configured job flow to the calling job.
12
12
  def create_job_flow(job, emr_config = nil)
13
13
  config = Config.new
14
- job_flow = Elasticity::JobFlow.new(config.access_key_id,
15
- config.secret_access_key)
14
+
15
+ Elasticity.configure do |c|
16
+ c.access_key = config.access_key_id
17
+ c.secret_key = config.secret_access_key
18
+ end
19
+
20
+ job_flow = Elasticity::JobFlow.new
16
21
  job_flow.name = "Job: #{job.job_name} #{job.job_desc}"
17
22
  job_flow.log_uri = job.log_uri
18
23
 
@@ -1,3 +1,3 @@
1
1
  module Elasticrawl
2
- VERSION = '1.1.4'
2
+ VERSION = '1.1.5'
3
3
  end
@@ -7,6 +7,9 @@ RSpec.configure do |config|
7
7
  # Run each test in a transaction and rollback data on completion.
8
8
  DatabaseCleaner.strategy = :transaction
9
9
 
10
+ # Use Shoulda matchers for schema tests.
11
+ config.include(Shoulda::Matchers::ActiveRecord, type: :model)
12
+
10
13
  config.before(:each) do
11
14
  # Stub S3 call to get WARC file paths
12
15
  warc_paths = IO.read(File.join(File.dirname(__FILE__), 'fixtures', 'warc.paths'))
@@ -1,6 +1,6 @@
1
1
  require 'spec_helper'
2
2
 
3
- describe Elasticrawl::CrawlSegment do
3
+ describe Elasticrawl::CrawlSegment, type: :model do
4
4
  it { should belong_to(:crawl) }
5
5
  it { should have_many(:job_steps) }
6
6
  it { should have_db_column(:segment_name).of_type(:string) }
@@ -1,6 +1,6 @@
1
1
  require 'spec_helper'
2
2
 
3
- describe Elasticrawl::Crawl do
3
+ describe Elasticrawl::Crawl, type: :model do
4
4
  it { should have_many(:crawl_segments) }
5
5
  it { should have_db_column(:crawl_name).of_type(:string) }
6
6
 
@@ -1,6 +1,6 @@
1
1
  require 'spec_helper'
2
2
 
3
- describe Elasticrawl::Job do
3
+ describe Elasticrawl::Job, type: :model do
4
4
  it { should have_many(:job_steps) }
5
5
  it { should have_db_column(:type).of_type(:string) }
6
6
  it { should have_db_column(:job_name).of_type(:string) }
@@ -1,6 +1,6 @@
1
1
  require 'spec_helper'
2
2
 
3
- describe Elasticrawl::JobStep do
3
+ describe Elasticrawl::JobStep, type: :model do
4
4
  it { should belong_to(:job) }
5
5
  it { should belong_to(:crawl_segment) }
6
6
  it { should have_db_column(:input_paths).of_type(:text) }
metadata CHANGED
@@ -1,183 +1,183 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: elasticrawl
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.4
4
+ version: 1.1.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ross Fairbanks
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-07-15 00:00:00.000000000 Z
11
+ date: 2016-01-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activerecord
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - ~>
17
+ - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '4.2'
19
+ version: 4.2.5
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - ~>
24
+ - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '4.2'
26
+ version: 4.2.5
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: activesupport
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - ~>
31
+ - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: '4.2'
33
+ version: 4.2.5
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
- - - ~>
38
+ - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: '4.2'
40
+ version: 4.2.5
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: aws-sdk
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
- - - ~>
45
+ - - "~>"
46
46
  - !ruby/object:Gem::Version
47
- version: '1.60'
47
+ version: 1.66.0
48
48
  type: :runtime
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
- - - ~>
52
+ - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: '1.60'
54
+ version: 1.66.0
55
55
  - !ruby/object:Gem::Dependency
56
56
  name: elasticity
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
- - - ~>
59
+ - - "~>"
60
60
  - !ruby/object:Gem::Version
61
- version: '4.0'
61
+ version: 6.0.5
62
62
  type: :runtime
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
- - - ~>
66
+ - - "~>"
67
67
  - !ruby/object:Gem::Version
68
- version: '4.0'
68
+ version: 6.0.5
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: highline
71
71
  requirement: !ruby/object:Gem::Requirement
72
72
  requirements:
73
- - - ~>
73
+ - - "~>"
74
74
  - !ruby/object:Gem::Version
75
- version: '1.6'
75
+ version: 1.7.8
76
76
  type: :runtime
77
77
  prerelease: false
78
78
  version_requirements: !ruby/object:Gem::Requirement
79
79
  requirements:
80
- - - ~>
80
+ - - "~>"
81
81
  - !ruby/object:Gem::Version
82
- version: '1.6'
82
+ version: 1.7.8
83
83
  - !ruby/object:Gem::Dependency
84
84
  name: sqlite3
85
85
  requirement: !ruby/object:Gem::Requirement
86
86
  requirements:
87
- - - ~>
87
+ - - "~>"
88
88
  - !ruby/object:Gem::Version
89
- version: '1.3'
89
+ version: 1.3.11
90
90
  type: :runtime
91
91
  prerelease: false
92
92
  version_requirements: !ruby/object:Gem::Requirement
93
93
  requirements:
94
- - - ~>
94
+ - - "~>"
95
95
  - !ruby/object:Gem::Version
96
- version: '1.3'
96
+ version: 1.3.11
97
97
  - !ruby/object:Gem::Dependency
98
98
  name: thor
99
99
  requirement: !ruby/object:Gem::Requirement
100
100
  requirements:
101
- - - ~>
101
+ - - "~>"
102
102
  - !ruby/object:Gem::Version
103
- version: '0.19'
103
+ version: 0.19.1
104
104
  type: :runtime
105
105
  prerelease: false
106
106
  version_requirements: !ruby/object:Gem::Requirement
107
107
  requirements:
108
- - - ~>
108
+ - - "~>"
109
109
  - !ruby/object:Gem::Version
110
- version: '0.19'
110
+ version: 0.19.1
111
111
  - !ruby/object:Gem::Dependency
112
112
  name: rake
113
113
  requirement: !ruby/object:Gem::Requirement
114
114
  requirements:
115
- - - '>='
115
+ - - "~>"
116
116
  - !ruby/object:Gem::Version
117
- version: '0'
117
+ version: 10.4.2
118
118
  type: :development
119
119
  prerelease: false
120
120
  version_requirements: !ruby/object:Gem::Requirement
121
121
  requirements:
122
- - - '>='
122
+ - - "~>"
123
123
  - !ruby/object:Gem::Version
124
- version: '0'
124
+ version: 10.4.2
125
125
  - !ruby/object:Gem::Dependency
126
126
  name: bundler
127
127
  requirement: !ruby/object:Gem::Requirement
128
128
  requirements:
129
- - - ~>
129
+ - - "~>"
130
130
  - !ruby/object:Gem::Version
131
- version: '1.3'
131
+ version: 1.11.2
132
132
  type: :development
133
133
  prerelease: false
134
134
  version_requirements: !ruby/object:Gem::Requirement
135
135
  requirements:
136
- - - ~>
136
+ - - "~>"
137
137
  - !ruby/object:Gem::Version
138
- version: '1.3'
138
+ version: 1.11.2
139
139
  - !ruby/object:Gem::Dependency
140
140
  name: rspec
141
141
  requirement: !ruby/object:Gem::Requirement
142
142
  requirements:
143
- - - ~>
143
+ - - "~>"
144
144
  - !ruby/object:Gem::Version
145
- version: '3.1'
145
+ version: 3.4.0
146
146
  type: :development
147
147
  prerelease: false
148
148
  version_requirements: !ruby/object:Gem::Requirement
149
149
  requirements:
150
- - - ~>
150
+ - - "~>"
151
151
  - !ruby/object:Gem::Version
152
- version: '3.1'
152
+ version: 3.4.0
153
153
  - !ruby/object:Gem::Dependency
154
154
  name: database_cleaner
155
155
  requirement: !ruby/object:Gem::Requirement
156
156
  requirements:
157
- - - ~>
157
+ - - "~>"
158
158
  - !ruby/object:Gem::Version
159
- version: 1.3.0
159
+ version: 1.5.1
160
160
  type: :development
161
161
  prerelease: false
162
162
  version_requirements: !ruby/object:Gem::Requirement
163
163
  requirements:
164
- - - ~>
164
+ - - "~>"
165
165
  - !ruby/object:Gem::Version
166
- version: 1.3.0
166
+ version: 1.5.1
167
167
  - !ruby/object:Gem::Dependency
168
168
  name: shoulda-matchers
169
169
  requirement: !ruby/object:Gem::Requirement
170
170
  requirements:
171
- - - ~>
171
+ - - "~>"
172
172
  - !ruby/object:Gem::Version
173
- version: 2.7.0
173
+ version: 3.0.1
174
174
  type: :development
175
175
  prerelease: false
176
176
  version_requirements: !ruby/object:Gem::Requirement
177
177
  requirements:
178
- - - ~>
178
+ - - "~>"
179
179
  - !ruby/object:Gem::Version
180
- version: 2.7.0
180
+ version: 3.0.1
181
181
  description: Elasticrawl is a tool for launching AWS Elastic MapReduce jobs that process
182
182
  Common Crawl data.
183
183
  email:
@@ -187,8 +187,8 @@ executables:
187
187
  extensions: []
188
188
  extra_rdoc_files: []
189
189
  files:
190
- - .gitignore
191
- - .travis.yml
190
+ - ".gitignore"
191
+ - ".travis.yml"
192
192
  - CHANGELOG.md
193
193
  - Cheffile
194
194
  - Cheffile.lock
@@ -241,17 +241,17 @@ require_paths:
241
241
  - lib
242
242
  required_ruby_version: !ruby/object:Gem::Requirement
243
243
  requirements:
244
- - - '>='
244
+ - - ">="
245
245
  - !ruby/object:Gem::Version
246
246
  version: '0'
247
247
  required_rubygems_version: !ruby/object:Gem::Requirement
248
248
  requirements:
249
- - - '>='
249
+ - - ">="
250
250
  - !ruby/object:Gem::Version
251
251
  version: '0'
252
252
  requirements: []
253
253
  rubyforge_project:
254
- rubygems_version: 2.0.14
254
+ rubygems_version: 2.4.5.1
255
255
  signing_key:
256
256
  specification_version: 4
257
257
  summary: Launch AWS Elastic MapReduce jobs that process Common Crawl data.