elasticrawl 1.1.1 → 1.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +10 -9
- data/lib/elasticrawl/config.rb +3 -3
- data/lib/elasticrawl/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: cb0c0acad4311e80efcdd7f91af5fde8bc150fad
|
4
|
+
data.tar.gz: 225d92cc5c7b2bc34e31c954b89be891110c249f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 76853faac58ed851a212597b25c30613695096312a3b062be8afe530a5ff54305fe4d3e38c7b8c60d1305c6a5795ff43a7942de7b3259389b6248c4b8e1f2019
|
7
|
+
data.tar.gz: 3af76e29d3d3c5f99e6470cab5efea7a02477930b473b6e35dd7d1e2fa7c71f047ed08c54b7b4719106df0ede06d30e75e4a3ae82cec702714866d68f36fe280
|
data/README.md
CHANGED
@@ -5,10 +5,11 @@ Elasticrawl can be used with [crawl data](http://commoncrawl.org/the-data/get-st
|
|
5
5
|
|
6
6
|
| Crawl Name | Month | Web Pages
|
7
7
|
| -------------- |:--------:|:--------:|
|
8
|
-
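| [CC-MAIN-2014-15](http://blog.commoncrawl.org/2014/07/april-2014-crawl-data-available/) | April 2014 | ~ 2.3 billion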
|
9
|
-
| [CC-MAIN-2014-23](http://blog.commoncrawl.org/2014/08/july-2014-crawl-data-available/) | July 2014 | ~ 3.6 billion
|
10
|
-
| [CC-MAIN-2014-35](http://blog.commoncrawl.org/2014/09/august-2014-crawl-data-available/) | August 2014 | ~ 2.8 billion
|
8
|
+
| [CC-MAIN-2014-52](http://blog.commoncrawl.org/2015/01/december-2014-crawl-archive-available/) | December 2014 | ~ 2.08 billion
|
11
9
|
| [CC-MAIN-2014-49](http://blog.commoncrawl.org/2014/12/november-2014-crawl-archive-available/) | November 2014 | ~ 1.95 billion
|
10
|
+
| [CC-MAIN-2014-35](http://blog.commoncrawl.org/2014/09/august-2014-crawl-data-available/) | August 2014 | ~ 2.8 billion
|
11
|
+
| [CC-MAIN-2014-23](http://blog.commoncrawl.org/2014/08/july-2014-crawl-data-available/) | July 2014 | ~ 3.6 billion
|
12
|
+
| [CC-MAIN-2014-15](http://blog.commoncrawl.org/2014/07/april-2014-crawl-data-available/) | April 2014 | ~ 2.3 billion
|
12
13
|
|
13
14
|
Common Crawl announce new crawls on their [blog](http://blog.commoncrawl.org/).
|
14
15
|
|
@@ -23,15 +24,15 @@ This [blog post](https://rossfairbanks.com/2015/01/03/parsing-common-crawl-using
|
|
23
24
|
Deployment packages are available for Linux and OS X, unfortunately Windows isn't supported yet. Download the package, extract it and run the elasticrawl command from the package directory.
|
24
25
|
|
25
26
|
```bash
|
26
|
-
# OS X https://d2ujrnticqzebc.cloudfront.net/elasticrawl-1.1.1-osx.tar.gz
|
27
|
-
# Linux (64-bit) https://d2ujrnticqzebc.cloudfront.net/elasticrawl-1.1.1-linux-x86_64.tar.gz
|
28
|
-
# Linux (32-bit) https://d2ujrnticqzebc.cloudfront.net/elasticrawl-1.1.1-linux-x86.tar.gz
|
27
|
+
# OS X https://d2ujrnticqzebc.cloudfront.net/elasticrawl-1.1.2-osx.tar.gz
|
28
|
+
# Linux (64-bit) https://d2ujrnticqzebc.cloudfront.net/elasticrawl-1.1.2-linux-x86_64.tar.gz
|
29
|
+
# Linux (32-bit) https://d2ujrnticqzebc.cloudfront.net/elasticrawl-1.1.2-linux-x86.tar.gz
|
29
30
|
|
30
31
|
# e.g.
|
31
32
|
|
32
|
-
curl -O https://d2ujrnticqzebc.cloudfront.net/elasticrawl-1.1.1-osx.tar.gz
|
33
|
-
tar -xzf elasticrawl-1.1.1-osx.tar.gz
|
34
|
-
cd elasticrawl-1.1.1-osx/
|
33
|
+
curl -O https://d2ujrnticqzebc.cloudfront.net/elasticrawl-1.1.2-osx.tar.gz
|
34
|
+
tar -xzf elasticrawl-1.1.2-osx.tar.gz
|
35
|
+
cd elasticrawl-1.1.2-osx/
|
35
36
|
./elasticrawl --help
|
36
37
|
```
|
37
38
|
|
data/lib/elasticrawl/config.rb
CHANGED
@@ -101,7 +101,7 @@ module Elasticrawl
|
|
101
101
|
rescue AWS::S3::Errors::SignatureDoesNotMatch => e
|
102
102
|
raise AWSCredentialsInvalidError, 'AWS access credentials are invalid'
|
103
103
|
rescue AWS::Errors::Base => s3e
|
104
|
-
raise S3AccessError.new(s3e.http_response),
|
104
|
+
raise S3AccessError.new(s3e.http_response), s3e.message
|
105
105
|
end
|
106
106
|
end
|
107
107
|
|
@@ -159,7 +159,7 @@ module Elasticrawl
|
|
159
159
|
s3.buckets.create(bucket_name)
|
160
160
|
|
161
161
|
rescue AWS::Errors::Base => s3e
|
162
|
-
raise S3AccessError.new(s3e.http_response),
|
162
|
+
raise S3AccessError.new(s3e.http_response), s3e.message
|
163
163
|
end
|
164
164
|
end
|
165
165
|
|
@@ -171,7 +171,7 @@ module Elasticrawl
|
|
171
171
|
bucket.delete!
|
172
172
|
|
173
173
|
rescue AWS::Errors::Base => s3e
|
174
|
-
raise S3AccessError.new(s3e.http_response),
|
174
|
+
raise S3AccessError.new(s3e.http_response), s3e.message
|
175
175
|
end
|
176
176
|
end
|
177
177
|
|
data/lib/elasticrawl/version.rb
CHANGED