elasticrawl 1.1.1 → 1.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +10 -9
- data/lib/elasticrawl/config.rb +3 -3
- data/lib/elasticrawl/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: cb0c0acad4311e80efcdd7f91af5fde8bc150fad
|
|
4
|
+
data.tar.gz: 225d92cc5c7b2bc34e31c954b89be891110c249f
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 76853faac58ed851a212597b25c30613695096312a3b062be8afe530a5ff54305fe4d3e38c7b8c60d1305c6a5795ff43a7942de7b3259389b6248c4b8e1f2019
|
|
7
|
+
data.tar.gz: 3af76e29d3d3c5f99e6470cab5efea7a02477930b473b6e35dd7d1e2fa7c71f047ed08c54b7b4719106df0ede06d30e75e4a3ae82cec702714866d68f36fe280
|
data/README.md
CHANGED
|
@@ -5,10 +5,11 @@ Elasticrawl can be used with [crawl data](http://commoncrawl.org/the-data/get-st
|
|
|
5
5
|
|
|
6
6
|
| Crawl Name | Month | Web Pages
|
|
7
7
|
| -------------- |:--------:|:--------:|
|
|
8
|
-
| [CC-MAIN-2014-
|
|
9
|
-
| [CC-MAIN-2014-23](http://blog.commoncrawl.org/2014/08/july-2014-crawl-data-available/) | July 2014 | ~ 3.6 billion
|
|
10
|
-
| [CC-MAIN-2014-35](http://blog.commoncrawl.org/2014/09/august-2014-crawl-data-available/) | August 2014 | ~ 2.8 billion
|
|
8
|
+
| [CC-MAIN-2014-52](http://blog.commoncrawl.org/2015/01/december-2014-crawl-archive-available/) | December 2014 | ~ 2.08 billion
|
|
11
9
|
| [CC-MAIN-2014-49](http://blog.commoncrawl.org/2014/12/november-2014-crawl-archive-available/) | November 2014 | ~ 1.95 billion
|
|
10
|
+
| [CC-MAIN-2014-35](http://blog.commoncrawl.org/2014/09/august-2014-crawl-data-available/) | August 2014 | ~ 2.8 billion
|
|
11
|
+
| [CC-MAIN-2014-23](http://blog.commoncrawl.org/2014/08/july-2014-crawl-data-available/) | July 2014 | ~ 3.6 billion
|
|
12
|
+
| [CC-MAIN-2014-15](http://blog.commoncrawl.org/2014/07/april-2014-crawl-data-available/) | April 2014 | ~ 2.3 billion
|
|
12
13
|
|
|
13
14
|
Common Crawl announce new crawls on their [blog](http://blog.commoncrawl.org/).
|
|
14
15
|
|
|
@@ -23,15 +24,15 @@ This [blog post](https://rossfairbanks.com/2015/01/03/parsing-common-crawl-using
|
|
|
23
24
|
Deployment packages are available for Linux and OS X, unfortunately Windows isn't supported yet. Download the package, extract it and run the elasticrawl command from the package directory.
|
|
24
25
|
|
|
25
26
|
```bash
|
|
26
|
-
# OS X https://d2ujrnticqzebc.cloudfront.net/elasticrawl-1.1.
|
|
27
|
-
# Linux (64-bit) https://d2ujrnticqzebc.cloudfront.net/elasticrawl-1.1.
|
|
28
|
-
# Linux (32-bit) https://d2ujrnticqzebc.cloudfront.net/elasticrawl-1.1.
|
|
27
|
+
# OS X https://d2ujrnticqzebc.cloudfront.net/elasticrawl-1.1.2-osx.tar.gz
|
|
28
|
+
# Linux (64-bit) https://d2ujrnticqzebc.cloudfront.net/elasticrawl-1.1.2-linux-x86_64.tar.gz
|
|
29
|
+
# Linux (32-bit) https://d2ujrnticqzebc.cloudfront.net/elasticrawl-1.1.2-linux-x86.tar.gz
|
|
29
30
|
|
|
30
31
|
# e.g.
|
|
31
32
|
|
|
32
|
-
curl -O https://d2ujrnticqzebc.cloudfront.net/elasticrawl-1.1.
|
|
33
|
-
tar -xzf elasticrawl-1.1.
|
|
34
|
-
cd elasticrawl-1.1.
|
|
33
|
+
curl -O https://d2ujrnticqzebc.cloudfront.net/elasticrawl-1.1.2-osx.tar.gz
|
|
34
|
+
tar -xzf elasticrawl-1.1.2-osx.tar.gz
|
|
35
|
+
cd elasticrawl-1.1.2-osx/
|
|
35
36
|
./elasticrawl --help
|
|
36
37
|
```
|
|
37
38
|
|
data/lib/elasticrawl/config.rb
CHANGED
|
@@ -101,7 +101,7 @@ module Elasticrawl
|
|
|
101
101
|
rescue AWS::S3::Errors::SignatureDoesNotMatch => e
|
|
102
102
|
raise AWSCredentialsInvalidError, 'AWS access credentials are invalid'
|
|
103
103
|
rescue AWS::Errors::Base => s3e
|
|
104
|
-
raise S3AccessError.new(s3e.http_response),
|
|
104
|
+
raise S3AccessError.new(s3e.http_response), s3e.message
|
|
105
105
|
end
|
|
106
106
|
end
|
|
107
107
|
|
|
@@ -159,7 +159,7 @@ module Elasticrawl
|
|
|
159
159
|
s3.buckets.create(bucket_name)
|
|
160
160
|
|
|
161
161
|
rescue AWS::Errors::Base => s3e
|
|
162
|
-
raise S3AccessError.new(s3e.http_response),
|
|
162
|
+
raise S3AccessError.new(s3e.http_response), s3e.message
|
|
163
163
|
end
|
|
164
164
|
end
|
|
165
165
|
|
|
@@ -171,7 +171,7 @@ module Elasticrawl
|
|
|
171
171
|
bucket.delete!
|
|
172
172
|
|
|
173
173
|
rescue AWS::Errors::Base => s3e
|
|
174
|
-
raise S3AccessError.new(s3e.http_response),
|
|
174
|
+
raise S3AccessError.new(s3e.http_response), s3e.message
|
|
175
175
|
end
|
|
176
176
|
end
|
|
177
177
|
|
data/lib/elasticrawl/version.rb
CHANGED