polipus 0.3.2 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/.rubocop_todo.yml +0 -4
- data/CHANGELOG.md +6 -0
- data/Rakefile +1 -0
- data/examples/basic.rb +1 -0
- data/examples/error_handling.rb +1 -0
- data/examples/incremental.rb +1 -0
- data/examples/robots_txt_handling.rb +1 -0
- data/examples/survival.rb +1 -0
- data/lib/polipus/http.rb +1 -0
- data/lib/polipus/page.rb +1 -0
- data/lib/polipus/plugin.rb +1 -0
- data/lib/polipus/plugins/cleaner.rb +2 -1
- data/lib/polipus/plugins/sample.rb +1 -0
- data/lib/polipus/plugins/sleeper.rb +1 -0
- data/lib/polipus/queue_overflow.rb +1 -0
- data/lib/polipus/queue_overflow/base.rb +1 -0
- data/lib/polipus/queue_overflow/dev_null_queue.rb +1 -0
- data/lib/polipus/queue_overflow/manager.rb +1 -0
- data/lib/polipus/queue_overflow/mongo_queue.rb +1 -0
- data/lib/polipus/queue_overflow/mongo_queue_capped.rb +1 -0
- data/lib/polipus/robotex.rb +1 -0
- data/lib/polipus/signal_handler.rb +1 -0
- data/lib/polipus/storage.rb +1 -0
- data/lib/polipus/storage/base.rb +1 -0
- data/lib/polipus/storage/dev_null.rb +1 -0
- data/lib/polipus/storage/memory_store.rb +1 -0
- data/lib/polipus/storage/mongo_store.rb +2 -1
- data/lib/polipus/storage/s3_store.rb +1 -0
- data/lib/polipus/url_tracker.rb +1 -0
- data/lib/polipus/url_tracker/bloomfilter.rb +1 -0
- data/lib/polipus/url_tracker/redis_set.rb +1 -0
- data/lib/polipus/version.rb +2 -1
- data/spec/clear.rb +1 -0
- data/spec/http_spec.rb +1 -0
- data/spec/page_spec.rb +1 -0
- data/spec/polipus_spec.rb +1 -0
- data/spec/queue_overflow_manager_spec.rb +1 -0
- data/spec/queue_overflow_spec.rb +1 -0
- data/spec/robotex_spec.rb +1 -0
- data/spec/signal_handler_spec.rb +1 -0
- data/spec/spec_helper.rb +1 -0
- data/spec/storage_memory_spec.rb +1 -0
- data/spec/storage_mongo_spec.rb +1 -0
- data/spec/storage_s3_spec.rb +1 -0
- data/spec/url_tracker_spec.rb +1 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
MGZhM2Q1OWE5ZTkxZWE3Y2JhMDJkNzZkMWRjMWZjYjI4YmRhODJmYw==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
ODg4NTk5ZjhjNDM0ZjBhN2M2OWJlOWQyOTE4OWFiZmY1ZmQxMTZiYQ==
|
7
7
|
SHA512:
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
NTdmYWZhMmMzYTQxOWY5OTBlNGE1ZWQzNWYwMThjMjAwNzQ4NmU4MDY4OWRi
|
10
|
+
OWVkNzEwNmYwNTY3OGI4NmFkZGJiNzNhN2I3M2ZjMDUxNjAzMDAyZjEyYmZj
|
11
|
+
ZGFkODliNDc5ZTYzYWE2ODNiMzdkMWFjZGExYjgzNTQyNmM4MDc=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
NGYzNDc2ODQ3OTBiMzE5N2M5YmM3ZDliN2IyYWRmNjBiZDI3ZTYyYjkxNjBk
|
14
|
+
MDFhZjA0NDE5YjFkYzdiZTQxMjg0ZGEwODA3Nzk0YWZiNDRmY2I1MmE2ZWI4
|
15
|
+
MjQxNTU4NjgwOTM1ZDdlYjM4Mjg0NjhjY2M2OGU5MDA1YjNjMjY=
|
data/.rubocop_todo.yml
CHANGED
@@ -17,10 +17,6 @@ Style/CyclomaticComplexity:
|
|
17
17
|
Style/Documentation:
|
18
18
|
Enabled: false
|
19
19
|
|
20
|
-
# Offense count: 38
|
21
|
-
# Configuration parameters: EnforcedStyle, SupportedStyles.
|
22
|
-
Style/Encoding:
|
23
|
-
Enabled: false
|
24
20
|
|
25
21
|
# Offense count: 2
|
26
22
|
# Configuration parameters: EnforcedStyle, SupportedStyles.
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,11 @@
|
|
1
1
|
# Changelog
|
2
2
|
|
3
|
+
## 0.3.3 (2015-06-26)
|
4
|
+
|
5
|
+
[Compare changes in gem](https://github.com/taganaka/polipus/compare/0.3.2...0.3.3)
|
6
|
+
|
7
|
+
* BugFix: Better compatibility for mongo 2.6.x on index creation
|
8
|
+
|
3
9
|
## 0.3.2 (2015-06-17)
|
4
10
|
|
5
11
|
[Compare changes in gem](https://github.com/taganaka/polipus/compare/0.3.1...0.3.2)
|
data/Rakefile
CHANGED
data/examples/basic.rb
CHANGED
data/examples/error_handling.rb
CHANGED
data/examples/incremental.rb
CHANGED
data/examples/survival.rb
CHANGED
data/lib/polipus/http.rb
CHANGED
data/lib/polipus/page.rb
CHANGED
data/lib/polipus/plugin.rb
CHANGED
data/lib/polipus/plugins/cleaner.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
# encoding: UTF-8
|
1
2
|
module Polipus
|
2
3
|
module Plugin
|
3
4
|
class Cleaner
|
@@ -8,7 +9,7 @@ module Polipus
|
|
8
9
|
def on_initialize(crawler)
|
9
10
|
crawler.logger.info { 'Cleaner plugin loaded' }
|
10
11
|
unless @reset
|
11
|
-
crawler.logger.info { 'Cleaner plugin is
|
12
|
+
crawler.logger.info { 'Cleaner plugin is disabled, add :reset => true to the plugin if you really know what you are doing' }
|
12
13
|
return nil
|
13
14
|
end
|
14
15
|
crawler.logger.info { 'Cleaning all: url_tracker, storage, queue' }
|
data/lib/polipus/robotex.rb
CHANGED
data/lib/polipus/storage.rb
CHANGED
data/lib/polipus/storage/base.rb
CHANGED
data/lib/polipus/storage/mongo_store.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
# encoding: UTF-8
|
1
2
|
require 'mongo'
|
2
3
|
require 'zlib'
|
3
4
|
require 'thread'
|
@@ -9,7 +10,7 @@ module Polipus
|
|
9
10
|
@mongo = options[:mongo]
|
10
11
|
@collection = options[:collection]
|
11
12
|
@mongo.create_collection(@collection)
|
12
|
-
@mongo[@collection].ensure_index(:uuid, unique: true,
|
13
|
+
@mongo[@collection].ensure_index(:uuid, unique: true, dropDups: true, background: true)
|
13
14
|
@compress_body = options[:compress_body] ||= true
|
14
15
|
@except = options[:except] ||= []
|
15
16
|
@semaphore = Mutex.new
|
data/lib/polipus/url_tracker.rb
CHANGED
data/lib/polipus/version.rb
CHANGED
data/spec/clear.rb
CHANGED
data/spec/http_spec.rb
CHANGED
data/spec/page_spec.rb
CHANGED
data/spec/polipus_spec.rb
CHANGED
data/spec/queue_overflow_spec.rb
CHANGED
data/spec/robotex_spec.rb
CHANGED
data/spec/signal_handler_spec.rb
CHANGED
data/spec/spec_helper.rb
CHANGED
data/spec/storage_memory_spec.rb
CHANGED
data/spec/storage_mongo_spec.rb
CHANGED
data/spec/storage_s3_spec.rb
CHANGED
data/spec/url_tracker_spec.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: polipus
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.2
|
4
|
+
version: 0.3.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Francesco Laurita
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-06-
|
11
|
+
date: 2014-06-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: redis-bloomfilter
|