polipus 0.3.2 → 0.3.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +8 -8
- data/.rubocop_todo.yml +0 -4
- data/CHANGELOG.md +6 -0
- data/Rakefile +1 -0
- data/examples/basic.rb +1 -0
- data/examples/error_handling.rb +1 -0
- data/examples/incremental.rb +1 -0
- data/examples/robots_txt_handling.rb +1 -0
- data/examples/survival.rb +1 -0
- data/lib/polipus/http.rb +1 -0
- data/lib/polipus/page.rb +1 -0
- data/lib/polipus/plugin.rb +1 -0
- data/lib/polipus/plugins/cleaner.rb +2 -1
- data/lib/polipus/plugins/sample.rb +1 -0
- data/lib/polipus/plugins/sleeper.rb +1 -0
- data/lib/polipus/queue_overflow.rb +1 -0
- data/lib/polipus/queue_overflow/base.rb +1 -0
- data/lib/polipus/queue_overflow/dev_null_queue.rb +1 -0
- data/lib/polipus/queue_overflow/manager.rb +1 -0
- data/lib/polipus/queue_overflow/mongo_queue.rb +1 -0
- data/lib/polipus/queue_overflow/mongo_queue_capped.rb +1 -0
- data/lib/polipus/robotex.rb +1 -0
- data/lib/polipus/signal_handler.rb +1 -0
- data/lib/polipus/storage.rb +1 -0
- data/lib/polipus/storage/base.rb +1 -0
- data/lib/polipus/storage/dev_null.rb +1 -0
- data/lib/polipus/storage/memory_store.rb +1 -0
- data/lib/polipus/storage/mongo_store.rb +2 -1
- data/lib/polipus/storage/s3_store.rb +1 -0
- data/lib/polipus/url_tracker.rb +1 -0
- data/lib/polipus/url_tracker/bloomfilter.rb +1 -0
- data/lib/polipus/url_tracker/redis_set.rb +1 -0
- data/lib/polipus/version.rb +2 -1
- data/spec/clear.rb +1 -0
- data/spec/http_spec.rb +1 -0
- data/spec/page_spec.rb +1 -0
- data/spec/polipus_spec.rb +1 -0
- data/spec/queue_overflow_manager_spec.rb +1 -0
- data/spec/queue_overflow_spec.rb +1 -0
- data/spec/robotex_spec.rb +1 -0
- data/spec/signal_handler_spec.rb +1 -0
- data/spec/spec_helper.rb +1 -0
- data/spec/storage_memory_spec.rb +1 -0
- data/spec/storage_mongo_spec.rb +1 -0
- data/spec/storage_s3_spec.rb +1 -0
- data/spec/url_tracker_spec.rb +1 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
MGZhM2Q1OWE5ZTkxZWE3Y2JhMDJkNzZkMWRjMWZjYjI4YmRhODJmYw==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
ODg4NTk5ZjhjNDM0ZjBhN2M2OWJlOWQyOTE4OWFiZmY1ZmQxMTZiYQ==
|
7
7
|
SHA512:
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
NTdmYWZhMmMzYTQxOWY5OTBlNGE1ZWQzNWYwMThjMjAwNzQ4NmU4MDY4OWRi
|
10
|
+
OWVkNzEwNmYwNTY3OGI4NmFkZGJiNzNhN2I3M2ZjMDUxNjAzMDAyZjEyYmZj
|
11
|
+
ZGFkODliNDc5ZTYzYWE2ODNiMzdkMWFjZGExYjgzNTQyNmM4MDc=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
NGYzNDc2ODQ3OTBiMzE5N2M5YmM3ZDliN2IyYWRmNjBiZDI3ZTYyYjkxNjBk
|
14
|
+
MDFhZjA0NDE5YjFkYzdiZTQxMjg0ZGEwODA3Nzk0YWZiNDRmY2I1MmE2ZWI4
|
15
|
+
MjQxNTU4NjgwOTM1ZDdlYjM4Mjg0NjhjY2M2OGU5MDA1YjNjMjY=
|
data/.rubocop_todo.yml
CHANGED
@@ -17,10 +17,6 @@ Style/CyclomaticComplexity:
|
|
17
17
|
Style/Documentation:
|
18
18
|
Enabled: false
|
19
19
|
|
20
|
-
# Offense count: 38
|
21
|
-
# Configuration parameters: EnforcedStyle, SupportedStyles.
|
22
|
-
Style/Encoding:
|
23
|
-
Enabled: false
|
24
20
|
|
25
21
|
# Offense count: 2
|
26
22
|
# Configuration parameters: EnforcedStyle, SupportedStyles.
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,11 @@
|
|
1
1
|
# Changelog
|
2
2
|
|
3
|
+
## 0.3.3 (2015-06-26)
|
4
|
+
|
5
|
+
[Compare changes in gem](https://github.com/taganaka/polipus/compare/0.3.2...0.3.3)
|
6
|
+
|
7
|
+
* BugFix: Better compatibility for mongo 2.6.x on index creation
|
8
|
+
|
3
9
|
## 0.3.2 (2015-06-17)
|
4
10
|
|
5
11
|
[Compare changes in gem](https://github.com/taganaka/polipus/compare/0.3.1...0.3.2)
|
data/Rakefile
CHANGED
data/examples/basic.rb
CHANGED
data/examples/error_handling.rb
CHANGED
data/examples/incremental.rb
CHANGED
data/examples/survival.rb
CHANGED
data/lib/polipus/http.rb
CHANGED
data/lib/polipus/page.rb
CHANGED
data/lib/polipus/plugin.rb
CHANGED
data/lib/polipus/plugins/cleaner.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
# encoding: UTF-8
|
1
2
|
module Polipus
|
2
3
|
module Plugin
|
3
4
|
class Cleaner
|
@@ -8,7 +9,7 @@ module Polipus
|
|
8
9
|
def on_initialize(crawler)
|
9
10
|
crawler.logger.info { 'Cleaner plugin loaded' }
|
10
11
|
unless @reset
|
11
|
-
crawler.logger.info { 'Cleaner plugin is
|
12
|
+
crawler.logger.info { 'Cleaner plugin is disabled, add :reset => true to the plugin if you really know what you are doing' }
|
12
13
|
return nil
|
13
14
|
end
|
14
15
|
crawler.logger.info { 'Cleaning all: url_tracker, storage, queue' }
|
data/lib/polipus/robotex.rb
CHANGED
data/lib/polipus/storage.rb
CHANGED
data/lib/polipus/storage/base.rb
CHANGED
data/lib/polipus/storage/mongo_store.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
# encoding: UTF-8
|
1
2
|
require 'mongo'
|
2
3
|
require 'zlib'
|
3
4
|
require 'thread'
|
@@ -9,7 +10,7 @@ module Polipus
|
|
9
10
|
@mongo = options[:mongo]
|
10
11
|
@collection = options[:collection]
|
11
12
|
@mongo.create_collection(@collection)
|
12
|
-
@mongo[@collection].ensure_index(:uuid, unique: true,
|
13
|
+
@mongo[@collection].ensure_index(:uuid, unique: true, dropDups: true, background: true)
|
13
14
|
@compress_body = options[:compress_body] ||= true
|
14
15
|
@except = options[:except] ||= []
|
15
16
|
@semaphore = Mutex.new
|
data/lib/polipus/url_tracker.rb
CHANGED
data/lib/polipus/version.rb
CHANGED
data/spec/clear.rb
CHANGED
data/spec/http_spec.rb
CHANGED
data/spec/page_spec.rb
CHANGED
data/spec/polipus_spec.rb
CHANGED
data/spec/queue_overflow_spec.rb
CHANGED
data/spec/robotex_spec.rb
CHANGED
data/spec/signal_handler_spec.rb
CHANGED
data/spec/spec_helper.rb
CHANGED
data/spec/storage_memory_spec.rb
CHANGED
data/spec/storage_mongo_spec.rb
CHANGED
data/spec/storage_s3_spec.rb
CHANGED
data/spec/url_tracker_spec.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: polipus
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.2
|
4
|
+
version: 0.3.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Francesco Laurita
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-06-
|
11
|
+
date: 2014-06-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: redis-bloomfilter
|