polipus 0.3.2 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. checksums.yaml +8 -8
  2. data/.rubocop_todo.yml +0 -4
  3. data/CHANGELOG.md +6 -0
  4. data/Rakefile +1 -0
  5. data/examples/basic.rb +1 -0
  6. data/examples/error_handling.rb +1 -0
  7. data/examples/incremental.rb +1 -0
  8. data/examples/robots_txt_handling.rb +1 -0
  9. data/examples/survival.rb +1 -0
  10. data/lib/polipus/http.rb +1 -0
  11. data/lib/polipus/page.rb +1 -0
  12. data/lib/polipus/plugin.rb +1 -0
  13. data/lib/polipus/plugins/cleaner.rb +2 -1
  14. data/lib/polipus/plugins/sample.rb +1 -0
  15. data/lib/polipus/plugins/sleeper.rb +1 -0
  16. data/lib/polipus/queue_overflow.rb +1 -0
  17. data/lib/polipus/queue_overflow/base.rb +1 -0
  18. data/lib/polipus/queue_overflow/dev_null_queue.rb +1 -0
  19. data/lib/polipus/queue_overflow/manager.rb +1 -0
  20. data/lib/polipus/queue_overflow/mongo_queue.rb +1 -0
  21. data/lib/polipus/queue_overflow/mongo_queue_capped.rb +1 -0
  22. data/lib/polipus/robotex.rb +1 -0
  23. data/lib/polipus/signal_handler.rb +1 -0
  24. data/lib/polipus/storage.rb +1 -0
  25. data/lib/polipus/storage/base.rb +1 -0
  26. data/lib/polipus/storage/dev_null.rb +1 -0
  27. data/lib/polipus/storage/memory_store.rb +1 -0
  28. data/lib/polipus/storage/mongo_store.rb +2 -1
  29. data/lib/polipus/storage/s3_store.rb +1 -0
  30. data/lib/polipus/url_tracker.rb +1 -0
  31. data/lib/polipus/url_tracker/bloomfilter.rb +1 -0
  32. data/lib/polipus/url_tracker/redis_set.rb +1 -0
  33. data/lib/polipus/version.rb +2 -1
  34. data/spec/clear.rb +1 -0
  35. data/spec/http_spec.rb +1 -0
  36. data/spec/page_spec.rb +1 -0
  37. data/spec/polipus_spec.rb +1 -0
  38. data/spec/queue_overflow_manager_spec.rb +1 -0
  39. data/spec/queue_overflow_spec.rb +1 -0
  40. data/spec/robotex_spec.rb +1 -0
  41. data/spec/signal_handler_spec.rb +1 -0
  42. data/spec/spec_helper.rb +1 -0
  43. data/spec/storage_memory_spec.rb +1 -0
  44. data/spec/storage_mongo_spec.rb +1 -0
  45. data/spec/storage_s3_spec.rb +1 -0
  46. data/spec/url_tracker_spec.rb +1 -0
  47. metadata +2 -2
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- NmQ5ZTBhZThlMDNlZGEzNzQ4ZDU0MGQ3NzBkODY0YjZlOGZmZTFmOA==
4
+ MGZhM2Q1OWE5ZTkxZWE3Y2JhMDJkNzZkMWRjMWZjYjI4YmRhODJmYw==
5
5
  data.tar.gz: !binary |-
6
- NTc3NTI5YmMyYzk4MzNkNDFjZTY4N2JiNGE2MzI4ZTRiZDZmZjY4Yw==
6
+ ODg4NTk5ZjhjNDM0ZjBhN2M2OWJlOWQyOTE4OWFiZmY1ZmQxMTZiYQ==
7
7
  SHA512:
8
8
  metadata.gz: !binary |-
9
- ZjczYmUzNjdhOWY0NjYzMzNhZDE4NmMwMWI5YTVhZTg1NGY5ZGJjNWJkN2I5
10
- NDYwZjRiNDM1ZGZlYjFmZTAxNWNiNmQ0MGM5MzJmNDAzZTAxYWUyNTc3OTlh
11
- ZWFhMWZkZGI4MTlhZmQ3NmM3NmQwZTk0YmM1YWU4NDJkYzk4NGQ=
9
+ NTdmYWZhMmMzYTQxOWY5OTBlNGE1ZWQzNWYwMThjMjAwNzQ4NmU4MDY4OWRi
10
+ OWVkNzEwNmYwNTY3OGI4NmFkZGJiNzNhN2I3M2ZjMDUxNjAzMDAyZjEyYmZj
11
+ ZGFkODliNDc5ZTYzYWE2ODNiMzdkMWFjZGExYjgzNTQyNmM4MDc=
12
12
  data.tar.gz: !binary |-
13
- ZmI4YzFjMzNiNTYwNWFkZmNiY2VmZTcyY2I1YTI3ZWQ2NWI2MjYwYTJiNDY3
14
- YjhmZjE3MGJkNWUwNmZkMWNhMTExZTJhYmE4YTI4OThlNmRkYWZhODkxZTY1
15
- MWZiZjA0M2E2MjcwOGZiNDViZmUyNTg4YmFkNmVjM2Y4ZjIzZDU=
13
+ NGYzNDc2ODQ3OTBiMzE5N2M5YmM3ZDliN2IyYWRmNjBiZDI3ZTYyYjkxNjBk
14
+ MDFhZjA0NDE5YjFkYzdiZTQxMjg0ZGEwODA3Nzk0YWZiNDRmY2I1MmE2ZWI4
15
+ MjQxNTU4NjgwOTM1ZDdlYjM4Mjg0NjhjY2M2OGU5MDA1YjNjMjY=
data/.rubocop_todo.yml CHANGED
@@ -17,10 +17,6 @@ Style/CyclomaticComplexity:
17
17
  Style/Documentation:
18
18
  Enabled: false
19
19
 
20
- # Offense count: 38
21
- # Configuration parameters: EnforcedStyle, SupportedStyles.
22
- Style/Encoding:
23
- Enabled: false
24
20
 
25
21
  # Offense count: 2
26
22
  # Configuration parameters: EnforcedStyle, SupportedStyles.
data/CHANGELOG.md CHANGED
@@ -1,5 +1,11 @@
1
1
  # Changelog
2
2
 
3
+ ## 0.3.3 (2015-06-26)
4
+
5
+ [Compare changes in gem](https://github.com/taganaka/polipus/compare/0.3.2...0.3.3)
6
+
7
+ * BugFix: Better compatibility for mongo 2.6.x on index creation
8
+
3
9
  ## 0.3.2 (2015-06-17)
4
10
 
5
11
  [Compare changes in gem](https://github.com/taganaka/polipus/compare/0.3.1...0.3.2)
data/Rakefile CHANGED
@@ -1,3 +1,4 @@
1
+ # encoding: UTF-8
1
2
  require 'bundler/gem_tasks'
2
3
  require 'rspec/core/rake_task'
3
4
 
data/examples/basic.rb CHANGED
@@ -1,3 +1,4 @@
1
+ # encoding: UTF-8
1
2
  require 'polipus'
2
3
  require 'mongo'
3
4
  require 'polipus/plugins/cleaner'
data/examples/error_handling.rb CHANGED
@@ -1,3 +1,4 @@
1
+ # encoding: UTF-8
1
2
  require 'polipus'
2
3
 
3
4
  Polipus.crawler('rubygems', 'http://rubygems.org/') do |crawler|
data/examples/incremental.rb CHANGED
@@ -1,3 +1,4 @@
1
+ # encoding: UTF-8
1
2
  require 'polipus'
2
3
  require 'mongo'
3
4
 
data/examples/robots_txt_handling.rb CHANGED
@@ -1,3 +1,4 @@
1
+ # encoding: UTF-8
1
2
  require 'polipus'
2
3
 
3
4
  options = {
data/examples/survival.rb CHANGED
@@ -1,3 +1,4 @@
1
+ # encoding: UTF-8
1
2
  require 'polipus'
2
3
 
3
4
  Polipus.crawler('rubygems', 'http://rubygems.org/') do |crawler|
data/lib/polipus/http.rb CHANGED
@@ -1,3 +1,4 @@
1
+ # encoding: UTF-8
1
2
  require 'net/https'
2
3
  require 'polipus/page'
3
4
  require 'zlib'
data/lib/polipus/page.rb CHANGED
@@ -1,3 +1,4 @@
1
+ # encoding: UTF-8
1
2
  require 'nokogiri'
2
3
  require 'json'
3
4
  require 'ostruct'
data/lib/polipus/plugin.rb CHANGED
@@ -1,3 +1,4 @@
1
+ # encoding: UTF-8
1
2
  module Polipus
2
3
  module Plugin
3
4
  @@plugins = {}
data/lib/polipus/plugins/cleaner.rb CHANGED
@@ -1,3 +1,4 @@
1
+ # encoding: UTF-8
1
2
  module Polipus
2
3
  module Plugin
3
4
  class Cleaner
@@ -8,7 +9,7 @@ module Polipus
8
9
  def on_initialize(crawler)
9
10
  crawler.logger.info { 'Cleaner plugin loaded' }
10
11
  unless @reset
11
- crawler.logger.info { 'Cleaner plugin is disable, add :reset => true to the plugin if you really know what you are doing' }
12
+ crawler.logger.info { 'Cleaner plugin is disabled, add :reset => true to the plugin if you really know what you are doing' }
12
13
  return nil
13
14
  end
14
15
  crawler.logger.info { 'Cleaning all: url_tracker, storage, queue' }
data/lib/polipus/plugins/sample.rb CHANGED
@@ -1,3 +1,4 @@
1
+ # encoding: UTF-8
1
2
  module Polipus
2
3
  module Plugin
3
4
  class Sample
data/lib/polipus/plugins/sleeper.rb CHANGED
@@ -1,3 +1,4 @@
1
+ # encoding: UTF-8
1
2
  module Polipus
2
3
  module Plugin
3
4
  class Sleeper
data/lib/polipus/queue_overflow.rb CHANGED
@@ -1,3 +1,4 @@
1
+ # encoding: UTF-8
1
2
  require 'polipus/queue_overflow/manager'
2
3
  module Polipus
3
4
  module QueueOverflow
data/lib/polipus/queue_overflow/base.rb CHANGED
@@ -1,3 +1,4 @@
1
+ # encoding: UTF-8
1
2
  module Polipus
2
3
  module QueueOverflow
3
4
  class Base
data/lib/polipus/queue_overflow/dev_null_queue.rb CHANGED
@@ -1,3 +1,4 @@
1
+ # encoding: UTF-8
1
2
  require 'thread'
2
3
  module Polipus
3
4
  module QueueOverflow
data/lib/polipus/queue_overflow/manager.rb CHANGED
@@ -1,3 +1,4 @@
1
+ # encoding: UTF-8
1
2
  module Polipus
2
3
  module QueueOverflow
3
4
  class Manager
data/lib/polipus/queue_overflow/mongo_queue.rb CHANGED
@@ -1,3 +1,4 @@
1
+ # encoding: UTF-8
1
2
  require 'thread'
2
3
  require 'mongo'
3
4
  module Polipus
data/lib/polipus/queue_overflow/mongo_queue_capped.rb CHANGED
@@ -1,3 +1,4 @@
1
+ # encoding: UTF-8
1
2
  require 'polipus/queue_overflow/mongo_queue'
2
3
  module Polipus
3
4
  module QueueOverflow
data/lib/polipus/robotex.rb CHANGED
@@ -1,3 +1,4 @@
1
+ # encoding: UTF-8
1
2
  require 'open-uri'
2
3
  require 'uri'
3
4
  require 'timeout'
data/lib/polipus/signal_handler.rb CHANGED
@@ -1,3 +1,4 @@
1
+ # encoding: UTF-8
1
2
  require 'singleton'
2
3
  module Polipus
3
4
  class SignalHandler
data/lib/polipus/storage.rb CHANGED
@@ -1,3 +1,4 @@
1
+ # encoding: UTF-8
1
2
  require 'polipus/storage/base'
2
3
  module Polipus
3
4
  module Storage
data/lib/polipus/storage/base.rb CHANGED
@@ -1,3 +1,4 @@
1
+ # encoding: UTF-8
1
2
  require 'uri'
2
3
 
3
4
  module Polipus
data/lib/polipus/storage/dev_null.rb CHANGED
@@ -1,3 +1,4 @@
1
+ # encoding: UTF-8
1
2
  module Polipus
2
3
  module Storage
3
4
  class DevNull < Base
data/lib/polipus/storage/memory_store.rb CHANGED
@@ -1,3 +1,4 @@
1
+ # encoding: UTF-8
1
2
  require 'thread'
2
3
  module Polipus
3
4
  module Storage
data/lib/polipus/storage/mongo_store.rb CHANGED
@@ -1,3 +1,4 @@
1
+ # encoding: UTF-8
1
2
  require 'mongo'
2
3
  require 'zlib'
3
4
  require 'thread'
@@ -9,7 +10,7 @@ module Polipus
9
10
  @mongo = options[:mongo]
10
11
  @collection = options[:collection]
11
12
  @mongo.create_collection(@collection)
12
- @mongo[@collection].ensure_index(:uuid, unique: true, drop_dups: true, background: true)
13
+ @mongo[@collection].ensure_index(:uuid, unique: true, dropDups: true, background: true)
13
14
  @compress_body = options[:compress_body] ||= true
14
15
  @except = options[:except] ||= []
15
16
  @semaphore = Mutex.new
data/lib/polipus/storage/s3_store.rb CHANGED
@@ -1,3 +1,4 @@
1
+ # encoding: UTF-8
1
2
  require 'aws/s3'
2
3
  require 'zlib'
3
4
  require 'thread'
data/lib/polipus/url_tracker.rb CHANGED
@@ -1,3 +1,4 @@
1
+ # encoding: UTF-8
1
2
  module Polipus
2
3
  module UrlTracker
3
4
  def self.bloomfilter(options = {})
data/lib/polipus/url_tracker/bloomfilter.rb CHANGED
@@ -1,3 +1,4 @@
1
+ # encoding: UTF-8
1
2
  require 'redis-bloomfilter'
2
3
  module Polipus
3
4
  module UrlTracker
data/lib/polipus/url_tracker/redis_set.rb CHANGED
@@ -1,3 +1,4 @@
1
+ # encoding: UTF-8
1
2
  module Polipus
2
3
  module UrlTracker
3
4
  class RedisSet
data/lib/polipus/version.rb CHANGED
@@ -1,4 +1,5 @@
1
+ # encoding: UTF-8
1
2
  module Polipus
2
- VERSION = '0.3.2'
3
+ VERSION = '0.3.3'
3
4
  HOMEPAGE = 'https://github.com/taganaka/polipus'
4
5
  end
data/spec/clear.rb CHANGED
@@ -1,3 +1,4 @@
1
+ # encoding: UTF-8
1
2
  require 'yaml'
2
3
  Dir.glob('./cassettes/*.yml').each do|f|
3
4
  next unless f =~ /[a-f0-9]{32}/
data/spec/http_spec.rb CHANGED
@@ -1,3 +1,4 @@
1
+ # encoding: UTF-8
1
2
  require 'spec_helper'
2
3
  require 'mongo'
3
4
  require 'polipus/http'
data/spec/page_spec.rb CHANGED
@@ -1,3 +1,4 @@
1
+ # encoding: UTF-8
1
2
  require 'spec_helper'
2
3
  require 'polipus/page'
3
4
 
data/spec/polipus_spec.rb CHANGED
@@ -1,3 +1,4 @@
1
+ # encoding: UTF-8
1
2
  require 'spec_helper'
2
3
 
3
4
  describe Polipus::PolipusCrawler do
data/spec/queue_overflow_manager_spec.rb CHANGED
@@ -1,3 +1,4 @@
1
+ # encoding: UTF-8
1
2
  require 'spec_helper'
2
3
  require 'mongo'
3
4
  require 'polipus/queue_overflow'
data/spec/queue_overflow_spec.rb CHANGED
@@ -1,3 +1,4 @@
1
+ # encoding: UTF-8
1
2
  require 'spec_helper'
2
3
  require 'polipus/queue_overflow'
3
4
 
data/spec/robotex_spec.rb CHANGED
@@ -1,3 +1,4 @@
1
+ # encoding: UTF-8
1
2
  require 'spec_helper'
2
3
  require 'polipus/robotex'
3
4
  describe Polipus::Robotex do
data/spec/signal_handler_spec.rb CHANGED
@@ -1,3 +1,4 @@
1
+ # encoding: UTF-8
1
2
  require 'spec_helper'
2
3
 
3
4
  describe Polipus::SignalHandler do
data/spec/spec_helper.rb CHANGED
@@ -1,3 +1,4 @@
1
+ # encoding: UTF-8
1
2
  # This file was generated by the `rspec --init` command. Conventionally, all
2
3
  # specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
3
4
  # Require this file using `require "spec_helper"` to ensure that it is only
data/spec/storage_memory_spec.rb CHANGED
@@ -1,3 +1,4 @@
1
+ # encoding: UTF-8
1
2
  require 'spec_helper'
2
3
  require 'mongo'
3
4
  require 'polipus/storage/memory_store'
data/spec/storage_mongo_spec.rb CHANGED
@@ -1,3 +1,4 @@
1
+ # encoding: UTF-8
1
2
  require 'spec_helper'
2
3
  require 'mongo'
3
4
  require 'polipus/storage/mongo_store'
data/spec/storage_s3_spec.rb CHANGED
@@ -1,3 +1,4 @@
1
+ # encoding: UTF-8
1
2
  require 'spec_helper'
2
3
  require 'aws/s3'
3
4
  require 'polipus/storage/s3_store'
data/spec/url_tracker_spec.rb CHANGED
@@ -1,3 +1,4 @@
1
+ # encoding: UTF-8
1
2
  require 'spec_helper'
2
3
  require 'polipus/url_tracker'
3
4
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: polipus
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.2
4
+ version: 0.3.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Francesco Laurita
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-06-18 00:00:00.000000000 Z
11
+ date: 2014-06-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: redis-bloomfilter