url_processor 0.1.0 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ ODM4YTQ5NWIzYzgzZWU3NDg4Mzk2Y2M3NmQyM2I1YzA5Nzk1NzIzNw==
5
+ data.tar.gz: !binary |-
6
+ NWJlNDU2YTdjZTM3MTVlNWE4ZmUxZjQ2ZmI2MjVkYWM0NDU0ZTg1Ng==
7
+ SHA512:
8
+ metadata.gz: !binary |-
9
+ ZWYyOTU3ZTU2YTZmZDgxNTljYjAzMmMxMzdiMWFiMThmZmZhM2UzZjcyMDlj
10
+ MjVkMWIxMWE4OWUyYTMzOTBhYzgxZTZhNzJlNGVmYjk3YTU5NzAyNzI5ZDg5
11
+ OWUzODI5NTFhODI3ZGFhYTEwNjRjOTZjMGFkNmZlYjhkNDJlYzA=
12
+ data.tar.gz: !binary |-
13
+ YTkwNTgyNTA4YTcxOWNlZWNiMmE3YTJhNDc3NmVkZTE0YzYwMDljMWUzMWM0
14
+ NjhhYWJiMjJkNmExNmRkZDc2NTExZWE2YzgwYTFkODMwZGM3NDQ4ZjU1ODA4
15
+ MTk4N2ZiODg2NTA4Y2MyZTM5NGM2NDI5ZGY4OGMxNmZkNzZiMmQ=
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/.rspec CHANGED
@@ -1 +1,2 @@
1
+ --format documentation
1
2
  --color
data/.travis.yml ADDED
@@ -0,0 +1,9 @@
1
+ script: bundle exec rake
2
+ rvm:
3
+ - 1.9.3
4
+ deploy:
5
+ provider: rubygems
6
+ api_key:
7
+ secure: lHnBn15z5MLSKiCqcqG0Bgj7r089C0uzI9zxzNtEGJ4fScIrRREJkYEb0l0ndCizek5++ChVOVM1/qguVEcFlGyUJ4DEXzWynRzsWui2sN+Xt9AaEUXGc23L/ljSg6oiH8G1TgIfeTODxn95WIUfFI6ydu5rZddsDeAWbXBxp4A=
8
+ on:
9
+ tags: true
data/Gemfile CHANGED
@@ -1,14 +1,4 @@
1
- source "http://rubygems.org"
2
- # Add dependencies required to use your gem here.
3
- # Example:
4
- # gem "activesupport", ">= 2.3.5"
1
+ source 'https://rubygems.org'
5
2
 
6
- # Add dependencies to develop your gem here.
7
- # Include everything needed to run rake, tests, features, etc.
8
- group :development do
9
- gem "rspec", "~> 2.8.0"
10
- gem "rdoc", "~> 3.12"
11
- gem "bundler", "~> 1.0"
12
- gem "jeweler", "~> 2.0.1"
13
- gem "simplecov", ">= 0"
14
- end
3
+ # Specify your gem's dependencies in url_processor.gemspec
4
+ gemspec
data/LICENSE.txt CHANGED
@@ -1,5 +1,7 @@
1
1
  Copyright (c) 2014 Eric Hayes
2
2
 
3
+ MIT License
4
+
3
5
  Permission is hereby granted, free of charge, to any person obtaining
4
6
  a copy of this software and associated documentation files (the
5
7
  "Software"), to deal in the Software without restriction, including
data/README.md ADDED
@@ -0,0 +1,108 @@
1
+ # url_processor [![Build Status](https://travis-ci.org/ejhayes/url_processor.png?branch=master)](https://travis-ci.org/ejhayes/url_processor) [![Gem Version](https://badge.fury.io/rb/url_processor.png)](http://badge.fury.io/rb/url_processor) [![Code Climate](https://codeclimate.com/github/ejhayes/url_processor.png)](https://codeclimate.com/github/ejhayes/url_processor)
2
+
3
+ Fast and easy way to process urls.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'url_processor'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install url_processor
18
+
19
+ ## Usage
20
+
21
+ Example use case:
22
+
23
+ require 'url_processor'
24
+ require 'trollop'
25
+
26
+ opts = Trollop::options do
27
+ banner <<-EOS
28
+ This utility validates the quality of links used by courtreference.com.
29
+ EOS
30
+ opt :log, "output to logfile (default is STDOUT)", :type => :string
31
+ opt :debug, "enable debugging mode"
32
+ opt :batch_size, "maximum batch size (records to retrieve at a time before processing urls)", :default => 100
33
+ opt :max_concurrency, "maximum number of threads to spawn", :type => :int, :default => 20
34
+ opt :max_retries, "maximum number of times to try a url before failing", :type => :int, :default => 3
35
+ opt :max_timeout, "maximum duration in seconds to wait for url to load", :type => :int, :default => 5
36
+ opt :cookies_file, "file to store cookie information", :type => :string, :default => '/tmp/whatever_cookies'
37
+ opt :max_total_connections, "maximum number of connections to keep open at a time", :type => :int, :default => 100
38
+ end
39
+
40
+ # Additional validations
41
+ opts[:log] = STDOUT if opts[:log] == nil
42
+
43
+ link_check = UrlProcessor.create do |config|
44
+ config.log = opts[:log]
45
+ config.debug = opts[:debug]
46
+ config.batch_size = opts[:batch_size]
47
+ config.max_concurrency = opts[:max_concurrency]
48
+ config.max_retries = opts[:max_retries]
49
+ config.cookies_file = opts[:cookies_file]
50
+ config.max_total_connections = opts[:max_total_connections]
51
+
52
+ config.retrieves_links_by_id_with do |link_id|
53
+ puts "I was called with #{link_id}"
54
+ end
55
+
56
+ config.retrieves_all_links_with do
57
+ links = []
58
+ (1..3).each do |i|
59
+ links << OpenStruct.new(:id => i, :urls => [{:url => 'http://www.example.com'}])
60
+ end
61
+ links
62
+ end
63
+
64
+ config.creates_new_link_request_with do |url, params|
65
+ UrlProcessor::LinkRequest.new(url, params)
66
+ end
67
+
68
+ config.processes_response_with do |response|
69
+ if response.return_code == :ok
70
+ config.logger.info "#{response.return_code} - #{response.effective_url}".green
71
+ destroyed_broken_link = OpenStruct.new(:id => 1, :serializable_hash => { :about => 'Not a real record' })
72
+ config.logger.debug "broken link destroyed (#{destroyed_broken_link.id}): #{destroyed_broken_link.serializable_hash}".green
73
+ else
74
+ config.logger.info "#{response.return_code} - #{response.effective_url}".red
75
+ end
76
+ end
77
+ end
78
+
79
+ puts "Running this thing..."
80
+ link_check.run
81
+
82
+ ## Updating this gem
83
+
84
+ If you are making changes to this gem, here are a few things you will need to know:
85
+
86
+ ## Running the executables
87
+
88
+ If you have an executable file called "my_executable" in the bin folder, you can run it with:
89
+
90
+ bundle exec my_executable
91
+
92
+ If you want to play around in the interactive console with the gem already loaded, you can do this:
93
+
94
+ bundle exec rake console
95
+
96
+ To run the tests:
97
+
98
+ bundle exec rake spec
99
+
100
+ Please note that we are using simplecov for code coverage.
101
+
102
+ ## Contributing
103
+
104
+ 1. Fork it ( http://github.com/ejhayes/url_processor/fork )
105
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
106
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
107
+ 4. Push to the branch (`git push origin my-new-feature`)
108
+ 5. Create new Pull Request
data/Rakefile CHANGED
@@ -1,50 +1,18 @@
1
- # encoding: utf-8
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
2
3
 
3
- require 'rubygems'
4
- require 'bundler'
5
- begin
6
- Bundler.setup(:default, :development)
7
- rescue Bundler::BundlerError => e
8
- $stderr.puts e.message
9
- $stderr.puts "Run `bundle install` to install missing gems"
10
- exit e.status_code
11
- end
12
- require 'rake'
13
-
14
- require 'jeweler'
15
- Jeweler::Tasks.new do |gem|
16
- # gem is a Gem::Specification... see http://guides.rubygems.org/specification-reference/ for more options
17
- gem.name = "url_processor"
18
- gem.homepage = "http://github.com/ejhayes/url_processor"
19
- gem.license = "MIT"
20
- gem.summary = %Q{Fast and reliable link checker.}
21
- gem.description = %Q{Fast and easy way to validate tons of urls without locking up your system or eating up too much memory.}
22
- gem.email = "eric@deployfx.com"
23
- gem.authors = ["Eric Hayes"]
24
- # dependencies defined in Gemfile
25
- end
26
- Jeweler::RubygemsDotOrgTasks.new
27
-
28
- require 'rspec/core'
29
- require 'rspec/core/rake_task'
30
- RSpec::Core::RakeTask.new(:spec) do |spec|
31
- spec.pattern = FileList['spec/**/*_spec.rb']
32
- end
33
-
34
- desc "Code coverage detail"
35
- task :simplecov do
36
- ENV['COVERAGE'] = "true"
37
- Rake::Task['spec'].execute
38
- end
4
+ RSpec::Core::RakeTask.new(:spec)
39
5
 
40
6
  task :default => :spec
7
+ task :test => :spec
41
8
 
42
- require 'rdoc/task'
43
- Rake::RDocTask.new do |rdoc|
44
- version = File.exist?('VERSION') ? File.read('VERSION') : ""
45
-
46
- rdoc.rdoc_dir = 'rdoc'
47
- rdoc.title = "url_processor #{version}"
48
- rdoc.rdoc_files.include('README*')
49
- rdoc.rdoc_files.include('lib/**/*.rb')
9
+ desc 'Interactive console with gem pre-loaded'
10
+ task :console do
11
+ require 'irb'
12
+ require 'irb/completion'
13
+ require 'ruby-debug'
14
+ $LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
15
+ require 'url_processor'
16
+ ARGV.clear
17
+ IRB.start
50
18
  end
data/TODO ADDED
@@ -0,0 +1,12 @@
1
+ Things to be done, eventually
2
+ =============================
3
+
4
+ - rdoc documentation in code
5
+ - simplecov code coverage
6
+ - rspec tests
7
+ - integration with travisci
8
+ - integration with code climate
9
+ - published gem to rubygems
10
+ - ppa setup for libcurl
11
+ - post-install message explaining how to install the correct libcurl
12
+ in the event that asyncdns is not properly set up
data/bin/url_processor ADDED
@@ -0,0 +1,5 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'url_processor'
4
+
5
+ puts "THIS IS A TEST OF THE TOOL, version: #{UrlProcessor::VERSION}"
@@ -0,0 +1,129 @@
1
+ module UrlProcessor
2
+ class Base
3
+ attr_reader :config
4
+
5
+ def initialize(c)
6
+ raise ArgumentError unless c.is_a? UrlProcessor::Config
7
+ @config = c
8
+
9
+ # connect to the db
10
+ #OnlinesearchesModels::connect
11
+ end
12
+
13
+ def new_broken_link(params={})
14
+ raise NotImplementedError.new "new_broken_link not implemented"
15
+ end
16
+
17
+ def report_broken_link(link_id, params={})
18
+ url_type_code = params[:url_type_code]
19
+ response_code = params[:response_code]
20
+ begin
21
+ link = config.get_link_by_id.call(link_id)
22
+ broken_link = new_broken_link(
23
+ :link_id => link.id,
24
+ :fips_code => link.fips_code,
25
+ :url_type_code => url_type_code,
26
+ :response_code => response_code,
27
+ :reported_by => 'QC Report'
28
+ )
29
+ broken_link.save
30
+ config.logger.debug "broken link created (#{broken_link.id}): #{broken_link.serializable_hash}".red
31
+ rescue ActiveRecord::RecordNotFound => e
32
+ config.logger.warn "#{e}".red
33
+ end
34
+ end
35
+
36
+ def pre_process_link(link)
37
+ # do nothing
38
+ end
39
+
40
+ def process_response(response)
41
+ raise NotImplementedError.new "process_reponse is not implemented"
42
+ end
43
+
44
+ def new_link_request(url, params={})
45
+ raise NotImplementedError.new "link_request is not implemented"
46
+ end
47
+
48
+ def find_in_batches(collection, batch_size)
49
+ if collection.is_a? Array
50
+ collection.each do |element|
51
+ yield element
52
+ end
53
+ else
54
+ collection.find_in_batches(batch_size: batch_size) do |group|
55
+ # Output progress information
56
+ config.logger.info "PROCESSED: #{processed_links}, NEXT GROUP SIZE: #{group.size}".yellow
57
+
58
+ # for debugging purposes we do not want to process everything
59
+ if config.debug && processed_links >= config.batch_size
60
+ config.logger.debug "FINISHED first batch (#{@batch_size} records), exiting".yellow
61
+ return
62
+ end
63
+
64
+ group.each do |element|
65
+ yield element
66
+ end
67
+ end
68
+ end
69
+ end
70
+
71
+ def run
72
+ processed_links = 0
73
+ hydra = Typhoeus::Hydra.new(max_concurrency: config.max_concurrency, max_total_connections: config.max_total_connections)
74
+
75
+ find_in_batches(config.links.call, config.batch_size) do |link|
76
+ # any custom pre-processing
77
+ pre_process_link(link)
78
+
79
+ if link.urls.empty?
80
+ # In the event that we have a link that actually has no urls associated with it
81
+ report_broken_link link.id, :response_code => :has_no_urls if config.report_records_without_urls
82
+ else
83
+ # Each record has 2 urls associated with it, process each separately
84
+ link.urls.each do |url|
85
+ config.logger.debug "link: #{link.serializable_hash}, url: #{url}".yellow
86
+
87
+ link_request = config.new_link_request.call(
88
+ url[:url],
89
+ followlocation: true,
90
+ method: :head,
91
+ ssl_verifypeer: false,
92
+ ssl_verifyhost: 2,
93
+ cookiefile: config.cookies_file,
94
+ cookiejar: config.cookies_file,
95
+ link_id: link.id,
96
+ url_type_code: url[:url_type_code],
97
+ timeout: config.max_timeout,
98
+ connecttimeout: config.max_timeout,
99
+ max_retries: config.max_retries,
100
+ forbid_reuse: 1,
101
+ nosignal: 1
102
+ )
103
+
104
+ link_request.on_complete do |response|
105
+ processed_links += 1
106
+
107
+ if ([:operation_timedout, :couldnt_resolve_host].include? response.return_code) && response.request.retry_request?
108
+ config.logger.info "#{response.return_code} - #{response.effective_url} timed out, retrying".yellow
109
+ hydra.queue response.request
110
+ elsif response.return_code == :got_nothing && response.request.options[:method] != :get
111
+ config.logger.info "#{response.return_code} - #{response.effective_url} empty response, attempting GET request instead".yellow
112
+
113
+ # set to GET request since HEAD may fail in some cases
114
+ response.request.options[:method] = :get
115
+ hydra.queue response.request
116
+ else
117
+ config.process_response.call response
118
+ end
119
+ end
120
+
121
+ hydra.queue link_request
122
+ end
123
+ end
124
+
125
+ hydra.run
126
+ end
127
+ end
128
+ end
129
+ end
@@ -0,0 +1,72 @@
1
+ module UrlProcessor
2
+
3
+ class Config
4
+ # configuration parameters
5
+ attr_accessor :batch_size
6
+ attr_accessor :max_concurrency
7
+ attr_accessor :max_retries
8
+ attr_accessor :cookies_file
9
+ attr_accessor :max_total_connections
10
+ attr_accessor :max_timeout
11
+
12
+ # debugging
13
+ attr_reader :debug
14
+ def debug=(val)
15
+ @debug = val
16
+ update_logging_level
17
+ end
18
+
19
+ # logging
20
+ attr_reader :logger
21
+
22
+ def log=(logging_location)
23
+ @logger = Logger.new(logging_location)
24
+ update_logging_level
25
+ end
26
+
27
+ # get individual link
28
+ attr_reader :get_link_by_id
29
+ def retrieves_links_by_id_with(&block)
30
+ @get_link_by_id = block
31
+ end
32
+
33
+ # get all links
34
+ attr_reader :links
35
+ def retrieves_all_links_with(&block)
36
+ @links = block
37
+ end
38
+
39
+ # create new link request
40
+ attr_reader :new_link_request
41
+ def creates_new_link_request_with(&block)
42
+ @new_link_request = block
43
+ end
44
+
45
+ # process responses
46
+ attr_reader :process_response
47
+ def processes_response_with(&block)
48
+ @process_response = block
49
+ end
50
+
51
+ def validate!
52
+ raise NotImplementedError.new("retrieves_links_by_id_with not set") if get_link_by_id.nil?
53
+ raise NotImplementedError.new("retrieves_all_links_with not set") if links.nil?
54
+ raise NotImplementedError.new("creates_new_link_request_with not set") if new_link_request.nil?
55
+ raise NotImplementedError.new("processes_response_with not set") if process_response.nil?
56
+ end
57
+
58
+ private
59
+
60
+ def update_logging_level
61
+ unless logger.nil?
62
+ if debug
63
+ logger.level = Logger::DEBUG
64
+ else
65
+ logger.level = Logger::WARN
66
+ end
67
+ end
68
+ end
69
+
70
+ end
71
+
72
+ end
@@ -0,0 +1,9 @@
1
+ module UrlProcessor
2
+
3
+ class Error < StandardError
4
+ end
5
+
6
+ class NotConfiguredError < Error
7
+ end
8
+
9
+ end
@@ -0,0 +1,12 @@
1
+ module UrlProcessor
2
+ class LinkRequest < RetryableRequest
3
+ attr_accessor :link_id
4
+ attr_accessor :url_type_code
5
+
6
+ def initialize(base_url, options={})
7
+ @link_id = options.delete(:link_id)
8
+ @url_type_code = options.delete(:url_type_code)
9
+ super(base_url, options)
10
+ end
11
+ end
12
+ end
@@ -0,0 +1,27 @@
1
+ module UrlProcessor
2
+ class RetryableRequest < Typhoeus::Request
3
+ def initialize(base_url, options={})
4
+ @attempts = 0
5
+ @max_retries = options.delete(:max_retries) || 3
6
+ super(base_url, options)
7
+ end
8
+
9
+ def on_complete(&block)
10
+ @attempts += 1
11
+ super(&block)
12
+ end
13
+
14
+ def retry_request
15
+ if retry_request?
16
+ options[:method] = :get
17
+ return self
18
+ else
19
+ return nil
20
+ end
21
+ end
22
+
23
+ def retry_request?
24
+ @attempts <= @max_retries
25
+ end
26
+ end
27
+ end
@@ -0,0 +1,16 @@
1
+ module UrlProcessor
2
+ class Runner
3
+
4
+ #attr_reader :callbacks
5
+
6
+ def initialize(config)
7
+ raise ArgumentError.new("invalid config '#{config}', expected ::Config") unless config.is_a? UrlProcessor::Config
8
+ @runner = Base.new(config)
9
+ end
10
+
11
+ def run
12
+ @runner.run
13
+ end
14
+
15
+ end
16
+ end
@@ -0,0 +1,3 @@
1
+ module UrlProcessor
2
+ VERSION = "0.5.0"
3
+ end
data/lib/url_processor.rb CHANGED
@@ -0,0 +1,24 @@
1
+ require 'typhoeus'
2
+ require 'colorize'
3
+ require 'ostruct'
4
+ require 'url_processor/version'
5
+ require 'url_processor/error'
6
+ require 'url_processor/runner'
7
+ require 'url_processor/config'
8
+ require 'url_processor/base'
9
+ require 'url_processor/retryable_request'
10
+ require 'url_processor/link_request'
11
+
12
+ module UrlProcessor
13
+
14
+ def self.create(&block)
15
+ config = Config.new
16
+ block.call config
17
+
18
+ # validate before returning
19
+ config.validate!
20
+
21
+ Runner.new(config)
22
+ end
23
+
24
+ end
data/spec/spec_helper.rb CHANGED
@@ -1,29 +1,2 @@
1
- require 'simplecov'
2
-
3
- module SimpleCov::Configuration
4
- def clean_filters
5
- @filters = []
6
- end
7
- end
8
-
9
- SimpleCov.configure do
10
- clean_filters
11
- load_adapter 'test_frameworks'
12
- end
13
-
14
- ENV["COVERAGE"] && SimpleCov.start do
15
- add_filter "/.rvm/"
16
- end
17
- $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
18
- $LOAD_PATH.unshift(File.dirname(__FILE__))
19
-
20
- require 'rspec'
1
+ $LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
21
2
  require 'url_processor'
22
-
23
- # Requires supporting files with custom matchers and macros, etc,
24
- # in ./support/ and its subdirectories.
25
- Dir["#{File.dirname(__FILE__)}/support/**/*.rb"].each {|f| require f}
26
-
27
- RSpec.configure do |config|
28
-
29
- end
@@ -0,0 +1,26 @@
1
+ require 'spec_helper'
2
+
3
+ describe UrlProcessor::Base do
4
+ let(:config) { UrlProcessor::Config.new }
5
+
6
+ describe '.new' do
7
+ it 'can be created' do
8
+ UrlProcessor::Base.new(config)
9
+ end
10
+
11
+ it 'throws an error if config is not set' do
12
+ expect { UrlProcessor::Base.new }.to raise_error ArgumentError
13
+ end
14
+
15
+ it 'throws an error if config is not of type UrlProcessor::Config' do
16
+ expect { UrlProcessor::Base.new( double('not a valid type') ) }.to raise_error ArgumentError
17
+ end
18
+ end
19
+
20
+ describe :attributes do
21
+ it 'sets config' do
22
+ url_processor = UrlProcessor::Base.new(config)
23
+ url_processor.config.should eq config
24
+ end
25
+ end
26
+ end
@@ -0,0 +1,146 @@
1
+ require 'spec_helper'
2
+
3
+ describe UrlProcessor::Config do
4
+ let(:config) { UrlProcessor::Config.new }
5
+
6
+ describe :validation do
7
+ before(:each) do
8
+ config.debug = true
9
+ config.batch_size = 1
10
+ config.max_concurrency = 1
11
+ config.max_retries = 1
12
+ config.cookies_file = 'test'
13
+ config.max_total_connections = 1
14
+ config.max_timeout = 1
15
+ config.log = STDOUT
16
+
17
+ config.retrieves_links_by_id_with do |val|
18
+ end
19
+
20
+ config.retrieves_all_links_with do
21
+ end
22
+
23
+ config.creates_new_link_request_with do |url, params|
24
+ end
25
+
26
+ config.processes_response_with do |response|
27
+ end
28
+ end
29
+
30
+ it 'is valid' do
31
+ config.validate!
32
+ end
33
+
34
+ it 'throws an error if get_link_by_id is not set' do
35
+ config.stub(:get_link_by_id).and_return(nil)
36
+ expect { config.validate! }.to raise_error NotImplementedError
37
+ end
38
+
39
+ it 'throws an error if links is not set' do
40
+ config.stub(:links).and_return(nil)
41
+ expect { config.validate! }.to raise_error NotImplementedError
42
+ end
43
+
44
+ it 'throws an error if new_link_request is not set' do
45
+ config.stub(:new_link_request).and_return(nil)
46
+ expect { config.validate! }.to raise_error NotImplementedError
47
+ end
48
+
49
+ it 'throws an error if process_response is not set' do
50
+ config.stub(:process_response).and_return(nil)
51
+ expect { config.validate! }.to raise_error NotImplementedError
52
+ end
53
+ end
54
+
55
+ describe :attributes do
56
+ it 'sets attributes' do
57
+ [
58
+ :debug,
59
+ :batch_size,
60
+ :max_concurrency,
61
+ :max_retries,
62
+ :cookies_file,
63
+ :max_total_connections,
64
+ :max_timeout
65
+ ].each do |param|
66
+ config.should respond_to(param)
67
+ config.should respond_to("#{param}=")
68
+ end
69
+ end
70
+ end
71
+
72
+ describe :logging do
73
+ it 'is nil by default' do
74
+ config.logger.should be_nil
75
+ end
76
+
77
+ it 'creates a valid logger object when log= is set' do
78
+ config.log = STDOUT
79
+ config.logger.should be_a Logger
80
+ end
81
+
82
+ # Debug
83
+ it 'sets logging level to DEBUG if debug is set first' do
84
+ config.debug = true
85
+ config.log = STDOUT
86
+ config.logger.level.should eq Logger::DEBUG
87
+ end
88
+
89
+ it 'sets logging level to DEBUG if debug is set last' do
90
+ config.log = STDOUT
91
+ config.debug = true
92
+ config.logger.level.should eq Logger::DEBUG
93
+ end
94
+
95
+ it 'sets logging level to WARN if debug level is not specified' do
96
+ config.log = STDOUT
97
+ config.logger.level.should eq Logger::WARN
98
+ end
99
+
100
+ it 'sets logging level to WARN if debug is changed from true -> false' do
101
+ config.debug = true
102
+ config.log = STDOUT
103
+ config.debug = false
104
+ config.logger.level.should eq Logger::WARN
105
+ end
106
+ end
107
+
108
+ describe :callbacks do
109
+
110
+ it 'sets get_link_by_id' do
111
+ config.retrieves_links_by_id_with do |link_id|
112
+ link_id.should eq 456
113
+ end
114
+
115
+ config.get_link_by_id.call(456)
116
+ end
117
+
118
+ it 'sets links' do
119
+ expected_response = double('expected_response')
120
+ config.retrieves_all_links_with do
121
+ expected_response
122
+ end
123
+
124
+ config.links.call.should eq expected_response
125
+ end
126
+
127
+ it 'sets new_link_request' do
128
+ expected_response = double('expected_response')
129
+ config.creates_new_link_request_with do |url, params|
130
+ expected_response
131
+ end
132
+
133
+ config.new_link_request.call.should eq expected_response
134
+ end
135
+
136
+ it 'sets process_response' do
137
+ expected_response = double('expected_response')
138
+ config.processes_response_with do |response|
139
+ expected_response
140
+ end
141
+
142
+ config.process_response.call.should eq expected_response
143
+ end
144
+ end
145
+
146
+ end
@@ -0,0 +1,47 @@
1
+ require 'spec_helper'
2
+
3
+ describe UrlProcessor::Runner do
4
+ let(:config) { UrlProcessor::Config.new }
5
+
6
+ describe ".new" do
7
+ describe :happy do
8
+ it 'can be created' do
9
+ UrlProcessor::Runner.new(config)
10
+ end
11
+
12
+ it 'creates a UrlProcessor::Base with the configuration object' do
13
+ UrlProcessor::Base.should_receive(:new).with(config)
14
+ runner = UrlProcessor::Runner.new(config)
15
+ end
16
+ end
17
+
18
+ describe :sad do
19
+ it 'raises an error if configuration not provided' do
20
+ expect { UrlProcessor::Runner.new }.to raise_error ArgumentError
21
+ end
22
+
23
+ it 'raises an error if configuration is not a UrlProcessor::Config type' do
24
+ invalid_config = double('invalid_config')
25
+
26
+ expect { UrlProcessor::Runner.new(invalid_config) }.to raise_error ArgumentError
27
+ end
28
+ end
29
+ end
30
+
31
+ describe ".run" do
32
+
33
+ it 'can be called' do
34
+ runner = UrlProcessor::Runner.new(config)
35
+ runner.should respond_to :run
36
+ end
37
+
38
+ it 'calls the run on the base runner' do
39
+ UrlProcessor::Base.any_instance.should_receive(:run)
40
+
41
+ runner = UrlProcessor::Runner.new(config)
42
+ runner.run
43
+ end
44
+
45
+ end
46
+
47
+ end
@@ -1,7 +1,36 @@
1
- require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
1
+ require 'spec_helper'
2
+
3
+ describe UrlProcessor do
4
+
5
+ describe :gem_attributes do
6
+ it 'should have a version number' do
7
+ UrlProcessor::VERSION.should_not be_nil
8
+ end
9
+ end
10
+
11
+ describe '.create' do
12
+
13
+ it 'requires a block with a config argument' do
14
+ UrlProcessor::Config.any_instance.stub(:validate!)
15
+ UrlProcessor::Config.any_instance.should_receive(:some_random_call)
16
+
17
+ UrlProcessor.create do |config|
18
+ config.some_random_call
19
+ end
20
+ end
21
+
22
+ it 'throws an error if a block is not passed' do
23
+ expect { UrlProcessor.create }.to raise_error NoMethodError
24
+ end
25
+
26
+ it 'returns a runner' do
27
+ processor = UrlProcessor.create do |config|
28
+ config.stub(:validate!)
29
+ end
30
+
31
+ processor.should be_a UrlProcessor::Runner
32
+ end
2
33
 
3
- describe "UrlProcessor" do
4
- it "fails" do
5
- fail "hey buddy, you should probably rename this file and start specing for real"
6
34
  end
35
+
7
36
  end
@@ -1,63 +1,31 @@
1
- # Generated by jeweler
2
- # DO NOT EDIT THIS FILE DIRECTLY
3
- # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
- # -*- encoding: utf-8 -*-
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'url_processor/version'
5
5
 
6
- Gem::Specification.new do |s|
7
- s.name = "url_processor"
8
- s.version = "0.1.0"
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "url_processor"
8
+ spec.version = UrlProcessor::VERSION
9
+ spec.authors = ["Eric Hayes"]
10
+ spec.email = ["eric@deployfx.com"]
11
+ spec.summary = %q{Fast and reliable link checker.}
12
+ spec.description = %q{Fast and easy way to validate tons of urls without locking up your system or eating up too much memory.}
13
+ spec.homepage = "https://github.com/ejhayes/url_processor"
14
+ spec.license = "MIT"
9
15
 
10
- s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
- s.authors = ["Eric Hayes"]
12
- s.date = "2014-02-20"
13
- s.description = "Fast and easy way to validate tons of urls without locking up your system or eating up too much memory."
14
- s.email = "eric@deployfx.com"
15
- s.extra_rdoc_files = [
16
- "LICENSE.txt",
17
- "README.rdoc"
18
- ]
19
- s.files = [
20
- ".document",
21
- ".rspec",
22
- "Gemfile",
23
- "Gemfile.lock",
24
- "LICENSE.txt",
25
- "README.rdoc",
26
- "Rakefile",
27
- "VERSION",
28
- "lib/url_processor.rb",
29
- "spec/spec_helper.rb",
30
- "spec/url_processor_spec.rb",
31
- "url_processor.gemspec"
32
- ]
33
- s.homepage = "http://github.com/ejhayes/url_processor"
34
- s.licenses = ["MIT"]
35
- s.require_paths = ["lib"]
36
- s.rubygems_version = "1.8.23"
37
- s.summary = "Fast and reliable link checker."
16
+ spec.files = `git ls-files -z`.split("\x0")
17
+ spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
18
+ spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
+ spec.require_paths = ["lib"]
38
20
 
39
- if s.respond_to? :specification_version then
40
- s.specification_version = 3
21
+ # gem dependencies
22
+ spec.add_runtime_dependency "typhoeus", "~> 0.6.7"
23
+ spec.add_runtime_dependency "colorize", "~> 0.6.0"
41
24
 
42
- if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
43
- s.add_development_dependency(%q<rspec>, ["~> 2.8.0"])
44
- s.add_development_dependency(%q<rdoc>, ["~> 3.12"])
45
- s.add_development_dependency(%q<bundler>, ["~> 1.0"])
46
- s.add_development_dependency(%q<jeweler>, ["~> 2.0.1"])
47
- s.add_development_dependency(%q<simplecov>, [">= 0"])
48
- else
49
- s.add_dependency(%q<rspec>, ["~> 2.8.0"])
50
- s.add_dependency(%q<rdoc>, ["~> 3.12"])
51
- s.add_dependency(%q<bundler>, ["~> 1.0"])
52
- s.add_dependency(%q<jeweler>, ["~> 2.0.1"])
53
- s.add_dependency(%q<simplecov>, [">= 0"])
54
- end
55
- else
56
- s.add_dependency(%q<rspec>, ["~> 2.8.0"])
57
- s.add_dependency(%q<rdoc>, ["~> 3.12"])
58
- s.add_dependency(%q<bundler>, ["~> 1.0"])
59
- s.add_dependency(%q<jeweler>, ["~> 2.0.1"])
60
- s.add_dependency(%q<simplecov>, [">= 0"])
61
- end
25
+ # not required to run the gem
26
+ spec.add_development_dependency "bundler", "~> 1.5"
27
+ spec.add_development_dependency "rake"
28
+ spec.add_development_dependency "debugger"
29
+ spec.add_development_dependency "rspec"
30
+ spec.add_development_dependency "trollop"
62
31
  end
63
-
metadata CHANGED
@@ -1,84 +1,102 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: url_processor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
5
- prerelease:
4
+ version: 0.5.0
6
5
  platform: ruby
7
6
  authors:
8
7
  - Eric Hayes
9
8
  autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2014-02-20 00:00:00.000000000 Z
11
+ date: 2014-04-23 00:00:00.000000000 Z
13
12
  dependencies:
14
13
  - !ruby/object:Gem::Dependency
15
- name: rspec
14
+ name: typhoeus
16
15
  requirement: !ruby/object:Gem::Requirement
17
- none: false
18
16
  requirements:
19
17
  - - ~>
20
18
  - !ruby/object:Gem::Version
21
- version: 2.8.0
22
- type: :development
19
+ version: 0.6.7
20
+ type: :runtime
23
21
  prerelease: false
24
22
  version_requirements: !ruby/object:Gem::Requirement
25
- none: false
26
23
  requirements:
27
24
  - - ~>
28
25
  - !ruby/object:Gem::Version
29
- version: 2.8.0
26
+ version: 0.6.7
30
27
  - !ruby/object:Gem::Dependency
31
- name: rdoc
28
+ name: colorize
32
29
  requirement: !ruby/object:Gem::Requirement
33
- none: false
34
30
  requirements:
35
31
  - - ~>
36
32
  - !ruby/object:Gem::Version
37
- version: '3.12'
38
- type: :development
33
+ version: 0.6.0
34
+ type: :runtime
39
35
  prerelease: false
40
36
  version_requirements: !ruby/object:Gem::Requirement
41
- none: false
42
37
  requirements:
43
38
  - - ~>
44
39
  - !ruby/object:Gem::Version
45
- version: '3.12'
40
+ version: 0.6.0
46
41
  - !ruby/object:Gem::Dependency
47
42
  name: bundler
48
43
  requirement: !ruby/object:Gem::Requirement
49
- none: false
50
44
  requirements:
51
45
  - - ~>
52
46
  - !ruby/object:Gem::Version
53
- version: '1.0'
47
+ version: '1.5'
54
48
  type: :development
55
49
  prerelease: false
56
50
  version_requirements: !ruby/object:Gem::Requirement
57
- none: false
58
51
  requirements:
59
52
  - - ~>
60
53
  - !ruby/object:Gem::Version
61
- version: '1.0'
54
+ version: '1.5'
62
55
  - !ruby/object:Gem::Dependency
63
- name: jeweler
56
+ name: rake
64
57
  requirement: !ruby/object:Gem::Requirement
65
- none: false
66
58
  requirements:
67
- - - ~>
59
+ - - ! '>='
68
60
  - !ruby/object:Gem::Version
69
- version: 2.0.1
61
+ version: '0'
70
62
  type: :development
71
63
  prerelease: false
72
64
  version_requirements: !ruby/object:Gem::Requirement
73
- none: false
74
65
  requirements:
75
- - - ~>
66
+ - - ! '>='
76
67
  - !ruby/object:Gem::Version
77
- version: 2.0.1
68
+ version: '0'
78
69
  - !ruby/object:Gem::Dependency
79
- name: simplecov
70
+ name: debugger
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ! '>='
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ! '>='
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: rspec
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ! '>='
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ! '>='
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: trollop
80
99
  requirement: !ruby/object:Gem::Requirement
81
- none: false
82
100
  requirements:
83
101
  - - ! '>='
84
102
  - !ruby/object:Gem::Version
@@ -86,58 +104,69 @@ dependencies:
86
104
  type: :development
87
105
  prerelease: false
88
106
  version_requirements: !ruby/object:Gem::Requirement
89
- none: false
90
107
  requirements:
91
108
  - - ! '>='
92
109
  - !ruby/object:Gem::Version
93
110
  version: '0'
94
111
  description: Fast and easy way to validate tons of urls without locking up your system
95
112
  or eating up too much memory.
96
- email: eric@deployfx.com
97
- executables: []
113
+ email:
114
+ - eric@deployfx.com
115
+ executables:
116
+ - url_processor
98
117
  extensions: []
99
- extra_rdoc_files:
100
- - LICENSE.txt
101
- - README.rdoc
118
+ extra_rdoc_files: []
102
119
  files:
103
- - .document
120
+ - .gitignore
104
121
  - .rspec
122
+ - .travis.yml
105
123
  - Gemfile
106
- - Gemfile.lock
107
124
  - LICENSE.txt
108
- - README.rdoc
125
+ - README.md
109
126
  - Rakefile
110
- - VERSION
127
+ - TODO
128
+ - bin/url_processor
111
129
  - lib/url_processor.rb
130
+ - lib/url_processor/base.rb
131
+ - lib/url_processor/config.rb
132
+ - lib/url_processor/error.rb
133
+ - lib/url_processor/link_request.rb
134
+ - lib/url_processor/retryable_request.rb
135
+ - lib/url_processor/runner.rb
136
+ - lib/url_processor/version.rb
112
137
  - spec/spec_helper.rb
138
+ - spec/url_processor/base_spec.rb
139
+ - spec/url_processor/config_spec.rb
140
+ - spec/url_processor/runner_spec.rb
113
141
  - spec/url_processor_spec.rb
114
142
  - url_processor.gemspec
115
- homepage: http://github.com/ejhayes/url_processor
143
+ homepage: https://github.com/ejhayes/url_processor
116
144
  licenses:
117
145
  - MIT
146
+ metadata: {}
118
147
  post_install_message:
119
148
  rdoc_options: []
120
149
  require_paths:
121
150
  - lib
122
151
  required_ruby_version: !ruby/object:Gem::Requirement
123
- none: false
124
152
  requirements:
125
153
  - - ! '>='
126
154
  - !ruby/object:Gem::Version
127
155
  version: '0'
128
- segments:
129
- - 0
130
- hash: 4095729639175042089
131
156
  required_rubygems_version: !ruby/object:Gem::Requirement
132
- none: false
133
157
  requirements:
134
158
  - - ! '>='
135
159
  - !ruby/object:Gem::Version
136
160
  version: '0'
137
161
  requirements: []
138
162
  rubyforge_project:
139
- rubygems_version: 1.8.23
163
+ rubygems_version: 2.2.2
140
164
  signing_key:
141
- specification_version: 3
165
+ specification_version: 4
142
166
  summary: Fast and reliable link checker.
143
- test_files: []
167
+ test_files:
168
+ - spec/spec_helper.rb
169
+ - spec/url_processor/base_spec.rb
170
+ - spec/url_processor/config_spec.rb
171
+ - spec/url_processor/runner_spec.rb
172
+ - spec/url_processor_spec.rb
data/.document DELETED
@@ -1,5 +0,0 @@
1
- lib/**/*.rb
2
- bin/*
3
- -
4
- features/**/*.feature
5
- LICENSE.txt
data/Gemfile.lock DELETED
@@ -1,72 +0,0 @@
1
- GEM
2
- remote: http://rubygems.org/
3
- specs:
4
- addressable (2.3.5)
5
- builder (3.2.2)
6
- descendants_tracker (0.0.3)
7
- diff-lcs (1.1.3)
8
- docile (1.1.3)
9
- faraday (0.9.0)
10
- multipart-post (>= 1.2, < 3)
11
- git (1.2.14)
12
- github_api (0.11.2)
13
- addressable (~> 2.3)
14
- descendants_tracker (~> 0.0.1)
15
- faraday (~> 0.8, < 0.10)
16
- hashie (>= 1.2)
17
- multi_json (>= 1.7.5, < 2.0)
18
- nokogiri (~> 1.6.0)
19
- oauth2
20
- hashie (2.0.5)
21
- highline (1.6.20)
22
- jeweler (2.0.1)
23
- builder
24
- bundler (>= 1.0)
25
- git (>= 1.2.5)
26
- github_api
27
- highline (>= 1.6.15)
28
- nokogiri (>= 1.5.10)
29
- rake
30
- rdoc
31
- json (1.8.1)
32
- jwt (0.1.11)
33
- multi_json (>= 1.5)
34
- mini_portile (0.5.2)
35
- multi_json (1.8.4)
36
- multi_xml (0.5.5)
37
- multipart-post (2.0.0)
38
- nokogiri (1.6.1)
39
- mini_portile (~> 0.5.0)
40
- oauth2 (0.9.3)
41
- faraday (>= 0.8, < 0.10)
42
- jwt (~> 0.1.8)
43
- multi_json (~> 1.3)
44
- multi_xml (~> 0.5)
45
- rack (~> 1.2)
46
- rack (1.5.2)
47
- rake (10.1.1)
48
- rdoc (3.12.2)
49
- json (~> 1.4)
50
- rspec (2.8.0)
51
- rspec-core (~> 2.8.0)
52
- rspec-expectations (~> 2.8.0)
53
- rspec-mocks (~> 2.8.0)
54
- rspec-core (2.8.0)
55
- rspec-expectations (2.8.0)
56
- diff-lcs (~> 1.1.2)
57
- rspec-mocks (2.8.0)
58
- simplecov (0.8.2)
59
- docile (~> 1.1.0)
60
- multi_json
61
- simplecov-html (~> 0.8.0)
62
- simplecov-html (0.8.0)
63
-
64
- PLATFORMS
65
- ruby
66
-
67
- DEPENDENCIES
68
- bundler (~> 1.0)
69
- jeweler (~> 2.0.1)
70
- rdoc (~> 3.12)
71
- rspec (~> 2.8.0)
72
- simplecov
data/README.rdoc DELETED
@@ -1,19 +0,0 @@
1
- = url_processor
2
-
3
- Description goes here.
4
-
5
- == Contributing to url_processor
6
-
7
- * Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet.
8
- * Check out the issue tracker to make sure someone already hasn't requested it and/or contributed it.
9
- * Fork the project.
10
- * Start a feature/bugfix branch.
11
- * Commit and push until you are happy with your contribution.
12
- * Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
13
- * Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or is otherwise necessary, that is fine, but please isolate to its own commit so I can cherry-pick around it.
14
-
15
- == Copyright
16
-
17
- Copyright (c) 2014 Eric Hayes. See LICENSE.txt for
18
- further details.
19
-
data/VERSION DELETED
@@ -1 +0,0 @@
1
- 0.1.0