http_utilities 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile +22 -0
- data/README +15 -0
- data/Rakefile +87 -0
- data/VERSION +1 -0
- data/http_utilities.gemspec +78 -0
- data/lib/generators/active_record/http_utilities_generator.rb +21 -0
- data/lib/generators/active_record/templates/migration.rb +34 -0
- data/lib/generators/active_record/templates/proxy.rb +3 -0
- data/lib/generators/helpers/file_helper.rb +35 -0
- data/lib/generators/helpers/orm_helpers.rb +15 -0
- data/lib/generators/http_utilities/http_utilities_generator.rb +25 -0
- data/lib/generators/templates/http_utilities.rb +2 -0
- data/lib/generators/templates/user_agents.yml +3419 -0
- data/lib/http_utilities/http/adapters/curb.rb +107 -0
- data/lib/http_utilities/http/adapters/net_http.rb +130 -0
- data/lib/http_utilities/http/adapters/open_uri.rb +46 -0
- data/lib/http_utilities/http/client.rb +22 -0
- data/lib/http_utilities/http/cookies.rb +49 -0
- data/lib/http_utilities/http/format.rb +26 -0
- data/lib/http_utilities/http/get.rb +67 -0
- data/lib/http_utilities/http/logger.rb +11 -0
- data/lib/http_utilities/http/mechanize/client.rb +197 -0
- data/lib/http_utilities/http/post.rb +32 -0
- data/lib/http_utilities/http/proxy_support.rb +88 -0
- data/lib/http_utilities/http/request.rb +20 -0
- data/lib/http_utilities/http/response.rb +50 -0
- data/lib/http_utilities/http/url.rb +48 -0
- data/lib/http_utilities/http/user_agent.rb +3380 -0
- data/lib/http_utilities/jobs/resque/proxies/check_proxies_job.rb +15 -0
- data/lib/http_utilities/jobs/resque/proxies/check_proxy_job.rb +21 -0
- data/lib/http_utilities/jobs/sidekiq/proxies/check_proxies_job.rb +17 -0
- data/lib/http_utilities/jobs/sidekiq/proxies/check_proxy_job.rb +22 -0
- data/lib/http_utilities/proxies/proxy_checker.rb +122 -0
- data/lib/http_utilities/proxies/proxy_module.rb +70 -0
- data/lib/http_utilities/proxies/proxy_seeder.rb +104 -0
- data/lib/http_utilities/railtie.rb +11 -0
- data/lib/http_utilities.rb +47 -0
- data/lib/tasks/http_utilities_tasks.rake +19 -0
- data/spec/database.yml.example +10 -0
- data/spec/http_utilities/client_spec.rb +145 -0
- data/spec/http_utilities/mechanize_client_spec.rb +35 -0
- data/spec/http_utilities/proxy_checker_spec.rb +11 -0
- data/spec/http_utilities/proxy_seeder_spec.rb +24 -0
- data/spec/http_utilities/proxy_spec.rb +114 -0
- data/spec/models.rb +6 -0
- data/spec/schema.rb +30 -0
- data/spec/spec_helper.rb +50 -0
- metadata +209 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: b148d1430516c285cb9cf4fc58712acabd0d268c
|
4
|
+
data.tar.gz: 8a18be0594fca823f8abfe88ac54a194bf9b9602
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 4ee2b56ece9840c4160969ea00f7f79611bbdae4cfe77702bb21c18f726c6d3b6fbb6fd8978ffdfa3bf0b82b28e4accd3c514126ef3ad16b4b3afd5722f7abcc
|
7
|
+
data.tar.gz: 3541bf3e7c7e79183c14636c5aec50c626494cc20c43c97c4d3451e4ab882881342cb2314bb717bc1be7b409cfb9fa319e4c063f91b9ef829678166c7fb77ad0
|
data/Gemfile
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
# Fetch gems over TLS so downloads cannot be tampered with in transit.
source "https://rubygems.org"

# Runtime dependencies.
gem "nokogiri", ">= 1.5.5"
gem "mechanize", ">= 2.5"
gem "multi_xml", ">= 0.5"

# Loaded on demand only.
gem "activerecord-import", :require => false

# curb is a C extension, so it is restricted to the :ruby platform.
platforms :ruby do
  gem 'curb'
end

group :development, :test do
  gem 'rails'
  gem 'jeweler'
  gem 'rspec'
  gem 'sqlite3'

  platforms :ruby do
    gem "mysql2", ">= 0.3.11"
  end
end
|
data/README
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
===== Http-Utilities =====
|
2
|
+
|
3
|
+
Http-Utilities is a wrapper around common HTTP libraries (Net::HTTP, OpenURI, Curb) that exposes a unified API.
|
4
|
+
|
5
|
+
I personally use this in quite a few projects and decided to extract it into a separate gem.
|
6
|
+
|
7
|
+
Features:
|
8
|
+
- Support for Net::HTTP, OpenURI and Curb
|
9
|
+
- Proxy support (adds a proxy model). Enables you to randomly use different proxies on every request.
|
10
|
+
- Proxy checker (comes with Resque and Sidekiq jobs for scheduling)
|
11
|
+
- YAML file containing a few thousand user agents, one of which is picked at random for every request.
|
12
|
+
|
13
|
+
There are some specs but the gem needs more test/spec coverage. Working on it though.
|
14
|
+
|
15
|
+
This is primarily intended for private use, but get back to me if you use it and run into issues.
|
data/Rakefile
ADDED
@@ -0,0 +1,87 @@
|
|
1
|
+
## helper functions
|
2
|
+
|
3
|
+
# Returns the gem name, taken from the first *.gemspec file found in the
# current working directory. The result is memoized in @name.
#
# The name is the file's basename without the ".gemspec" extension, so gem
# names that themselves contain dots are preserved.
#
# Raises RuntimeError when the directory contains no gemspec.
def name
  @name ||= begin
    path = Dir['*.gemspec'].first
    raise "No .gemspec file found in #{Dir.pwd}" unless path
    File.basename(path, '.gemspec')
  end
end
|
6
|
+
|
7
|
+
# Returns the version string declared in lib/<name>.rb.
#
# The first line of the form `VERSION = ...` is located, and the text between
# its quotes is returned.
#
# Raises RuntimeError when no quoted VERSION assignment can be found, instead
# of failing with a NoMethodError on nil.
def version
  path = "lib/#{name}.rb"
  line = File.read(path)[/^\s*VERSION\s*=\s*.*/]
  match = line && line.match(/.*VERSION\s*=\s*['"](.*)['"]/)
  raise "Could not find a quoted VERSION assignment in #{path}" unless match
  match[1]
end
|
11
|
+
|
12
|
+
# File name of the gemspec: the gem name followed by ".gemspec".
def gemspec_file
  format('%s.gemspec', name)
end
|
15
|
+
|
16
|
+
# File name of the built gem package: "<name>-<version>.gem".
def gem_file
  format('%s-%s.gem', name, version)
end
|
19
|
+
|
20
|
+
# Rewrites the first `.<header_name> = '...'` assignment in +head+ in place so
# that its value becomes the result of calling the method named +header_name+
# (e.g. :name or :version).
#
# Both single- and double-quoted values are recognised, and the original quote
# style is kept. Any amount of whitespace may surround the `=`.
#
# head        - String holding the gemspec header; mutated in place.
# header_name - Symbol naming both the gemspec attribute and the helper method.
#
# Returns the mutated string, or nil when no matching assignment was found
# (the String#sub! contract).
def replace_header(head, header_name)
  head.sub!(/(\.#{header_name}\s*=\s*)(['"]).*\2/) do
    # Capture the groups before calling the helper.
    prefix = Regexp.last_match(1)
    quote = Regexp.last_match(2)
    "#{prefix}#{quote}#{send(header_name)}#{quote}"
  end
end
|
23
|
+
|
24
|
+
begin
  # RSpec 2.0+
  require 'rspec/core/rake_task'

  desc 'Default: run specs'
  task :default => :spec
  RSpec::Core::RakeTask.new do |t|
    t.pattern = "spec/**/*_spec.rb"
  end

  # Only define the coverage task when the installed RSpec rake task still
  # exposes the rcov options; otherwise calling t.rcov= would raise a
  # NoMethodError (not rescued below) and break loading of the Rakefile.
  if RSpec::Core::RakeTask.method_defined?(:rcov=)
    RSpec::Core::RakeTask.new('rcov') do |t|
      t.pattern = "spec/**/*_spec.rb"
      t.rcov = true
      t.rcov_opts = ['--exclude', 'spec']
    end
  end

rescue LoadError
  # RSpec is optional for the release tasks; just tell the user how to get it.
  puts "Rspec not available. Install it with: gem install rspec"
end
|
43
|
+
|
44
|
+
## release management tasks
|
45
|
+
|
46
|
+
desc "Commit, create tag v#{version} and build and push #{gem_file} to Rubygems"
task :release => :build do
  # Shell commands are run in order: commit, tag, push branch, push tag,
  # then publish the gem that the :build prerequisite placed in pkg/.
  [
    "git commit --allow-empty -a -m 'Release #{version}'",
    "git tag v#{version}",
    "git push",
    "git push origin v#{version}",
    "gem push pkg/#{gem_file}"
  ].each { |command| sh command }
end
|
54
|
+
|
55
|
+
desc "Build #{gem_file} into the pkg directory"
task :build => :gemspec do
  # Ensure pkg/ exists, build the gem from the refreshed gemspec, then move
  # the resulting package into pkg/.
  build_steps = [
    "mkdir -p pkg",
    "gem build #{gemspec_file}",
    "mv #{gem_file} pkg"
  ]
  build_steps.each do |command|
    sh command
  end
end
|
61
|
+
|
62
|
+
desc "Generate #{gemspec_file}"
task :gemspec do
  # Marker line that delimits the manifest section inside the gemspec.
  # NOTE(review): the whitespace inside these literals is reproduced as it
  # appears in this listing - confirm against the gemspec's real indentation.
  marker = " # = MANIFEST =\n"

  # Split the current gemspec into header, old manifest and footer.
  head, _stale_manifest, tail = File.read(gemspec_file).split(marker)

  # Refresh the name and version assignments in the header.
  [:name, :version].each { |header| replace_header(head, header) }

  # File list from git: sorted, without dotfiles and rdoc/pkg artefacts.
  tracked = `git ls-files`.split("\n").sort
  tracked = tracked.reject { |file| file =~ /^\./ }
  tracked = tracked.reject { |file| file =~ /^(rdoc|pkg)/ }
  files = tracked.map { |file| " #{file}" }.join("\n")

  # Rebuild the manifest, stitch the three parts back together and write.
  manifest = " s.files = %w[\n#{files}\n ]\n"
  spec = [head, manifest, tail].join(marker)
  File.open(gemspec_file, 'w') { |io| io.write(spec) }
  puts "Updated #{gemspec_file}"
end
|
87
|
+
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.0.5
|
@@ -0,0 +1,78 @@
|
|
1
|
+
# Gem specification for http_utilities.
#
# The section between the two "= MANIFEST =" markers is regenerated by the
# Rakefile's :gemspec task; edit the file list through that task.
Gem::Specification.new do |s|
  s.specification_version = 2 if s.respond_to? :specification_version=
  s.required_rubygems_version = Gem::Requirement.new(">= 1.3.5") if s.respond_to? :required_rubygems_version=

  s.name = "http_utilities"
  s.version = "1.0.1"

  s.authors = ["Sebastian Johnsson"]
  s.date = "2012-11-22"
  s.description = "Wrapper for common Http Libraries (Net:HTTP/Open URI/Curl)"

  s.homepage = "https://github.com/Agiley/http_utilities"
  s.summary = "Wrapper for common Http Libraries (Net:HTTP/Open URI/Curl)"

  # Runtime dependencies.
  s.add_dependency(%q<nokogiri>, [">= 1.5.5"])
  s.add_dependency(%q<mechanize>, [">= 2.5"])
  s.add_dependency(%q<multi_xml>, [">= 0.5"])
  s.add_dependency(%q<activerecord-import>, [">= 0"])

  # Development-only dependencies.
  s.add_development_dependency(%q<rails>, [">= 0"])
  s.add_development_dependency(%q<rspec>, [">= 0"])
  s.add_development_dependency(%q<sqlite3>, [">= 0"])
  s.add_development_dependency(%q<mysql2>, [">= 0.3.11"])

  # = MANIFEST =
  s.files = %w[
    Gemfile
    README
    Rakefile
    VERSION
    http_utilities.gemspec
    lib/generators/active_record/http_utilities_generator.rb
    lib/generators/active_record/templates/migration.rb
    lib/generators/active_record/templates/proxy.rb
    lib/generators/helpers/file_helper.rb
    lib/generators/helpers/orm_helpers.rb
    lib/generators/http_utilities/http_utilities_generator.rb
    lib/generators/templates/http_utilities.rb
    lib/generators/templates/user_agents.yml
    lib/http_utilities.rb
    lib/http_utilities/http/adapters/curb.rb
    lib/http_utilities/http/adapters/net_http.rb
    lib/http_utilities/http/adapters/open_uri.rb
    lib/http_utilities/http/client.rb
    lib/http_utilities/http/cookies.rb
    lib/http_utilities/http/format.rb
    lib/http_utilities/http/get.rb
    lib/http_utilities/http/logger.rb
    lib/http_utilities/http/mechanize/client.rb
    lib/http_utilities/http/post.rb
    lib/http_utilities/http/proxy_support.rb
    lib/http_utilities/http/request.rb
    lib/http_utilities/http/response.rb
    lib/http_utilities/http/url.rb
    lib/http_utilities/http/user_agent.rb
    lib/http_utilities/jobs/resque/proxies/check_proxies_job.rb
    lib/http_utilities/jobs/resque/proxies/check_proxy_job.rb
    lib/http_utilities/jobs/sidekiq/proxies/check_proxies_job.rb
    lib/http_utilities/jobs/sidekiq/proxies/check_proxy_job.rb
    lib/http_utilities/proxies/proxy_checker.rb
    lib/http_utilities/proxies/proxy_module.rb
    lib/http_utilities/proxies/proxy_seeder.rb
    lib/http_utilities/railtie.rb
    lib/tasks/http_utilities_tasks.rake
    spec/database.yml.example
    spec/http_utilities/client_spec.rb
    spec/http_utilities/mechanize_client_spec.rb
    spec/http_utilities/proxy_checker_spec.rb
    spec/http_utilities/proxy_seeder_spec.rb
    spec/http_utilities/proxy_spec.rb
    spec/models.rb
    spec/schema.rb
    spec/spec_helper.rb
  ]
  # = MANIFEST =

  # Every Ruby file below spec/, at any depth. Anchored to the whole string
  # so only paths that start with "spec/" and end in ".rb" qualify.
  s.test_files = s.files.select { |path| path =~ %r{\Aspec/.+\.rb\z} }
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'rails/generators/active_record'
|
2
|
+
require 'generators/helpers/orm_helpers'
|
3
|
+
|
4
|
+
module ActiveRecord
  module Generators
    # ActiveRecord flavour of the http_utilities generator: emits the proxies
    # migration and the Proxy model from the templates directory next to this
    # file.
    class HttpUtilitiesGenerator < ActiveRecord::Generators::Base

      include HttpUtilities::Generators::OrmHelpers
      source_root File.expand_path("../templates", __FILE__)

      # Creates db/migrate/*_create_proxies.rb. Skipped when the generator is
      # being invoked and app/models/proxy.rb already exists.
      def copy_proxy_migration
        return if model_exists?('proxy') && behavior == :invoke

        migration_template "migration.rb", "db/migrate/create_proxies"
      end

      # Creates app/models/proxy.rb under the same skip condition as the
      # migration step.
      def copy_proxy_model
        return if model_exists?('proxy') && behavior == :invoke

        template "proxy.rb", "app/models/proxy.rb"
      end

    end
  end
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
# Migration template that creates the proxies table together with its
# lookup indexes.
class CreateProxies < ActiveRecord::Migration
  def self.up
    create_table :proxies do |t|
      # Connection details.
      t.string :host, :null => false
      t.integer :port, :null => false
      t.string :username
      t.string :password

      # Classification. The :default key on proxy_type was previously
      # misspelled, so the 'public' default was never applied to the NOT NULL
      # column.
      t.string :protocol, :null => false, :default => 'http'
      t.string :proxy_type, :null => false, :default => 'public'
      t.string :category

      # Health-check bookkeeping.
      t.datetime :last_checked_at
      t.boolean :valid_proxy, :null => false, :default => false
      t.integer :successful_attempts, :null => false, :default => 0
      t.integer :failed_attempts, :null => false, :default => 0

      t.timestamps
    end

    # A host/port pair identifies a proxy uniquely.
    add_index :proxies, [:host, :port], :unique => true, :name => 'index_unique_proxy'
    add_index :proxies, :protocol, :name => 'index_protocol'
    add_index :proxies, :proxy_type, :name => 'index_proxy_type'
    add_index :proxies, :category, :name => 'index_category'
    add_index :proxies, :valid_proxy, :name => 'index_valid_proxy'
    add_index :proxies, :successful_attempts, :name => 'index_successful_attempts'
    add_index :proxies, :failed_attempts, :name => 'index_failed_attempts'
  end

  def self.down
    drop_table :proxies
  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
module HttpUtilities
  module Generators
    # Private file-system helpers mixed into the gem's generators. The host
    # class is expected to provide `self.class.source_root`, `copy_file` and
    # `destination_root`, and `Rails.root` must be available.
    module FileHelper

      private

      # Copies every regular file found below +source+ (relative to the
      # generator's source_root) to the same relative path below +destination+
      # (relative to Rails.root), using the host's copy_file.
      #
      # Returns the list of paths that were globbed.
      def copy_dir(source, destination)
        root_dir = File.join(self.class.source_root, source)
        # Escape the directory so regex metacharacters in the path (dots,
        # parentheses, plus signs, ...) are matched literally.
        prefix = /\A#{Regexp.escape(root_dir)}\//

        Dir[File.join(root_dir, "**/*")].each do |file|
          next unless File.file?(file)

          copy_file file, File.join(Rails.root, destination, file.sub(prefix, ''))
        end
      end

      # Appends the lines of +source+ (relative to source_root) to
      # +destination+ (relative to Rails.root), writing +prepend_with+ first
      # unless it is nil/false. Does nothing, and returns nil, unless both
      # files already exist.
      def append_to_file(source, destination, prepend_with = "\n")
        source_file = File.join(self.class.source_root, source)
        destination_file = File.join(Rails.root, destination)
        return unless File.exist?(source_file) && File.exist?(destination_file)

        source_data = File.readlines(source_file)
        File.open(destination_file, 'a') do |dest_file|
          dest_file << prepend_with if prepend_with
          source_data.each { |line| dest_file.puts line }
        end
      end

      # True when +path+ exists below the generator's destination_root.
      def file_exists?(path)
        File.exist?(File.join(destination_root, path))
      end

    end
  end
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
module HttpUtilities
  module Generators
    # Helpers for locating model files from within a generator. The host class
    # must provide `destination_root`.
    module OrmHelpers

      # True when app/models/<model>.rb exists below destination_root.
      def model_exists?(model)
        File.exist?(File.join(destination_root, model_path(model)))
      end

      # Relative path of the model file for +model+, e.g. "app/models/proxy.rb".
      #
      # Computed on every call: caching it in a single instance variable made
      # every later call return the path of whichever model was asked for
      # first.
      def model_path(model)
        File.join("app", "models", "#{model}.rb")
      end

    end
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
require 'generators/helpers/file_helper'
|
2
|
+
|
3
|
+
module HttpUtilities
  module Generators
    # Generator registered under the "http_utilities" namespace. It declares an
    # :orm class option and calls hook_for :orm; the file-copy steps below are
    # currently commented out.
    class HttpUtilitiesGenerator < Rails::Generators::Base
      include HttpUtilities::Generators::FileHelper

      namespace "http_utilities"
      # Templates live two directories up from this file, in "templates".
      source_root File.expand_path(File.join("..", "..", "templates"), __FILE__)

      class_option :orm
      hook_for :orm

      desc "Copies an initializer, a .yml-file containing user-agents as well as a proxy model."

      # Disabled step: copy the initializer unless it already exists.
      #def copy_initializer
      #  template "http_utilities.rb", "config/initializers/http_utilities.rb" unless file_exists?("config/initializers/http_utilities.rb")
      #end

      # Disabled step: copy the user-agents YAML unless it already exists.
      #def copy_user_agents
      #  template "user_agents.yml", "config/http_utilities/user_agents.yml" unless file_exists?("config/http_utilities/user_agents.yml")
      #end

    end
  end
end
|