http_utilities 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +22 -0
  3. data/README +15 -0
  4. data/Rakefile +87 -0
  5. data/VERSION +1 -0
  6. data/http_utilities.gemspec +78 -0
  7. data/lib/generators/active_record/http_utilities_generator.rb +21 -0
  8. data/lib/generators/active_record/templates/migration.rb +34 -0
  9. data/lib/generators/active_record/templates/proxy.rb +3 -0
  10. data/lib/generators/helpers/file_helper.rb +35 -0
  11. data/lib/generators/helpers/orm_helpers.rb +15 -0
  12. data/lib/generators/http_utilities/http_utilities_generator.rb +25 -0
  13. data/lib/generators/templates/http_utilities.rb +2 -0
  14. data/lib/generators/templates/user_agents.yml +3419 -0
  15. data/lib/http_utilities/http/adapters/curb.rb +107 -0
  16. data/lib/http_utilities/http/adapters/net_http.rb +130 -0
  17. data/lib/http_utilities/http/adapters/open_uri.rb +46 -0
  18. data/lib/http_utilities/http/client.rb +22 -0
  19. data/lib/http_utilities/http/cookies.rb +49 -0
  20. data/lib/http_utilities/http/format.rb +26 -0
  21. data/lib/http_utilities/http/get.rb +67 -0
  22. data/lib/http_utilities/http/logger.rb +11 -0
  23. data/lib/http_utilities/http/mechanize/client.rb +197 -0
  24. data/lib/http_utilities/http/post.rb +32 -0
  25. data/lib/http_utilities/http/proxy_support.rb +88 -0
  26. data/lib/http_utilities/http/request.rb +20 -0
  27. data/lib/http_utilities/http/response.rb +50 -0
  28. data/lib/http_utilities/http/url.rb +48 -0
  29. data/lib/http_utilities/http/user_agent.rb +3380 -0
  30. data/lib/http_utilities/jobs/resque/proxies/check_proxies_job.rb +15 -0
  31. data/lib/http_utilities/jobs/resque/proxies/check_proxy_job.rb +21 -0
  32. data/lib/http_utilities/jobs/sidekiq/proxies/check_proxies_job.rb +17 -0
  33. data/lib/http_utilities/jobs/sidekiq/proxies/check_proxy_job.rb +22 -0
  34. data/lib/http_utilities/proxies/proxy_checker.rb +122 -0
  35. data/lib/http_utilities/proxies/proxy_module.rb +70 -0
  36. data/lib/http_utilities/proxies/proxy_seeder.rb +104 -0
  37. data/lib/http_utilities/railtie.rb +11 -0
  38. data/lib/http_utilities.rb +47 -0
  39. data/lib/tasks/http_utilities_tasks.rake +19 -0
  40. data/spec/database.yml.example +10 -0
  41. data/spec/http_utilities/client_spec.rb +145 -0
  42. data/spec/http_utilities/mechanize_client_spec.rb +35 -0
  43. data/spec/http_utilities/proxy_checker_spec.rb +11 -0
  44. data/spec/http_utilities/proxy_seeder_spec.rb +24 -0
  45. data/spec/http_utilities/proxy_spec.rb +114 -0
  46. data/spec/models.rb +6 -0
  47. data/spec/schema.rb +30 -0
  48. data/spec/spec_helper.rb +50 -0
  49. metadata +209 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: b148d1430516c285cb9cf4fc58712acabd0d268c
4
+ data.tar.gz: 8a18be0594fca823f8abfe88ac54a194bf9b9602
5
+ SHA512:
6
+ metadata.gz: 4ee2b56ece9840c4160969ea00f7f79611bbdae4cfe77702bb21c18f726c6d3b6fbb6fd8978ffdfa3bf0b82b28e4accd3c514126ef3ad16b4b3afd5722f7abcc
7
+ data.tar.gz: 3541bf3e7c7e79183c14636c5aec50c626494cc20c43c97c4d3451e4ab882881342cb2314bb717bc1be7b409cfb9fa319e4c063f91b9ef829678166c7fb77ad0
data/Gemfile ADDED
@@ -0,0 +1,22 @@
1
# Dependencies for developing and running http_utilities.
# Gems are fetched over HTTPS so downloads cannot be tampered with in transit.
source "https://rubygems.org"

gem "nokogiri", ">= 1.5.5"
gem "mechanize", ">= 2.5"
gem "multi_xml", ">= 0.5"

# Loaded on demand rather than at Bundler.require time.
gem "activerecord-import", :require => false

# Only installed for the :ruby platform group.
platforms :ruby do
  gem 'curb'
end

group :development, :test do
  gem 'rails'
  gem 'jeweler'
  gem 'rspec'
  gem 'sqlite3'

  platforms :ruby do
    gem "mysql2", ">= 0.3.11"
  end
end
data/README ADDED
@@ -0,0 +1,15 @@
1
+ ===== Http-Utilities =====
2
+
3
+ Http-Utilities is a wrapper for common HTTP libraries (Net::HTTP, OpenURI, Curb) that exposes them through a unified API.
4
+
5
+ I personally use this in quite a few projects and decided to extract it into a separate gem.
6
+
7
+ Features:
8
+ - Support for Net::HTTP, OpenURI and Curb
9
+ - Proxy support (adds a proxy model). Enables you to randomly use different proxies on every request.
10
+ - Proxy checker (comes with Resque and Sidekiq jobs for scheduling)
11
+ - YAML file containing a few thousand user agents, one of which is picked at random for every request.
12
+
13
+ There are some specs but the gem needs more test/spec coverage. Working on it though.
14
+
15
+ This is primarily intended for private use, but get back to me if you use it and run into issues.
data/Rakefile ADDED
@@ -0,0 +1,87 @@
1
+ ## helper functions
2
+
3
# Name of the gem, taken from the gemspec file in the current directory.
#
# The name is the file name without its ".gemspec" extension, so names that
# contain dots themselves are returned intact. The result is memoized.
#
# Returns a String.
# Raises RuntimeError when the current directory holds no gemspec file.
def name
  @name ||= begin
    # Sort so the choice is stable if more than one gemspec is present.
    gemspec = Dir['*.gemspec'].sort.first
    raise "No .gemspec file found in #{Dir.pwd}" unless gemspec
    File.basename(gemspec, '.gemspec')
  end
end
6
+
7
# Version string declared in lib/<name>.rb.
#
# Looks for the first line assigning VERSION and extracts the quoted value.
#
# Returns a String.
# Raises RuntimeError when no quoted VERSION assignment is found; the
# message names the file that was searched.
def version
  path = "lib/#{name}.rb"
  line = File.read(path)[/^\s*VERSION\s*=\s*.*/]
  match = line && line.match(/.*VERSION\s*=\s*['"](.*)['"]/)
  raise "Could not find a quoted VERSION assignment in #{path}" unless match
  match[1]
end
11
+
12
# File name of the gemspec belonging to the gem returned by +name+.
def gemspec_file
  [name, 'gemspec'].join('.')
end
15
+
16
# File name of the packaged gem: "<name>-<version>.gem".
def gem_file
  format('%s-%s.gem', name, version)
end
19
+
20
# Rewrites the value of a `.header_name = '...'` assignment in +head+ with
# the value returned by the helper method of the same name.
#
# Single- and double-quoted values are both recognised and the original quote
# style is kept. Only the first matching assignment is replaced.
#
# head        - String holding the gemspec header; modified in place.
# header_name - Symbol naming both the attribute and the helper to call.
#
# Returns +head+ when a replacement was made, nil otherwise.
def replace_header(head, header_name)
  # Non-greedy value match so text after the closing quote is left alone.
  pattern = /(\.#{header_name}\s*=\s*)(['"]).*?\2/
  head.sub!(pattern) do
    # Copy the captures before calling the helper.
    prefix, quote = $1, $2
    "#{prefix}#{quote}#{send(header_name)}#{quote}"
  end
end
23
+
24
begin
  # RSpec 2.0 and later provide their Rake integration in rspec-core.
  require 'rspec/core/rake_task'

  desc 'Default: run specs'
  task :default => :spec
  RSpec::Core::RakeTask.new do |t|
    t.pattern = "spec/**/*_spec.rb"
  end

  # Only define the coverage task when the installed RakeTask still exposes
  # the rcov options; otherwise the task could only fail with NoMethodError.
  if RSpec::Core::RakeTask.method_defined?(:rcov=)
    RSpec::Core::RakeTask.new('rcov') do |t|
      t.pattern = "spec/**/*_spec.rb"
      t.rcov = true
      t.rcov_opts = ['--exclude', 'spec']
    end
  end

rescue LoadError
  # RSpec is optional for the packaging tasks; just tell the user how to get it.
  puts "Rspec not available. Install it with: gem install rspec"
end
43
+
44
+ ## release management tasks
45
+
46
desc "Commit, create tag v#{version} and build and push #{gem_file} to Rubygems"
task :release => :build do
  # Run in order; `sh` aborts the task as soon as one command fails.
  release_commands = [
    "git commit --allow-empty -a -m 'Release #{version}'",
    "git tag v#{version}",
    "git push",
    "git push origin v#{version}",
    "gem push pkg/#{gem_file}"
  ]
  release_commands.each { |command| sh command }
end
54
+
55
desc "Build #{gem_file} into the pkg directory"
task :build => :gemspec do
  # Use Rake's FileUtils helpers instead of shelling out to mkdir/mv so the
  # task does not depend on a Unix shell.
  mkdir_p "pkg"
  # Multi-argument form bypasses the shell, so unusual file names are safe.
  sh "gem", "build", gemspec_file
  mv gem_file, "pkg"
end
61
+
62
desc "Generate #{gemspec_file}"
task :gemspec do
  # The gemspec is split around the manifest markers into header, old
  # manifest (discarded) and footer.
  marker = " # = MANIFEST =\n"
  head, _old_manifest, tail = File.read(gemspec_file).split(marker)

  # Refresh the name and version assignments in the header.
  [:name, :version].each { |header| replace_header(head, header) }

  # Files tracked by git, minus dotfiles and paths beginning with rdoc or pkg.
  tracked = `git ls-files`.split("\n").sort
  listed = tracked.reject { |path| path =~ /^\./ || path =~ /^(rdoc|pkg)/ }
  files = listed.map { |path| " #{path}" }.join("\n")

  # Reassemble the three sections and write the gemspec back.
  manifest = " s.files = %w[\n#{files}\n ]\n"
  updated = [head, manifest, tail].join(marker)
  File.open(gemspec_file, 'w') { |io| io.write(updated) }
  puts "Updated #{gemspec_file}"
end
87
+
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.0.5
@@ -0,0 +1,78 @@
1
# Gem specification for http_utilities.
#
# The section between the two MANIFEST markers is regenerated by the
# `gemspec` Rake task; edit the file list through git, not by hand.
Gem::Specification.new do |s|
  s.specification_version = 2 if s.respond_to? :specification_version=
  s.required_rubygems_version = Gem::Requirement.new(">= 1.3.5") if s.respond_to? :required_rubygems_version=

  s.name    = "http_utilities"
  s.version = "1.0.1"

  s.authors     = ["Sebastian Johnsson"]
  s.date        = "2012-11-22"
  s.description = "Wrapper for common Http Libraries (Net:HTTP/Open URI/Curl)"

  s.homepage = "http://github.com/Agiley/http_utilities"
  s.summary  = "Wrapper for common Http Libraries (Net:HTTP/Open URI/Curl)"

  s.add_dependency(%q<nokogiri>, [">= 1.5.5"])
  s.add_dependency(%q<mechanize>, [">= 2.5"])
  s.add_dependency(%q<multi_xml>, [">= 0.5"])
  s.add_dependency(%q<activerecord-import>, [">= 0"])

  s.add_development_dependency(%q<rails>, [">= 0"])
  s.add_development_dependency(%q<rspec>, [">= 0"])
  s.add_development_dependency(%q<sqlite3>, [">= 0"])
  s.add_development_dependency(%q<mysql2>, [">= 0.3.11"])

  # = MANIFEST =
  s.files = %w[
    Gemfile
    README
    Rakefile
    VERSION
    http_utilities.gemspec
    lib/generators/active_record/http_utilities_generator.rb
    lib/generators/active_record/templates/migration.rb
    lib/generators/active_record/templates/proxy.rb
    lib/generators/helpers/file_helper.rb
    lib/generators/helpers/orm_helpers.rb
    lib/generators/http_utilities/http_utilities_generator.rb
    lib/generators/templates/http_utilities.rb
    lib/generators/templates/user_agents.yml
    lib/http_utilities.rb
    lib/http_utilities/http/adapters/curb.rb
    lib/http_utilities/http/adapters/net_http.rb
    lib/http_utilities/http/adapters/open_uri.rb
    lib/http_utilities/http/client.rb
    lib/http_utilities/http/cookies.rb
    lib/http_utilities/http/format.rb
    lib/http_utilities/http/get.rb
    lib/http_utilities/http/logger.rb
    lib/http_utilities/http/mechanize/client.rb
    lib/http_utilities/http/post.rb
    lib/http_utilities/http/proxy_support.rb
    lib/http_utilities/http/request.rb
    lib/http_utilities/http/response.rb
    lib/http_utilities/http/url.rb
    lib/http_utilities/http/user_agent.rb
    lib/http_utilities/jobs/resque/proxies/check_proxies_job.rb
    lib/http_utilities/jobs/resque/proxies/check_proxy_job.rb
    lib/http_utilities/jobs/sidekiq/proxies/check_proxies_job.rb
    lib/http_utilities/jobs/sidekiq/proxies/check_proxy_job.rb
    lib/http_utilities/proxies/proxy_checker.rb
    lib/http_utilities/proxies/proxy_module.rb
    lib/http_utilities/proxies/proxy_seeder.rb
    lib/http_utilities/railtie.rb
    lib/tasks/http_utilities_tasks.rake
    spec/database.yml.example
    spec/http_utilities/client_spec.rb
    spec/http_utilities/mechanize_client_spec.rb
    spec/http_utilities/proxy_checker_spec.rb
    spec/http_utilities/proxy_seeder_spec.rb
    spec/http_utilities/proxy_spec.rb
    spec/models.rb
    spec/schema.rb
    spec/spec_helper.rb
  ]
  # = MANIFEST =

  # Every Ruby file under spec/. Anchored at both ends so only paths that
  # start with "spec/" and end in ".rb" are selected.
  s.test_files = s.files.select { |path| path =~ %r{\Aspec/.+\.rb\z} }
end
@@ -0,0 +1,21 @@
1
+ require 'rails/generators/active_record'
2
+ require 'generators/helpers/orm_helpers'
3
+
4
module ActiveRecord
  module Generators
    # ActiveRecord generator that adds the proxies migration and the Proxy
    # model to the application, using the templates next to this file.
    class HttpUtilitiesGenerator < ActiveRecord::Generators::Base

      include HttpUtilities::Generators::OrmHelpers
      source_root File.expand_path("../templates", __FILE__)

      # Adds the create_proxies migration. Skipped when the generator is being
      # invoked and app/models/proxy.rb already exists.
      def copy_proxy_migration
        return if model_exists?('proxy') && behavior == :invoke

        migration_template "migration.rb", "db/migrate/create_proxies"
      end

      # Adds app/models/proxy.rb under the same condition as the migration.
      def copy_proxy_model
        return if model_exists?('proxy') && behavior == :invoke

        template "proxy.rb", "app/models/proxy.rb"
      end

    end
  end
end
@@ -0,0 +1,34 @@
1
# Creates the proxies table together with its lookup indexes.
class CreateProxies < ActiveRecord::Migration
  def self.up
    create_table :proxies do |t|

      t.string   :host,                :null => false
      t.integer  :port,                :null => false
      t.string   :username
      t.string   :password

      t.string   :protocol,            :null => false, :default => 'http'
      # The option key used to be misspelled as :defaut, so the 'public'
      # default was never set on this NOT NULL column.
      t.string   :proxy_type,          :null => false, :default => 'public'
      t.string   :category

      t.datetime :last_checked_at
      t.boolean  :valid_proxy,         :null => false, :default => false
      t.integer  :successful_attempts, :null => false, :default => 0
      t.integer  :failed_attempts,     :null => false, :default => 0

      t.timestamps
    end

    # A host/port pair identifies a proxy, hence the unique index.
    add_index :proxies, [:host, :port],       :unique => true, :name => 'index_unique_proxy'
    add_index :proxies, :protocol,            :name => 'index_protocol'
    add_index :proxies, :proxy_type,          :name => 'index_proxy_type'
    add_index :proxies, :category,            :name => 'index_category'
    add_index :proxies, :valid_proxy,         :name => 'index_valid_proxy'
    add_index :proxies, :successful_attempts, :name => 'index_successful_attempts'
    add_index :proxies, :failed_attempts,     :name => 'index_failed_attempts'
  end

  def self.down
    drop_table :proxies
  end
end
@@ -0,0 +1,3 @@
1
+ class Proxy < ActiveRecord::Base
2
+ include HttpUtilities::Proxies::ProxyModule
3
+ end
@@ -0,0 +1,35 @@
1
module HttpUtilities
  module Generators
    # File helpers for generators. The including class must provide
    # +source_root+ (class level), +destination_root+ and +copy_file+;
    # +Rails.root+ is used as the base for destinations.
    module FileHelper

      private

      # Copies every regular file below +source+ (relative to the generator's
      # source root) into +destination+ (relative to Rails.root), keeping the
      # directory layout.
      def copy_dir(source, destination)
        root_dir = File.join(self.class.source_root, source)
        # Escape the directory so characters such as "+" or "." in the path
        # are matched literally when stripping the prefix.
        prefix = /\A#{Regexp.escape(root_dir)}\//
        Dir[File.join(root_dir, "**/*")].each do |file|
          next unless File.file?(file)

          relative = file.sub(prefix, '')
          copy_file file, File.join(Rails.root, destination, relative)
        end
      end

      # Appends the lines of +source+ (relative to the source root) to
      # +destination+ (relative to Rails.root), writing +prepend_with+ first
      # unless it is nil or false. Does nothing and returns nil when either
      # file is missing.
      def append_to_file(source, destination, prepend_with = "\n")
        source_file = File.join(self.class.source_root, source)
        destination_file = File.join(Rails.root, destination)
        return unless File.exist?(source_file) && File.exist?(destination_file)

        source_data = File.readlines(source_file)
        File.open(destination_file, 'a') do |dest_file|
          dest_file << prepend_with if prepend_with
          source_data.each { |line| dest_file.puts line }
        end
      end

      # True when +path+ exists below the generator's destination root.
      def file_exists?(path)
        # File.exists? was removed in Ruby 3.2; File.exist? works everywhere.
        File.exist?(File.join(destination_root, path))
      end

    end
  end
end
@@ -0,0 +1,15 @@
1
module HttpUtilities
  module Generators
    # Helpers for locating model files. The including generator must provide
    # +destination_root+.
    module OrmHelpers

      # True when the file for +model+ exists below the destination root.
      def model_exists?(model)
        # File.exists? was removed in Ruby 3.2; File.exist? works everywhere.
        File.exist?(File.join(destination_root, model_path(model)))
      end

      # Relative path of the file for +model+, e.g. "app/models/proxy.rb".
      # Computed per call: caching it in one instance variable would hand the
      # first model's path back for every later argument.
      def model_path(model)
        File.join("app", "models", "#{model}.rb")
      end

    end
  end
end
@@ -0,0 +1,25 @@
1
+ require 'generators/helpers/file_helper'
2
+
3
module HttpUtilities
  module Generators
    # Entry-point generator registered under the "http_utilities" namespace.
    # It has no active copy steps of its own (the initializer and user-agent
    # steps below are commented out); the work is delegated through
    # hook_for :orm.
    class HttpUtilitiesGenerator < Rails::Generators::Base
      include HttpUtilities::Generators::FileHelper
      namespace "http_utilities"
      source_root File.expand_path("../../templates", __FILE__)

      class_option :orm
      hook_for :orm

      # Describes what actually runs: the previous text also promised an
      # initializer and a user-agents file, which are no longer copied.
      desc "Generates a proxy model and its migration through the configured ORM generator."

      # Disabled step: copy the initializer template.
      #def copy_initializer
      #  template "http_utilities.rb", "config/initializers/http_utilities.rb" unless file_exists?("config/initializers/http_utilities.rb")
      #end

      # Disabled step: copy the user-agents YAML file.
      #def copy_user_agents
      #  template "user_agents.yml", "config/http_utilities/user_agents.yml" unless file_exists?("config/http_utilities/user_agents.yml")
      #end

    end
  end
end
@@ -0,0 +1,2 @@
1
+ HttpUtilities.setup do |config|
2
+ end