http_validator 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,8 +1,15 @@
1
1
  require 'curb'
2
2
  require 'uri'
3
3
  require 'nokogiri'
4
+ require 'platform_helpers'
5
+
4
6
  class HttpValidator
5
- VERSION = '0.1.3'
7
+ VERSION = '0.1.4'
8
+ end
9
+
10
+ module Constants
11
+ BASE = File.expand_path(File.dirname(__FILE__) + '/..')
12
+ USER_AGENTS = File.readlines(BASE + '/config/user_agents')
6
13
  end
7
14
 
8
15
  module HTTP
@@ -10,7 +17,7 @@ module HTTP
10
17
  def self.parse(str)
11
18
  headers = {}
12
19
  str.gsub(/\r/, '').split(/\n/).each { |i|
13
- k, v = i.split(':')
20
+ k, v = i.split(': ')
14
21
  headers[k] = v
15
22
  }
16
23
  headers
@@ -18,14 +25,18 @@ module HTTP
18
25
  end
19
26
 
20
27
  class Browser
21
- attr_accessor :base_url, :elements, :element_info
28
+ attr_accessor :base_url, :elements, :element_info, :browser, :random_user_agent
22
29
  def initialize(url='')
30
+ rand(Time.now.tv_sec)
23
31
  @base_url = url
32
+ @random_user_agent = false
24
33
  @browser = Curl::Easy.new
25
- #@browser.enable_cookies = true
34
+ @browser.max_redirects=3
35
+ @browser.enable_cookies = true
26
36
  @browser.cookiejar = 'cookies.txt'
27
37
  @browser.headers["User-Agent"] = "Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.186 Safari/535.1"
28
- @element_types = %w(script img link)
38
+ @element_types = %w(script img link a)
39
+ @moved_keys = ['HTTP/1.1 302 Moved Temporarily', 'HTTP/1.1 301 Moved Permanently']
29
40
  clear
30
41
  end
31
42
 
@@ -35,14 +46,31 @@ module HTTP
35
46
  end
36
47
 
37
48
  def element_details(e)
38
- return if %w(text/css text/javascript before-content data after-content stylesheet top).include?(e)
39
49
  if e =~ /^http:\/\/|^https:\/\//
40
- @browser.url = e
50
+ url = e
41
51
  else
42
- @browser.url = "#{@base_url.gsub(/\/$/, '')}/#{e}"
52
+ url = "#{@base_url.gsub(/\/$/, '')}/#{e}"
53
+ end
54
+ url.gsub!(/\/\//, '/')
55
+ url.gsub!(/http:\//, 'http://')
56
+ url.gsub!(/https:\//, 'https://')
57
+
58
+ @browser.url = url.strip
59
+ if @random_user_agent
60
+ index = rand(Constants::USER_AGENTS.length)
61
+ @browser.headers["User-Agent"] = Constants::USER_AGENTS[index].gsub(/\n/, '')
43
62
  end
44
63
  @browser.perform
64
+
45
65
  headers = HTTP::Header.parse(@browser.header_str)
66
+ @moved_keys.each { |k|
67
+ if headers.has_key?(k)
68
+ @elements << headers['Location']
69
+ @elements.delete(e)
70
+ @element_info.delete(e)
71
+ return
72
+ end
73
+ }
46
74
  if @element_info.has_key?(e)
47
75
  @element_info[e] << {:expected_size => headers['Content-Length'].strip.to_i,
48
76
  :actual_size => @browser.body_str.length, :headers => headers}
@@ -62,16 +90,40 @@ module HTTP
62
90
  doc = Nokogiri::HTML(@browser.body_str)
63
91
  @element_types.each { |elem|
64
92
  e = nil
93
+
65
94
  doc.search("//#{elem}").each { |item|
66
- next unless item.attribute_nodes.length > 0
67
- item.attribute_nodes.each { |node|
68
- next unless node.value =~ /\//
69
- e = node.value
70
- break
95
+ begin
96
+ if elem == 'a' || elem == 'link'
97
+ e = item.attributes['href'].value
98
+ elsif %(img script).include?(elem)
99
+ e = item.attributes['src'].value
100
+ end
101
+ rescue => ex
102
+ end
103
+ @elements << e unless e == nil
104
+ }
105
+ }
106
+ end
107
+ end
108
+
109
+ module UserAgent
110
+ @browser = HTTP::Browser.new.browser
111
+ def self.get
112
+ save_to = File.expand_path(File.dirname(__FILE__) + '/../config/user_agents.yaml')
113
+ agents = []
114
+ @browser.url = 'http://www.useragentstring.com/pages/All/'
115
+ @browser.perform
116
+ doc = Nokogiri::HTML(@browser.body_str)
117
+
118
+ fd = File.open(save_to, 'w+')
119
+ doc.search("//ul").each { |ul|
120
+ ul.search("//li").each { |li|
121
+ li.search("//a").each { |a|
122
+ fd.puts a.text
71
123
  }
72
- @elements << e
73
124
  }
74
125
  }
126
+ fd.close
75
127
  end
76
128
  end
77
129
 
@@ -80,10 +132,10 @@ module HTTP
80
132
 
81
133
  def self.display_report(element_info, format='plain')
82
134
  if format == 'plain'
83
- printf "%-100s%15s%15s\n\n", "Element", "Expected Size", "Actual Size"
135
+ printf "%-90s%15s%15s\n\n", "Element", "Expected Size", "Actual Size"
84
136
  element_info.each_pair { |resource, values|
85
137
  values.each { |item|
86
- printf "%-100s%15s%15s\n", resource, item[:expected_size], item[:actual_size]
138
+ printf "%-90s%15s%15s\n", resource, item[:expected_size], item[:actual_size]
87
139
  }
88
140
  }
89
141
  elsif format == 'cucumber_example'
@@ -97,10 +149,18 @@ module HTTP
97
149
  end
98
150
 
99
151
  def self.run(url='', params={})
100
- p params
101
152
  raise ArgumentError unless url =~ /^http:\/\/|https:\/\//
153
+ params[:random_user_agent] ||= 'false'
102
154
  URI.parse(url) # see if parsable, blow up if not
103
155
  @browser.base_url = url
156
+ if params.has_key?(:proxy)
157
+ @browser.browser.proxy_url = params[:proxy]
158
+ end
159
+
160
+ if params[:random_user_agent].to_bool
161
+ @browser.random_user_agent=true
162
+ end
163
+
104
164
  @browser.clear
105
165
  @browser.get_elements
106
166
  @browser.elements.each { |e| @browser.element_details(e) }
@@ -113,8 +173,10 @@ module HTTP
113
173
  end
114
174
  end
115
175
 
116
- #HTTP::Validator.run('http://slashdot.org')
117
- #HTTP::Validator.run('https://github.com')
176
+ #HTTP::Validator.run('http://www.gap.com', {:display_report => {:format => 'plain'}})
177
+ #HTTP::Validator.run('http://127.0.0.1:9090', {:random_user_agent => true, :display_report => {:format => 'plain'}})
178
+ #HTTP::Validator.run('http://127.0.0.1:9090', {:display_report => {:format => 'plain'}})
179
+ #HTTP::Validator.run('https://github.com', {:display_report => {:format => 'plain'}})
118
180
 
119
181
  #HTTP::Validator.run('http://127.0.0.1:9090', :display_report => {:format => 'cucumber_example'})
120
182
 
@@ -8,6 +8,6 @@ Feature: Download elements from source html
8
8
 
9
9
  Examples:
10
10
  |page |
11
- |http://slashdot.org |
12
- |https://github.com |
13
- |http://www.yahoo.com |
11
+ #|http://127.0.0.1:9090 |
12
+ #|http://www.yahoo.com |
13
+ |http://www.gap.com |
@@ -1,15 +1,27 @@
1
1
  require 'rspec'
2
- require 'http_validator'
2
+ base = File.expand_path(File.dirname(__FILE__) + '/../../..')
3
+ require base + '/lib/http_validator'
4
+
5
+ Before do
6
+ @proxy = nil
7
+ @params = {:random_user_agent => true}
8
+ end
9
+
10
+ Given /the user is using a proxy (.+) then set a proxy/ do |proxy|
11
+ puts "Proxy set to #{proxy}"
12
+ @proxy = proxy
13
+ end
3
14
 
4
15
  Given /a user has landed on a (.+)/ do |url|
5
- @element_info = HTTP::Validator.run(url)
16
+ @params[:proxy] = @proxy if @proxy
17
+ @element_info = HTTP::Validator.run(url, @params)
6
18
  end
7
19
 
8
20
  Then /all resource elements should download with the correct size/ do
9
21
  @element_info.each_pair { |resource, values|
10
22
  values.each { |item|
11
- puts "Verifying #{resource} downloaded size #{item[:actual_size]} matches Content-Length #{item[:expected_size]}\n"
12
- item[:actual_size].should equal item[:expected_size]
23
+ puts "Verifying #{resource} downloaded size #{item[:actual_size]} matches Content-Length #{item[:expected_size]}\n<br>"
24
+ item[:actual_size].should == item[:expected_size]
13
25
  }
14
26
  }
15
27
  end
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: http_validator
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 0.1.3
5
+ version: 0.1.4
6
6
  platform: ruby
7
7
  authors:
8
8
  - Cliff Cyphers
@@ -10,7 +10,7 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2011-10-12 00:00:00 Z
13
+ date: 2011-10-14 00:00:00 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: curb
@@ -24,16 +24,27 @@ dependencies:
24
24
  type: :runtime
25
25
  version_requirements: *id001
26
26
  - !ruby/object:Gem::Dependency
27
- name: hoe
27
+ name: platform_helpers
28
28
  prerelease: false
29
29
  requirement: &id002 !ruby/object:Gem::Requirement
30
+ none: false
31
+ requirements:
32
+ - - "="
33
+ - !ruby/object:Gem::Version
34
+ version: 0.1.2
35
+ type: :runtime
36
+ version_requirements: *id002
37
+ - !ruby/object:Gem::Dependency
38
+ name: hoe
39
+ prerelease: false
40
+ requirement: &id003 !ruby/object:Gem::Requirement
30
41
  none: false
31
42
  requirements:
32
43
  - - ~>
33
44
  - !ruby/object:Gem::Version
34
45
  version: "2.12"
35
46
  type: :development
36
- version_requirements: *id002
47
+ version_requirements: *id003
37
48
  description: |-
38
49
  Validate http request content length vs actual for
39
50
  elements such as img, link, etc..
@@ -56,6 +67,7 @@ files:
56
67
  - lib/http_validator.rb
57
68
  - test/features/step_defn/verify.rb
58
69
  - test/features/resource_size.feature
70
+ - config/user_agents
59
71
  - .gemtest
60
72
  homepage: http://github.com/ccyphers
61
73
  licenses: []