http_validator 0.1.3 → 0.1.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,8 +1,15 @@
1
1
  require 'curb'
2
2
  require 'uri'
3
3
  require 'nokogiri'
4
+ require 'platform_helpers'
5
+
4
6
  class HttpValidator
5
- VERSION = '0.1.3'
7
+ VERSION = '0.1.4'
8
+ end
9
+
10
+ module Constants
11
+ BASE = File.expand_path(File.dirname(__FILE__) + '/..')
12
+ USER_AGENTS = File.readlines(BASE + '/config/user_agents')
6
13
  end
7
14
 
8
15
  module HTTP
@@ -10,7 +17,7 @@ module HTTP
10
17
  def self.parse(str)
11
18
  headers = {}
12
19
  str.gsub(/\r/, '').split(/\n/).each { |i|
13
- k, v = i.split(':')
20
+ k, v = i.split(': ')
14
21
  headers[k] = v
15
22
  }
16
23
  headers
@@ -18,14 +25,18 @@ module HTTP
18
25
  end
19
26
 
20
27
  class Browser
21
- attr_accessor :base_url, :elements, :element_info
28
+ attr_accessor :base_url, :elements, :element_info, :browser, :random_user_agent
22
29
  def initialize(url='')
30
+ rand(Time.now.tv_sec)
23
31
  @base_url = url
32
+ @random_user_agent = false
24
33
  @browser = Curl::Easy.new
25
- #@browser.enable_cookies = true
34
+ @browser.max_redirects=3
35
+ @browser.enable_cookies = true
26
36
  @browser.cookiejar = 'cookies.txt'
27
37
  @browser.headers["User-Agent"] = "Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.186 Safari/535.1"
28
- @element_types = %w(script img link)
38
+ @element_types = %w(script img link a)
39
+ @moved_keys = ['HTTP/1.1 302 Moved Temporarily', 'HTTP/1.1 301 Moved Permanently']
29
40
  clear
30
41
  end
31
42
 
@@ -35,14 +46,31 @@ module HTTP
35
46
  end
36
47
 
37
48
  def element_details(e)
38
- return if %w(text/css text/javascript before-content data after-content stylesheet top).include?(e)
39
49
  if e =~ /^http:\/\/|^https:\/\//
40
- @browser.url = e
50
+ url = e
41
51
  else
42
- @browser.url = "#{@base_url.gsub(/\/$/, '')}/#{e}"
52
+ url = "#{@base_url.gsub(/\/$/, '')}/#{e}"
53
+ end
54
+ url.gsub!(/\/\//, '/')
55
+ url.gsub!(/http:\//, 'http://')
56
+ url.gsub!(/https:\//, 'https://')
57
+
58
+ @browser.url = url.strip
59
+ if @random_user_agent
60
+ index = rand(Constants::USER_AGENTS.length)
61
+ @browser.headers["User-Agent"] = Constants::USER_AGENTS[index].gsub(/\n/, '')
43
62
  end
44
63
  @browser.perform
64
+
45
65
  headers = HTTP::Header.parse(@browser.header_str)
66
+ @moved_keys.each { |k|
67
+ if headers.has_key?(k)
68
+ @elements << headers['Location']
69
+ @elements.delete(e)
70
+ @element_info.delete(e)
71
+ return
72
+ end
73
+ }
46
74
  if @element_info.has_key?(e)
47
75
  @element_info[e] << {:expected_size => headers['Content-Length'].strip.to_i,
48
76
  :actual_size => @browser.body_str.length, :headers => headers}
@@ -62,16 +90,40 @@ module HTTP
62
90
  doc = Nokogiri::HTML(@browser.body_str)
63
91
  @element_types.each { |elem|
64
92
  e = nil
93
+
65
94
  doc.search("//#{elem}").each { |item|
66
- next unless item.attribute_nodes.length > 0
67
- item.attribute_nodes.each { |node|
68
- next unless node.value =~ /\//
69
- e = node.value
70
- break
95
+ begin
96
+ if elem == 'a' || elem == 'link'
97
+ e = item.attributes['href'].value
98
+ elsif %(img script).include?(elem)
99
+ e = item.attributes['src'].value
100
+ end
101
+ rescue => ex
102
+ end
103
+ @elements << e unless e == nil
104
+ }
105
+ }
106
+ end
107
+ end
108
+
109
+ module UserAgent
110
+ @browser = HTTP::Browser.new.browser
111
+ def self.get
112
+ save_to = File.expand_path(File.dirname(__FILE__) + '/../config/user_agents.yaml')
113
+ agents = []
114
+ @browser.url = 'http://www.useragentstring.com/pages/All/'
115
+ @browser.perform
116
+ doc = Nokogiri::HTML(@browser.body_str)
117
+
118
+ fd = File.open(save_to, 'w+')
119
+ doc.search("//ul").each { |ul|
120
+ ul.search("//li").each { |li|
121
+ li.search("//a").each { |a|
122
+ fd.puts a.text
71
123
  }
72
- @elements << e
73
124
  }
74
125
  }
126
+ fd.close
75
127
  end
76
128
  end
77
129
 
@@ -80,10 +132,10 @@ module HTTP
80
132
 
81
133
  def self.display_report(element_info, format='plain')
82
134
  if format == 'plain'
83
- printf "%-100s%15s%15s\n\n", "Element", "Expected Size", "Actual Size"
135
+ printf "%-90s%15s%15s\n\n", "Element", "Expected Size", "Actual Size"
84
136
  element_info.each_pair { |resource, values|
85
137
  values.each { |item|
86
- printf "%-100s%15s%15s\n", resource, item[:expected_size], item[:actual_size]
138
+ printf "%-90s%15s%15s\n", resource, item[:expected_size], item[:actual_size]
87
139
  }
88
140
  }
89
141
  elsif format == 'cucumber_example'
@@ -97,10 +149,18 @@ module HTTP
97
149
  end
98
150
 
99
151
  def self.run(url='', params={})
100
- p params
101
152
  raise ArgumentError unless url =~ /^http:\/\/|https:\/\//
153
+ params[:random_user_agent] ||= 'false'
102
154
  URI.parse(url) # see if parsable, blow up if not
103
155
  @browser.base_url = url
156
+ if params.has_key?(:proxy)
157
+ @browser.browser.proxy_url = params[:proxy]
158
+ end
159
+
160
+ if params[:random_user_agent].to_bool
161
+ @browser.random_user_agent=true
162
+ end
163
+
104
164
  @browser.clear
105
165
  @browser.get_elements
106
166
  @browser.elements.each { |e| @browser.element_details(e) }
@@ -113,8 +173,10 @@ module HTTP
113
173
  end
114
174
  end
115
175
 
116
- #HTTP::Validator.run('http://slashdot.org')
117
- #HTTP::Validator.run('https://github.com')
176
+ #HTTP::Validator.run('http://www.gap.com', {:display_report => {:format => 'plain'}})
177
+ #HTTP::Validator.run('http://127.0.0.1:9090', {:random_user_agent => true, :display_report => {:format => 'plain'}})
178
+ #HTTP::Validator.run('http://127.0.0.1:9090', {:display_report => {:format => 'plain'}})
179
+ #HTTP::Validator.run('https://github.com', {:display_report => {:format => 'plain'}})
118
180
 
119
181
  #HTTP::Validator.run('http://127.0.0.1:9090', :display_report => {:format => 'cucumber_example'})
120
182
 
@@ -8,6 +8,6 @@ Feature: Download elements from source html
8
8
 
9
9
  Examples:
10
10
  |page |
11
- |http://slashdot.org |
12
- |https://github.com |
13
- |http://www.yahoo.com |
11
+ #|http://127.0.0.1:9090 |
12
+ #|http://www.yahoo.com |
13
+ |http://www.gap.com |
@@ -1,15 +1,27 @@
1
1
  require 'rspec'
2
- require 'http_validator'
2
+ base = File.expand_path(File.dirname(__FILE__) + '/../../..')
3
+ require base + '/lib/http_validator'
4
+
5
+ Before do
6
+ @proxy = nil
7
+ @params = {:random_user_agent => true}
8
+ end
9
+
10
+ Given /the user is using a proxy (.+) then set a proxy/ do |proxy|
11
+ puts "Proxy set to #{proxy}"
12
+ @proxy = proxy
13
+ end
3
14
 
4
15
  Given /a user has landed on a (.+)/ do |url|
5
- @element_info = HTTP::Validator.run(url)
16
+ @params[:proxy] = @proxy if @proxy
17
+ @element_info = HTTP::Validator.run(url, @params)
6
18
  end
7
19
 
8
20
  Then /all resource elements should download with the correct size/ do
9
21
  @element_info.each_pair { |resource, values|
10
22
  values.each { |item|
11
- puts "Verifying #{resource} downloaded size #{item[:actual_size]} matches Content-Length #{item[:expected_size]}\n"
12
- item[:actual_size].should equal item[:expected_size]
23
+ puts "Verifying #{resource} downloaded size #{item[:actual_size]} matches Content-Length #{item[:expected_size]}\n<br>"
24
+ item[:actual_size].should == item[:expected_size]
13
25
  }
14
26
  }
15
27
  end
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: http_validator
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 0.1.3
5
+ version: 0.1.4
6
6
  platform: ruby
7
7
  authors:
8
8
  - Cliff Cyphers
@@ -10,7 +10,7 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2011-10-12 00:00:00 Z
13
+ date: 2011-10-14 00:00:00 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: curb
@@ -24,16 +24,27 @@ dependencies:
24
24
  type: :runtime
25
25
  version_requirements: *id001
26
26
  - !ruby/object:Gem::Dependency
27
- name: hoe
27
+ name: platform_helpers
28
28
  prerelease: false
29
29
  requirement: &id002 !ruby/object:Gem::Requirement
30
+ none: false
31
+ requirements:
32
+ - - "="
33
+ - !ruby/object:Gem::Version
34
+ version: 0.1.2
35
+ type: :runtime
36
+ version_requirements: *id002
37
+ - !ruby/object:Gem::Dependency
38
+ name: hoe
39
+ prerelease: false
40
+ requirement: &id003 !ruby/object:Gem::Requirement
30
41
  none: false
31
42
  requirements:
32
43
  - - ~>
33
44
  - !ruby/object:Gem::Version
34
45
  version: "2.12"
35
46
  type: :development
36
- version_requirements: *id002
47
+ version_requirements: *id003
37
48
  description: |-
38
49
  Validate http request content length vs actual for
39
50
  elements such as img, link, etc..
@@ -56,6 +67,7 @@ files:
56
67
  - lib/http_validator.rb
57
68
  - test/features/step_defn/verify.rb
58
69
  - test/features/resource_size.feature
70
+ - config/user_agents
59
71
  - .gemtest
60
72
  homepage: http://github.com/ccyphers
61
73
  licenses: []