horseman 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +6 -0
- data/Gemfile.lock +42 -0
- data/Manifest +20 -0
- data/README.rdoc +3 -0
- data/Rakefile +24 -0
- data/horseman.gemspec +32 -0
- data/lib/horseman/browser.rb +32 -0
- data/lib/horseman/connection.rb +42 -0
- data/lib/horseman/cookies.rb +80 -0
- data/lib/horseman/hidden_fields.rb +13 -0
- data/lib/horseman/response.rb +26 -0
- data/lib/horseman/version.rb +3 -0
- data/lib/horseman.rb +1 -0
- data/spec/horseman/browser_spec.rb +35 -0
- data/spec/horseman/connection_spec.rb +64 -0
- data/spec/horseman/cookies_spec.rb +74 -0
- data/spec/horseman/hidden_fields_spec.rb +44 -0
- data/spec/horseman/response_spec.rb +13 -0
- data/spec/mocks.rb +50 -0
- data/spec/spec_helper.rb +1 -0
- metadata +89 -0
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
horseman (0.0.1)
|
5
|
+
nokogiri (>= 1.5.0)
|
6
|
+
|
7
|
+
GEM
|
8
|
+
remote: http://rubygems.org/
|
9
|
+
specs:
|
10
|
+
allison (2.0.3)
|
11
|
+
diff-lcs (1.1.3)
|
12
|
+
echoe (4.6.3)
|
13
|
+
allison (>= 2.0.3)
|
14
|
+
gemcutter (>= 0.7.0)
|
15
|
+
rake (>= 0.9.2)
|
16
|
+
rdoc (>= 3.6.1)
|
17
|
+
rubyforge (>= 2.0.4)
|
18
|
+
gemcutter (0.7.0)
|
19
|
+
json (1.6.2)
|
20
|
+
json_pure (1.6.2)
|
21
|
+
nokogiri (1.5.0)
|
22
|
+
rake (0.9.2.2)
|
23
|
+
rdoc (3.11)
|
24
|
+
json (~> 1.4)
|
25
|
+
rspec (2.7.0)
|
26
|
+
rspec-core (~> 2.7.0)
|
27
|
+
rspec-expectations (~> 2.7.0)
|
28
|
+
rspec-mocks (~> 2.7.0)
|
29
|
+
rspec-core (2.7.1)
|
30
|
+
rspec-expectations (2.7.0)
|
31
|
+
diff-lcs (~> 1.1.2)
|
32
|
+
rspec-mocks (2.7.0)
|
33
|
+
rubyforge (2.0.4)
|
34
|
+
json_pure (>= 1.1.7)
|
35
|
+
|
36
|
+
PLATFORMS
|
37
|
+
ruby
|
38
|
+
|
39
|
+
DEPENDENCIES
|
40
|
+
echoe (~> 4.6.3)
|
41
|
+
horseman!
|
42
|
+
rspec (~> 2.7)
|
data/Manifest
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Gemfile
|
2
|
+
Gemfile.lock
|
3
|
+
Manifest
|
4
|
+
README.rdoc
|
5
|
+
Rakefile
|
6
|
+
horseman.gemspec
|
7
|
+
lib/horseman.rb
|
8
|
+
lib/horseman/browser.rb
|
9
|
+
lib/horseman/connection.rb
|
10
|
+
lib/horseman/cookies.rb
|
11
|
+
lib/horseman/hidden_fields.rb
|
12
|
+
lib/horseman/response.rb
|
13
|
+
lib/horseman/version.rb
|
14
|
+
spec/horseman/browser_spec.rb
|
15
|
+
spec/horseman/connection_spec.rb
|
16
|
+
spec/horseman/cookies_spec.rb
|
17
|
+
spec/horseman/hidden_fields_spec.rb
|
18
|
+
spec/horseman/response_spec.rb
|
19
|
+
spec/mocks.rb
|
20
|
+
spec/spec_helper.rb
|
data/README.rdoc
ADDED
data/Rakefile
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
require 'rake'
|
2
|
+
require 'rspec/core/rake_task'
|
3
|
+
require 'echoe'
|
4
|
+
|
5
|
+
# Packaging metadata for the horseman gem, handed to Echoe.
Echoe.new("horseman", "0.0.1") do |gem|
  gem.description = "Headless HTTP crawler/scraper for ASP.NET WebForms applications"
  gem.url = "http://jarrodpeace.com"
  gem.author = "Jarrod Peace"
  gem.email = "peace.jarrod@gmail.com"
  gem.ignore_pattern = FileList[".gitignore"]
  gem.development_dependencies = []
  gem.runtime_dependencies = ["nokogiri >=1.5.0"]
end

# Load every additional task file found in ./tasks, in sorted order.
extra_task_files = Dir["#{File.dirname(__FILE__)}/tasks/*.rake"].sort
extra_task_files.each do |rake_file|
  load rake_file
end

desc "Default task - runs specs"
task :default => :spec

desc "Run specs"
RSpec::Core::RakeTask.new do |spec_task|
  # Options are passed straight through to the rspec command line.
  spec_task.rspec_opts = '-cfd'
end
|
data/horseman.gemspec
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
|
3
|
+
# Gem specification for horseman 0.0.1.
#
# The original three-way branch (specification_version / RubyGems >= 1.2.0 /
# fallback) declared the very same nokogiri dependency in every arm, so it is
# collapsed into a single add_dependency call, which registers a runtime
# dependency.
Gem::Specification.new do |s|
  s.name = "horseman"
  s.version = "0.0.1"

  s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
  s.authors = ["Jarrod Peace"]
  s.date = "2012-01-06"
  s.description = "Headless HTTP crawler/scraper for ASP.NET WebForms applications"
  s.email = "peace.jarrod@gmail.com"
  s.extra_rdoc_files = ["README.rdoc", "lib/horseman.rb", "lib/horseman/browser.rb", "lib/horseman/connection.rb", "lib/horseman/cookies.rb", "lib/horseman/hidden_fields.rb", "lib/horseman/response.rb", "lib/horseman/version.rb"]
  s.files = ["Gemfile", "Gemfile.lock", "Manifest", "README.rdoc", "Rakefile", "horseman.gemspec", "lib/horseman.rb", "lib/horseman/browser.rb", "lib/horseman/connection.rb", "lib/horseman/cookies.rb", "lib/horseman/hidden_fields.rb", "lib/horseman/response.rb", "lib/horseman/version.rb", "spec/horseman/browser_spec.rb", "spec/horseman/connection_spec.rb", "spec/horseman/cookies_spec.rb", "spec/horseman/hidden_fields_spec.rb", "spec/horseman/response_spec.rb", "spec/mocks.rb", "spec/spec_helper.rb"]
  s.homepage = "http://jarrodpeace.com"
  s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Horseman", "--main", "README.rdoc"]
  s.require_paths = ["lib"]
  # NOTE(review): rubyforge_project is deprecated and may be absent on newer
  # RubyGems releases; guard it the same way the other optional setters are.
  s.rubyforge_project = "horseman" if s.respond_to? :rubyforge_project=
  s.rubygems_version = "1.8.10"
  s.summary = "Headless HTTP crawler/scraper for ASP.NET WebForms applications"

  s.specification_version = 3 if s.respond_to? :specification_version

  s.add_dependency(%q<nokogiri>, [">= 1.5.0"])
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
require 'horseman/cookies'
require 'horseman/response'
|
2
|
+
|
3
|
+
module Horseman
  # A minimal stateful HTTP client: it issues requests through a connection
  # object, keeps a cookie jar between requests and remembers the last
  # response it received.
  class Browser
    attr_accessor :base_url
    attr_reader :cookies, :connection, :last_response

    # connection - object responding to build_request(options) and
    #              exec_request(request).
    # base_url   - string prepended to every path passed to get!.
    def initialize(connection, base_url='')
      @connection = connection
      @base_url = base_url
      @cookies = Horseman::Cookies.new
    end

    # Forgets every cookie collected so far.
    def clear_session
      @cookies.clear
    end

    # Performs a GET for base_url + path and returns the resulting
    # Horseman::Response (also available afterwards as last_response).
    def get!(path = '/')
      request = @connection.build_request(:url => "#{@base_url}#{path}", :verb => :get)
      exec(request)
    end

    private

    # Sends the request with the current cookies attached, stores any cookies
    # the server sets, and wraps the body in a Horseman::Response.
    def exec(request)
      # Only attach a Cookie header when there is something to send; an empty
      # jar used to produce a blank "Cookie:" header.
      request['cookie'] = @cookies.to_s unless @cookies.empty?
      response = @connection.exec_request(request)
      @cookies.update(response.get_fields('set-cookie'))
      @last_response = Horseman::Response.new(response.body)
    end
  end
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
require 'uri'
|
2
|
+
require 'net/http'
|
3
|
+
require 'net/https'
|
4
|
+
|
5
|
+
module Horseman
  # Thin wrapper around Net::HTTP that remembers the target URI and builds
  # GET/POST request objects for it.
  class Connection
    attr_reader :http

    # Points the connection at +url+ and (re)creates the underlying Net::HTTP
    # object for that host and port.
    def url=(url)
      @uri = URI.parse(url)
      build_http
    end

    # Sends a previously built request and returns the Net::HTTP response.
    def exec_request(request)
      @http.request(request)
    end

    # Builds a request object for the current (or newly supplied) URL.
    #
    # options[:url]  - optional URL; when given it replaces the current one.
    # options[:verb] - :get or :post; a missing verb means GET.
    # options[:form] - optional hash of form fields for a POST body.
    #
    # Returns a Net::HTTP::Get or Net::HTTP::Post.
    def build_request(options={})
      self.url = options[:url] unless options[:url].nil?
      # The former test `== (:get || nil)` reduced to `== :get`, so an omitted
      # verb silently produced a POST.
      verb = options[:verb].nil? ? :get : options[:verb]
      verb == :get ? build_get_request : build_post_request(options[:form])
    end

    private

    def build_http
      @http = Net::HTTP.new(@uri.host, @uri.port)
      # Decide on TLS from the scheme so https on a non-standard port works;
      # port 443 is still honoured for callers relying on the old check.
      if @uri.scheme == 'https' || @uri.port == 443
        @http.use_ssl = true
      end
    end

    def build_get_request
      Net::HTTP::Get.new(@uri.request_uri)
    end

    def build_post_request(form)
      ret = Net::HTTP::Post.new(@uri.request_uri)
      ret.form_data = form unless form.nil?
      ret
    end
  end
end
|
@@ -0,0 +1,80 @@
|
|
1
|
+
require 'date'

module Horseman
  # A single cookie value together with the attributes parsed from its
  # Set-Cookie header (domain, path and expiration).
  class Cookie
    attr_reader :value, :domain, :path, :expiration

    SECONDS_PER_DAY = 60 * 60 * 24

    # value      - the cookie's value string.
    # attributes - array of raw attribute strings such as " Path=/".
    def initialize(value, attributes)
      @value = value
      attributes.each {|a| parse_attribute(a)}
    end

    private

    # Recognises "key=value" attributes; anything else (flags such as
    # HttpOnly, unknown keys) is ignored.
    def parse_attribute(attribute)
      # The key pattern allows '-' so that "Max-Age" is matched as a whole.
      md = /\A\s*([\w-]+)\s*=\s*(.*?)\s*\z/m.match(attribute)
      return if md.nil?

      key, val = md.captures
      case key.downcase
      when 'domain'
        @domain = val
      when 'path'
        @path = val
      when 'expires'
        @expiration = DateTime.parse(val)
      when 'max-age'
        # DateTime arithmetic is in days, so convert the seconds exactly.
        # A non-numeric Max-Age is ignored.
        if val =~ /\A-?\d+\z/
          @expiration = DateTime.now + Rational(val.to_i, SECONDS_PER_DAY)
        end
      end
    end
  end

  # A cookie jar keyed by cookie name.
  class Cookies
    def initialize
      clear
    end

    # Returns the value of the named cookie, or nil when it is unknown.
    def [](cookie_name)
      cookie = @dict[cookie_name]
      cookie.value unless cookie.nil?
    end

    # Returns the Horseman::Cookie stored under the name, or nil.
    def get(cookie_name)
      @dict[cookie_name]
    end

    # Drops every stored cookie.
    def clear
      @dict = {}
    end

    def count
      @dict.count
    end

    def empty?
      @dict.empty?
    end

    # Renders the jar as "name=value; name=value".
    def to_s
      @dict.map {|k,v| "#{k}=#{v.value}"}.join('; ')
    end

    # Merges one Set-Cookie header string, or an array of them, into the jar.
    # nil is accepted and ignored. Returns self so calls can be chained.
    #
    # Raises ArgumentError when a header has no leading name=value pair.
    def update(header)
      headers = header.is_a?(Array) ? header : [header].compact
      headers.each {|h| parse_header(h)}
      self
    end

    private

    def parse_header(header)
      nvp, *attributes = header.split(';')
      # Cookie names may contain characters outside \w (e.g. the dot in
      # ASP.NET_SessionId), so accept anything up to the first '='.
      md = /\A\s*([^=;\s]+)\s*=\s*(.*?)\s*\z/m.match(nvp.to_s)
      raise ArgumentError, "unrecognized Set-Cookie header: #{header.inspect}" if md.nil?

      @dict[md[1]] = Horseman::Cookie.new(md[2], attributes)
    end
  end
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
module Horseman
  # Extracts the name/value pairs of every <input type="hidden"> element in
  # an HTML string.
  class HiddenFields
    # Hash of field name => field value.
    attr_reader :tokens

    # One opening <input ...> tag. An attribute value containing '>' would
    # cut the tag short; that is not handled here.
    INPUT_TAG = /<input\b[^>]*>/i
    # One quoted attribute: name="value" or name='value'.
    ATTRIBUTE = /([\w:-]+)\s*=\s*(?:"([^"]*)"|'([^']*)')/

    # html - the markup to scan.
    #
    # Each tag is examined on its own, so attribute order, several inputs on
    # one line, a missing " />" terminator and values containing spaces are
    # all handled. Inputs without a name are skipped; a missing value
    # attribute yields an empty string.
    def initialize(html)
      @tokens = {}
      html.scan(INPUT_TAG).each {|tag|
        attrs = parse_attributes(tag)
        next unless attrs['type'].to_s.downcase == 'hidden'
        next if attrs['name'].to_s.empty?
        @tokens[attrs['name']] = attrs['value'].to_s
      }
    end

    private

    # Returns a hash of lower-cased attribute name => value for one tag.
    # When an attribute is repeated the first occurrence wins.
    def parse_attributes(tag)
      attrs = {}
      tag.scan(ATTRIBUTE) {|name, double_quoted, single_quoted|
        key = name.downcase
        attrs[key] = (double_quoted || single_quoted) unless attrs.key?(key)
      }
      attrs
    end
  end
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
module Horseman
  # Common attributes shared by the page elements modelled here.
  class Element
    attr_accessor :id, :name
  end

  # A <form> element found in a response body.
  class Form < Element
  end

  # A form field; carries its type and value in addition to id/name.
  class FormField < Element
    attr_accessor :type, :value
  end

  # Wraps an HTTP response body and the forms discovered in it.
  class Response
    attr_reader :body, :forms

    FORM_TAG = /<form\b[^>]*>/i
    ATTRIBUTE = /([\w:-]+)\s*=\s*(?:"([^"]*)"|'([^']*)')/

    # body - the response body as a string (nil is treated as empty).
    def initialize(body)
      @body = body
      @forms = []
      parse
    end

    private

    # Collects one Form per opening <form> tag, in document order, filling in
    # its id and name attributes when present. This was previously an empty
    # stub, which left +forms+ permanently empty.
    def parse
      @body.to_s.scan(FORM_TAG) do |tag|
        attrs = parse_attributes(tag)
        form = Form.new
        form.id = attrs['id']
        form.name = attrs['name']
        @forms << form
      end
    end

    # Returns a hash of lower-cased attribute name => value for one tag.
    def parse_attributes(tag)
      attrs = {}
      tag.scan(ATTRIBUTE) do |name, double_quoted, single_quoted|
        key = name.downcase
        attrs[key] = (double_quoted || single_quoted) unless attrs.key?(key)
      end
      attrs
    end
  end
end
|
data/lib/horseman.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require 'horseman/version'
|
@@ -0,0 +1,35 @@
|
|
1
|
+
# spec_helper loads the Mocks module included below.
require 'spec_helper'
require 'horseman/browser'

describe Horseman::Browser do
  include Mocks

  subject {described_class.new(connection, 'http://www.example.com')}

  it "saves cookies" do
    subject.cookies.should be_empty

    subject.get!
    subject.cookies.count.should eq 2
    subject.cookies['name1'].should eq 'value1'
    subject.cookies['name2'].should eq 'value2'

    subject.connection.should_receive(:exec_request) do |request|
      request['cookie'].should match(/\w+=\w+; \w+=\w+/)
      request['cookie'].should match(/name1=value1/)
      request['cookie'].should match(/name2=value2/)
      # The block's value becomes exec_request's return value, and the
      # browser goes on to read cookies and body from it.
      response
    end
    subject.get!
  end

  it "empties the cookies when the session is cleared" do
    subject.get!
    subject.cookies.should_not be_empty
    subject.clear_session
    subject.cookies.should be_empty
  end

  it "stores information about the last response" do
    subject.get!
    subject.last_response.body.should eq html
  end
end
|
@@ -0,0 +1,64 @@
|
|
1
|
+
require 'horseman/connection'
|
2
|
+
require 'net/http'
|
3
|
+
|
4
|
+
# Exercises request construction and SSL selection of Horseman::Connection.
describe Horseman::Connection do
  subject do
    described_class.new.tap {|conn| conn.url = 'http://www.example.com/some/path'}
  end

  context "when building requests" do
    let(:request) {subject.build_request(:verb => :get)}

    it "uses the proper path" do
      request.path.should eq '/some/path'
    end

    context "using GET" do
      it "uses the proper request type" do
        request.class.should equal(Net::HTTP::Get)
      end
    end

    context "using POST" do
      let(:request) {subject.build_request(:verb => :post)}

      it "uses the proper request type" do
        request.class.should equal(Net::HTTP::Post)
      end

      context "with form data" do
        let(:form) { {:field1=>'value1', :field2=>'value2'} }
        let(:request) {subject.build_request(:verb => :post, :form => form)}

        it "properly sets request body" do
          request.body.should eq 'field1=value1&field2=value2'
        end
      end

      context "without form data" do
        it "properly sets request body" do
          request.body.should be_nil
        end
      end
    end
  end

  context "when accessed using http" do
    it "does not use SSL" do
      subject.http.use_ssl?.should be_false
    end
  end

  context "when accessed using https" do
    subject do
      described_class.new.tap {|conn| conn.url = 'https://www.example.com'}
    end

    it "uses SSL" do
      subject.http.use_ssl?.should be_true
    end
  end
end
|
@@ -0,0 +1,74 @@
|
|
1
|
+
require 'date'
require 'horseman/cookies'

# Covers parsing of Set-Cookie headers into the jar and rendering the jar
# back into a Cookie request header.
describe Horseman::Cookies do
  let(:simple_header) {'name1=value1'}
  let(:complex_header) {'name2=value2; Domain=www.example.com; Path=/path; Expires=Sun, 1-Jan-2012 00:00:00 GMT'}

  it "starts empty" do
    subject.should be_empty
  end

  it "accepts a single header" do
    subject.update(simple_header)['name1'].should eq 'value1'
  end

  it "accepts multiple headers" do
    subject.update([simple_header, complex_header])
    subject['name1'].should eq 'value1'
    subject['name2'].should eq 'value2'
  end

  it "captures attributes" do
    subject.update(complex_header)
    subject.get('name2').domain.should eq 'www.example.com'
    subject.get('name2').path.should eq '/path'
    subject.get('name2').expiration.should eq DateTime.new(2012, 1, 1, 0, 0, 0, 0)
  end

  it "accepts an empty array" do
    subject.update([]).should be_empty
  end

  it "accepts nil" do
    subject.update(nil).should be_empty
  end

  it "raises an exception on an unrecognized header" do
    expect {subject.update('bad header')}.to raise_error(ArgumentError)
  end

  it "generates a correct header" do
    header = subject.update([simple_header, complex_header]).to_s
    header.should match(/\w+=\w+; \w+=\w+/)
    header.should match(/name1=value1/)
    header.should match(/name2=value2/)
  end

  context "with preexisting values" do
    subject do
      described_class.new.update('name1=other_value')
    end

    it "returns nil for uninitialized values" do
      subject['doesnt_exist'].should be_nil
    end

    it "merges new values" do
      subject.update(complex_header)
      subject['name1'].should eq 'other_value'
      subject['name2'].should eq 'value2'
    end

    it "overwrites existing values" do
      subject.update(simple_header)
      subject['name1'].should eq 'value1'
    end
  end
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
require 'horseman/hidden_fields'
|
2
|
+
|
3
|
+
# Checks that hidden <input> elements are turned into name => value tokens.
describe Horseman::HiddenFields do

  it "parses a single simple hidden input field" do
    fields = described_class.new(%{<input type="hidden" name="test" value="test_data" />})

    fields.tokens.size.should eq 1
    fields.tokens['test'].should eq 'test_data'
  end

  it "parses a single complex hidden input field" do
    markup = %{<input attr0="value0" type="hidden" attr1="value1" name="test" attr2="value2" value="test_data" attr3="value3" />}
    fields = described_class.new(markup)

    fields.tokens.size.should eq 1
    fields.tokens['test'].should eq 'test_data'
  end

  it "parses multiple hidden input fields" do
    markup = %{
<input type="hidden" name="test" value="test_data" />
<input type="hidden" name="foo" value="bar" />
<some other="tag"></some>
<input type="hidden" name="dee" value="dum" />
}
    fields = described_class.new(markup)

    fields.tokens.size.should eq 3
    fields.tokens['test'].should eq 'test_data'
    fields.tokens['foo'].should eq 'bar'
    fields.tokens['dee'].should eq 'dum'
  end

  it "handles single quotes, too" do
    fields = described_class.new(%{<input type='hidden' name='test' value='test_data' />})

    fields.tokens.size.should eq 1
    fields.tokens['test'].should eq 'test_data'
  end

end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
# spec_helper loads the Mocks module, which supplies the html fixture.
require 'spec_helper'
require 'horseman/response'

describe Horseman::Response do
  include Mocks

  subject { described_class.new(html) }

  it "parses forms" do
    subject.forms.count.should eq 2
    subject.forms[0].id.should eq 'form1'
    subject.forms[1].id.should eq 'form2'
  end
end
|
data/spec/mocks.rb
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
require 'horseman/connection'
|
2
|
+
|
3
|
+
# Shared fixtures for the specs: a sample page, sample Set-Cookie headers, a
# stubbed HTTP response and a connection whose exec_request returns it.
module Mocks

  # Sample page containing two forms.
  def html
    %{
<html>
<head></head>
<body>
<form id="form1">
<input type="text" name="name1" value="value1" />
<input type="submit" value="OK" />
</form>
<form id="form2">
<input type="text" name="name2" value="value2" />
<input type="submit" value="OK" />
</form>
</body>
</html>
}
  end

  # Two Set-Cookie header values, one per cookie.
  def cookies
    attributes = 'Domain=www.example.com; Path=/path; Expires=Sun, 1-Jan-2012 00:00:00 GMT'
    ['name1=value1', 'name2=value2'].map {|pair| "#{pair}; #{attributes}"}
  end

  # Double answering [] and get_fields for 'set-cookie' (nil for any other
  # key) and returning the sample page as its body.
  def response
    fake = double("HttpResponse")
    fake.stub(:[]) {|key| cookies.join(', ') if key == 'set-cookie'}
    fake.stub(:get_fields) {|key| cookies if key == 'set-cookie'}
    fake.stub(:body) { html }
    fake
  end

  # A connection whose exec_request is stubbed to return the fake response.
  def connection
    Horseman::Connection.any_instance.stub(:exec_request) { response }
    Horseman::Connection.new
  end
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require 'mocks'
|
metadata
ADDED
@@ -0,0 +1,89 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: horseman
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Jarrod Peace
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-01-06 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: nokogiri
|
16
|
+
requirement: &70095638639800 !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: 1.5.0
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: *70095638639800
|
25
|
+
description: Headless HTTP crawler/scraper for ASP.NET WebForms applications
|
26
|
+
email: peace.jarrod@gmail.com
|
27
|
+
executables: []
|
28
|
+
extensions: []
|
29
|
+
extra_rdoc_files:
|
30
|
+
- README.rdoc
|
31
|
+
- lib/horseman.rb
|
32
|
+
- lib/horseman/browser.rb
|
33
|
+
- lib/horseman/connection.rb
|
34
|
+
- lib/horseman/cookies.rb
|
35
|
+
- lib/horseman/hidden_fields.rb
|
36
|
+
- lib/horseman/response.rb
|
37
|
+
- lib/horseman/version.rb
|
38
|
+
files:
|
39
|
+
- Gemfile
|
40
|
+
- Gemfile.lock
|
41
|
+
- Manifest
|
42
|
+
- README.rdoc
|
43
|
+
- Rakefile
|
44
|
+
- horseman.gemspec
|
45
|
+
- lib/horseman.rb
|
46
|
+
- lib/horseman/browser.rb
|
47
|
+
- lib/horseman/connection.rb
|
48
|
+
- lib/horseman/cookies.rb
|
49
|
+
- lib/horseman/hidden_fields.rb
|
50
|
+
- lib/horseman/response.rb
|
51
|
+
- lib/horseman/version.rb
|
52
|
+
- spec/horseman/browser_spec.rb
|
53
|
+
- spec/horseman/connection_spec.rb
|
54
|
+
- spec/horseman/cookies_spec.rb
|
55
|
+
- spec/horseman/hidden_fields_spec.rb
|
56
|
+
- spec/horseman/response_spec.rb
|
57
|
+
- spec/mocks.rb
|
58
|
+
- spec/spec_helper.rb
|
59
|
+
homepage: http://jarrodpeace.com
|
60
|
+
licenses: []
|
61
|
+
post_install_message:
|
62
|
+
rdoc_options:
|
63
|
+
- --line-numbers
|
64
|
+
- --inline-source
|
65
|
+
- --title
|
66
|
+
- Horseman
|
67
|
+
- --main
|
68
|
+
- README.rdoc
|
69
|
+
require_paths:
|
70
|
+
- lib
|
71
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
72
|
+
none: false
|
73
|
+
requirements:
|
74
|
+
- - ! '>='
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
version: '0'
|
77
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
78
|
+
none: false
|
79
|
+
requirements:
|
80
|
+
- - ! '>='
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '1.2'
|
83
|
+
requirements: []
|
84
|
+
rubyforge_project: horseman
|
85
|
+
rubygems_version: 1.8.10
|
86
|
+
signing_key:
|
87
|
+
specification_version: 3
|
88
|
+
summary: Headless HTTP crawler/scraper for ASP.NET WebForms applications
|
89
|
+
test_files: []
|