horseman 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +6 -0
- data/Gemfile.lock +42 -0
- data/Manifest +20 -0
- data/README.rdoc +3 -0
- data/Rakefile +24 -0
- data/horseman.gemspec +32 -0
- data/lib/horseman/browser.rb +32 -0
- data/lib/horseman/connection.rb +42 -0
- data/lib/horseman/cookies.rb +80 -0
- data/lib/horseman/hidden_fields.rb +13 -0
- data/lib/horseman/response.rb +26 -0
- data/lib/horseman/version.rb +3 -0
- data/lib/horseman.rb +1 -0
- data/spec/horseman/browser_spec.rb +35 -0
- data/spec/horseman/connection_spec.rb +64 -0
- data/spec/horseman/cookies_spec.rb +74 -0
- data/spec/horseman/hidden_fields_spec.rb +44 -0
- data/spec/horseman/response_spec.rb +13 -0
- data/spec/mocks.rb +50 -0
- data/spec/spec_helper.rb +1 -0
- metadata +89 -0
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
horseman (0.0.1)
|
5
|
+
nokogiri (>= 1.5.0)
|
6
|
+
|
7
|
+
GEM
|
8
|
+
remote: http://rubygems.org/
|
9
|
+
specs:
|
10
|
+
allison (2.0.3)
|
11
|
+
diff-lcs (1.1.3)
|
12
|
+
echoe (4.6.3)
|
13
|
+
allison (>= 2.0.3)
|
14
|
+
gemcutter (>= 0.7.0)
|
15
|
+
rake (>= 0.9.2)
|
16
|
+
rdoc (>= 3.6.1)
|
17
|
+
rubyforge (>= 2.0.4)
|
18
|
+
gemcutter (0.7.0)
|
19
|
+
json (1.6.2)
|
20
|
+
json_pure (1.6.2)
|
21
|
+
nokogiri (1.5.0)
|
22
|
+
rake (0.9.2.2)
|
23
|
+
rdoc (3.11)
|
24
|
+
json (~> 1.4)
|
25
|
+
rspec (2.7.0)
|
26
|
+
rspec-core (~> 2.7.0)
|
27
|
+
rspec-expectations (~> 2.7.0)
|
28
|
+
rspec-mocks (~> 2.7.0)
|
29
|
+
rspec-core (2.7.1)
|
30
|
+
rspec-expectations (2.7.0)
|
31
|
+
diff-lcs (~> 1.1.2)
|
32
|
+
rspec-mocks (2.7.0)
|
33
|
+
rubyforge (2.0.4)
|
34
|
+
json_pure (>= 1.1.7)
|
35
|
+
|
36
|
+
PLATFORMS
|
37
|
+
ruby
|
38
|
+
|
39
|
+
DEPENDENCIES
|
40
|
+
echoe (~> 4.6.3)
|
41
|
+
horseman!
|
42
|
+
rspec (~> 2.7)
|
data/Manifest
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Gemfile
|
2
|
+
Gemfile.lock
|
3
|
+
Manifest
|
4
|
+
README.rdoc
|
5
|
+
Rakefile
|
6
|
+
horseman.gemspec
|
7
|
+
lib/horseman.rb
|
8
|
+
lib/horseman/browser.rb
|
9
|
+
lib/horseman/connection.rb
|
10
|
+
lib/horseman/cookies.rb
|
11
|
+
lib/horseman/hidden_fields.rb
|
12
|
+
lib/horseman/response.rb
|
13
|
+
lib/horseman/version.rb
|
14
|
+
spec/horseman/browser_spec.rb
|
15
|
+
spec/horseman/connection_spec.rb
|
16
|
+
spec/horseman/cookies_spec.rb
|
17
|
+
spec/horseman/hidden_fields_spec.rb
|
18
|
+
spec/horseman/response_spec.rb
|
19
|
+
spec/mocks.rb
|
20
|
+
spec/spec_helper.rb
|
data/README.rdoc
ADDED
data/Rakefile
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
require 'rake'
|
2
|
+
require 'rspec/core/rake_task'
|
3
|
+
require 'echoe'
|
4
|
+
|
5
|
+
Echoe.new("horseman", "0.0.1") do |p|
|
6
|
+
p.description = "Headless HTTP crawler/scraper for ASP.NET WebForms applications"
|
7
|
+
p.url = "http://jarrodpeace.com"
|
8
|
+
p.author = "Jarrod Peace"
|
9
|
+
p.email = "peace.jarrod@gmail.com"
|
10
|
+
p.ignore_pattern = FileList[".gitignore"]
|
11
|
+
p.development_dependencies = []
|
12
|
+
p.runtime_dependencies = ["nokogiri >=1.5.0"]
|
13
|
+
end
|
14
|
+
|
15
|
+
Dir["#{File.dirname(__FILE__)}/tasks/*.rake"].sort.each { |ext| load ext }
|
16
|
+
|
17
|
+
|
18
|
+
desc "Default task - runs specs"
|
19
|
+
task :default => :spec
|
20
|
+
|
21
|
+
desc "Run specs"
|
22
|
+
RSpec::Core::RakeTask.new do |t|
|
23
|
+
t.rspec_opts = '-cfd'
|
24
|
+
end
|
data/horseman.gemspec
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
|
3
|
+
Gem::Specification.new do |s|
|
4
|
+
s.name = "horseman"
|
5
|
+
s.version = "0.0.1"
|
6
|
+
|
7
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
|
8
|
+
s.authors = ["Jarrod Peace"]
|
9
|
+
s.date = "2012-01-06"
|
10
|
+
s.description = "Headless HTTP crawler/scraper for ASP.NET WebForms applications"
|
11
|
+
s.email = "peace.jarrod@gmail.com"
|
12
|
+
s.extra_rdoc_files = ["README.rdoc", "lib/horseman.rb", "lib/horseman/browser.rb", "lib/horseman/connection.rb", "lib/horseman/cookies.rb", "lib/horseman/hidden_fields.rb", "lib/horseman/response.rb", "lib/horseman/version.rb"]
|
13
|
+
s.files = ["Gemfile", "Gemfile.lock", "Manifest", "README.rdoc", "Rakefile", "horseman.gemspec", "lib/horseman.rb", "lib/horseman/browser.rb", "lib/horseman/connection.rb", "lib/horseman/cookies.rb", "lib/horseman/hidden_fields.rb", "lib/horseman/response.rb", "lib/horseman/version.rb", "spec/horseman/browser_spec.rb", "spec/horseman/connection_spec.rb", "spec/horseman/cookies_spec.rb", "spec/horseman/hidden_fields_spec.rb", "spec/horseman/response_spec.rb", "spec/mocks.rb", "spec/spec_helper.rb"]
|
14
|
+
s.homepage = "http://jarrodpeace.com"
|
15
|
+
s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Horseman", "--main", "README.rdoc"]
|
16
|
+
s.require_paths = ["lib"]
|
17
|
+
s.rubyforge_project = "horseman"
|
18
|
+
s.rubygems_version = "1.8.10"
|
19
|
+
s.summary = "Headless HTTP crawler/scraper for ASP.NET WebForms applications"
|
20
|
+
|
21
|
+
if s.respond_to? :specification_version then
|
22
|
+
s.specification_version = 3
|
23
|
+
|
24
|
+
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
25
|
+
s.add_runtime_dependency(%q<nokogiri>, [">= 1.5.0"])
|
26
|
+
else
|
27
|
+
s.add_dependency(%q<nokogiri>, [">= 1.5.0"])
|
28
|
+
end
|
29
|
+
else
|
30
|
+
s.add_dependency(%q<nokogiri>, [">= 1.5.0"])
|
31
|
+
end
|
32
|
+
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
require 'horseman/cookies'
require 'horseman/response'
|
2
|
+
|
3
|
+
module Horseman
  # Issues requests through a connection object and carries cookies from one
  # response over to the next request.
  class Browser
    attr_accessor :base_url
    attr_reader :cookies, :connection, :last_response

    # connection - object responding to build_request(options) and
    #              exec_request(request)
    # base_url   - string prepended to every path passed to #get!
    def initialize(connection, base_url='')
      @connection = connection
      @base_url = base_url
      @cookies = Horseman::Cookies.new
    end

    # Drops every cookie collected so far.
    def clear_session
      @cookies.clear
    end

    # Performs a GET for base_url + path.
    # Returns the Horseman::Response built from the reply body; the same
    # object is available afterwards through #last_response.
    def get!(path = '/')
      request = @connection.build_request(:url => "#{@base_url}#{path}", :verb => :get)
      exec(request)
    end

    private

    # Sends the request with the stored cookies attached, then stores any
    # cookies the reply sets and remembers the reply as the last response.
    def exec(request)
      # An empty jar would otherwise produce a blank "Cookie:" header.
      request['cookie'] = @cookies.to_s unless @cookies.empty?
      response = @connection.exec_request(request)
      @cookies.update(response.get_fields('set-cookie'))
      @last_response = Horseman::Response.new(response.body)
    end
  end
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
require 'uri'
|
2
|
+
require 'net/http'
|
3
|
+
require 'net/https'
|
4
|
+
|
5
|
+
module Horseman
  # Thin wrapper around Net::HTTP that builds GET/POST requests for a URL
  # and executes them.
  class Connection
    attr_reader :http

    # Points the connection at a new URL and rebuilds the underlying
    # Net::HTTP object for that URL's host and port.
    def url=(url)
      @uri = URI.parse(url)
      build_http
    end

    # Sends a previously built request and returns the Net::HTTP response.
    def exec_request(request)
      @http.request(request)
    end

    # Builds a request object for the current URL.
    #
    # options[:url]  - optional; when given, the connection is re-pointed first
    # options[:verb] - :get (also used when the verb is omitted); anything
    #                  else builds a POST
    # options[:form] - optional hash of form fields for a POST body
    def build_request(options={})
      self.url = options[:url] unless options[:url].nil?
      # `== (:get || nil)` only ever compared against :get, so a missing
      # verb used to fall through to POST.
      verb = options[:verb] || :get
      verb == :get ? build_get_request : build_post_request(options[:form])
    end

    private

    def build_http
      @http = Net::HTTP.new(@uri.host, @uri.port)
      # Decide by scheme so https on a non-standard port is still encrypted;
      # port 443 is kept as a trigger to match the earlier port-only check.
      @http.use_ssl = true if @uri.scheme == 'https' || @uri.port == 443
    end

    def build_get_request
      Net::HTTP::Get.new(@uri.request_uri)
    end

    def build_post_request(form)
      request = Net::HTTP::Post.new(@uri.request_uri)
      request.form_data = form unless form.nil?
      request
    end
  end
end
|
@@ -0,0 +1,80 @@
|
|
1
|
+
require 'date'

module Horseman
  # A single cookie value together with the attributes of its Set-Cookie header.
  class Cookie
    attr_reader :value, :domain, :path, :expiration

    # "name=value" with optional surrounding whitespace. Anchored so that
    # names containing '.' or '-' (ASP.NET_SessionId, Max-Age) are captured
    # whole instead of only their trailing word characters.
    PAIR = /\A\s*([^=\s]+)\s*=\s*(.*?)\s*\z/

    SECONDS_PER_DAY = 60 * 60 * 24

    # value      - the cookie's value string
    # attributes - array of raw attribute strings ("Path=/", "HttpOnly", ...)
    def initialize(value, attributes)
      @value = value
      attributes.each {|a| parse_attribute(a)}
    end

    private

    # Records domain, path and expiration; attributes without a value or with
    # an unknown name are ignored.
    def parse_attribute(attribute)
      md = PAIR.match(attribute)
      return if md.nil?

      name, text = md.captures
      case name.downcase
      when 'domain'
        @domain = text
      when 'path'
        @path = text
      when 'expires'
        parsed = parse_expires(text)
        @expiration = parsed unless parsed.nil?
      when 'max-age'
        parsed = parse_max_age(text)
        @expiration = parsed unless parsed.nil?
      end
    end

    # Returns a DateTime, or nil when the text is not a recognisable date so
    # one malformed attribute does not abort the whole cookie update.
    def parse_expires(text)
      DateTime.parse(text)
    rescue ArgumentError
      nil
    end

    # Max-Age is a number of seconds; DateTime arithmetic works in days, so
    # the offset is expressed as an exact fraction of a day.
    def parse_max_age(text)
      return nil unless text =~ /\A-?\d+\z/
      DateTime.now + Rational(text.to_i, SECONDS_PER_DAY)
    end
  end

  # Cookie jar keyed by cookie name.
  class Cookies
    def initialize
      clear
    end

    # Returns the value of the named cookie, or nil when it is not stored.
    def [](cookie_name)
      cookie = @dict[cookie_name]
      cookie && cookie.value
    end

    # Returns the Horseman::Cookie object for the name, or nil.
    def get(cookie_name)
      @dict[cookie_name]
    end

    def clear
      @dict = {}
    end

    def count
      @dict.count
    end

    def empty?
      @dict.empty?
    end

    # Renders the jar as "name=value; name=value".
    def to_s
      @dict.map {|k,v| "#{k}=#{v.value}"}.join('; ')
    end

    # Merges one Set-Cookie header string, an array of them, or nil into the
    # jar; a cookie with an existing name replaces the stored one.
    # Raises ArgumentError for a header that has no leading name=value pair.
    # Returns self so calls can be chained.
    def update(header)
      Array(header).each {|h| parse_header(h)}
      self
    end

    private

    def parse_header(header)
      nvp, *attributes = header.split(';')
      md = nvp && Cookie::PAIR.match(nvp)
      raise ArgumentError, "unrecognized cookie header: #{header.inspect}" if md.nil?

      name, value = md.captures
      @dict[name] = Horseman::Cookie.new(value, attributes)
    end
  end
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
module Horseman
  # Collects the name/value pairs of every <input type="hidden"> in a chunk
  # of HTML and exposes them as the +tokens+ hash.
  class HiddenFields
    attr_reader :tokens

    # One <input ...> tag; stops at the first '>' so several tags on the
    # same line are seen individually.
    INPUT_TAG = /<input\b[^>]*>/im

    # html - markup to scan; nil is treated as empty.
    def initialize(html)
      @tokens = {}
      html.to_s.scan(INPUT_TAG) do |tag|
        next unless attribute(tag, 'type').to_s.downcase == 'hidden'
        name = attribute(tag, 'name')
        next if name.nil? || name.empty?
        # A hidden input without a value attribute is recorded as ''.
        @tokens[name] = attribute(tag, 'value').to_s
      end
    end

    private

    # Returns the quoted value of the named attribute inside one tag, or nil.
    # Attributes are looked up independently, so their order in the tag and
    # whitespace inside the value do not matter. The leading \s keeps
    # "name" from matching inside e.g. data-name.
    def attribute(tag, name)
      md = /\s#{Regexp.escape(name)}\s*=\s*(["'])(.*?)\1/im.match(tag)
      md && md[2]
    end
  end
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
module Horseman
  # Base for parsed HTML elements; carries the id and name attributes.
  class Element
    attr_accessor :id, :name
  end

  # A <form> element found in a response body.
  class Form < Element
  end

  # A form field; adds the type and value attributes.
  class FormField < Element
    attr_accessor :type, :value
  end

  # Wraps a response body and lists the forms it contains.
  class Response
    attr_reader :body, :forms

    # Opening <form ...> tag, up to its first '>'.
    FORM_TAG = /<form\b[^>]*>/im

    # body - the raw response body; it is scanned for forms immediately.
    def initialize(body)
      @body = body
      @forms = []
      parse
    end

    private

    # Adds one Form per opening <form> tag, in document order, with its id
    # and name attributes (nil when the tag does not carry them).
    # Uses plain pattern matching, so only quoted attribute values on the
    # opening tag itself are picked up.
    def parse
      @body.to_s.scan(FORM_TAG) do |tag|
        form = Form.new
        form.id = attribute(tag, 'id')
        form.name = attribute(tag, 'name')
        @forms << form
      end
    end

    # Returns the quoted value of the named attribute inside one tag, or nil.
    def attribute(tag, name)
      md = /\s#{Regexp.escape(name)}\s*=\s*(["'])(.*?)\1/im.match(tag)
      md && md[2]
    end
  end
end
|
data/lib/horseman.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require 'horseman/version'
|
@@ -0,0 +1,35 @@
|
|
1
|
+
require 'horseman/browser'
|
2
|
+
|
3
|
+
describe Horseman::Browser do
|
4
|
+
include Mocks
|
5
|
+
|
6
|
+
subject {described_class.new(connection, 'http://www.example.com')}
|
7
|
+
|
8
|
+
it "saves cookies" do
|
9
|
+
subject.cookies.should be_empty
|
10
|
+
|
11
|
+
subject.get!
|
12
|
+
subject.cookies.count.should eq 2
|
13
|
+
subject.cookies['name1'].should eq 'value1'
|
14
|
+
subject.cookies['name2'].should eq 'value2'
|
15
|
+
|
16
|
+
subject.connection.should_receive(:exec_request) do |request|
|
17
|
+
request['cookie'].should match /\w+=\w+; \w+=\w+/
|
18
|
+
request['cookie'].should match /name1=value1/
|
19
|
+
request['cookie'].should match /name2=value2/
|
20
|
+
end
|
21
|
+
subject.get!
|
22
|
+
end
|
23
|
+
|
24
|
+
it "empties the cookies when the session is cleared" do
|
25
|
+
subject.get!
|
26
|
+
subject.cookies.should_not be_empty
|
27
|
+
subject.clear_session
|
28
|
+
subject.cookies.should be_empty
|
29
|
+
end
|
30
|
+
|
31
|
+
it "stores information about the last response" do
|
32
|
+
subject.get!
|
33
|
+
subject.last_response.body.should eq html
|
34
|
+
end
|
35
|
+
end
|
@@ -0,0 +1,64 @@
|
|
1
|
+
require 'horseman/connection'
|
2
|
+
require 'net/http'
|
3
|
+
|
4
|
+
describe Horseman::Connection do
|
5
|
+
subject do
|
6
|
+
c = described_class.new
|
7
|
+
c.url = 'http://www.example.com/some/path'
|
8
|
+
c
|
9
|
+
end
|
10
|
+
|
11
|
+
context "when building requests" do
|
12
|
+
let(:request) {subject.build_request(:verb => :get)}
|
13
|
+
|
14
|
+
it "uses the proper path" do
|
15
|
+
request.path.should eq '/some/path'
|
16
|
+
end
|
17
|
+
|
18
|
+
context "using GET" do
|
19
|
+
it "uses the proper request type" do
|
20
|
+
request.class.should be Net::HTTP::Get
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
context "using POST" do
|
25
|
+
let(:request) {subject.build_request(:verb => :post)}
|
26
|
+
|
27
|
+
it "uses the proper request type" do
|
28
|
+
request.class.should be Net::HTTP::Post
|
29
|
+
end
|
30
|
+
|
31
|
+
context "with form data" do
|
32
|
+
let(:request) {subject.build_request(:verb => :post, :form => {:field1=>'value1', :field2=>'value2'})}
|
33
|
+
|
34
|
+
it "properly sets request body" do
|
35
|
+
request.body.should eq 'field1=value1&field2=value2'
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
context "without form data" do
|
40
|
+
it "properly sets request body" do
|
41
|
+
request.body.should be_nil
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
context "when accessed using http" do
|
48
|
+
it "does not use SSL" do
|
49
|
+
subject.http.use_ssl?.should be_false
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
context "when accessed using https" do
|
54
|
+
subject do
|
55
|
+
c = described_class.new
|
56
|
+
c.url = 'https://www.example.com'
|
57
|
+
c
|
58
|
+
end
|
59
|
+
|
60
|
+
it "uses SSL" do
|
61
|
+
subject.http.use_ssl?.should be_true
|
62
|
+
end
|
63
|
+
end
|
64
|
+
end
|
@@ -0,0 +1,74 @@
|
|
1
|
+
require 'horseman/cookies'
|
2
|
+
|
3
|
+
class Yo
|
4
|
+
def test
|
5
|
+
pp "yo"
|
6
|
+
end
|
7
|
+
end
|
8
|
+
|
9
|
+
describe Horseman::Cookies do
|
10
|
+
let(:simple_header) {'name1=value1'}
|
11
|
+
let(:complex_header) {'name2=value2; Domain=www.example.com; Path=/path; Expires=Sun, 1-Jan-2012 00:00:00 GMT'}
|
12
|
+
|
13
|
+
it "starts empty" do
|
14
|
+
subject.should be_empty
|
15
|
+
end
|
16
|
+
|
17
|
+
it "accepts a single header" do
|
18
|
+
subject.update(simple_header)['name1'].should eq 'value1'
|
19
|
+
end
|
20
|
+
|
21
|
+
it "accepts multiple headers" do
|
22
|
+
subject.update([simple_header, complex_header])
|
23
|
+
subject['name1'].should eq 'value1'
|
24
|
+
subject['name2'].should eq 'value2'
|
25
|
+
end
|
26
|
+
|
27
|
+
it "captures attributes" do
|
28
|
+
subject.update(complex_header)
|
29
|
+
subject.get('name2').domain.should eq 'www.example.com'
|
30
|
+
subject.get('name2').path.should eq '/path'
|
31
|
+
subject.get('name2').expiration.should eq DateTime.new(2012, 1, 1, 0, 0, 0, 0)
|
32
|
+
end
|
33
|
+
|
34
|
+
it "accepts an empty array" do
|
35
|
+
subject.update([]).should be_empty
|
36
|
+
end
|
37
|
+
|
38
|
+
it "accepts nil" do
|
39
|
+
subject.update(nil).should be_empty
|
40
|
+
end
|
41
|
+
|
42
|
+
it "raises an exception on an unrecognized header" do
|
43
|
+
expect {subject.update('bad header')}.to raise_error(ArgumentError)
|
44
|
+
end
|
45
|
+
|
46
|
+
it "generates a correct header" do
|
47
|
+
header = subject.update([simple_header, complex_header]).to_s
|
48
|
+
header.should match /\w+=\w+; \w+=\w+/
|
49
|
+
header.should match /name1=value1/
|
50
|
+
header.should match /name2=value2/
|
51
|
+
end
|
52
|
+
|
53
|
+
context "with prexisting values" do
|
54
|
+
subject do
|
55
|
+
described_class.new.update('name1=other_value')
|
56
|
+
end
|
57
|
+
|
58
|
+
it "returns nil for uninitialized values" do
|
59
|
+
subject['doesnt_exist'].should be_nil
|
60
|
+
end
|
61
|
+
|
62
|
+
it "merges new values" do
|
63
|
+
subject.update(complex_header)
|
64
|
+
subject['name1'].should eq 'other_value'
|
65
|
+
subject['name2'].should eq 'value2'
|
66
|
+
end
|
67
|
+
|
68
|
+
it "overwrites existing values" do
|
69
|
+
subject.update(simple_header)
|
70
|
+
subject['name1'].should eq 'value1'
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
require 'horseman/hidden_fields'
|
2
|
+
|
3
|
+
describe Horseman::HiddenFields do
|
4
|
+
|
5
|
+
it "parses a single simple hidden input field" do
|
6
|
+
html = %{<input type="hidden" name="test" value="test_data" />}
|
7
|
+
cut = described_class.new(html)
|
8
|
+
|
9
|
+
cut.tokens.size.should == 1
|
10
|
+
cut.tokens['test'].should == 'test_data'
|
11
|
+
end
|
12
|
+
|
13
|
+
it "parses a single complex hidden input field" do
|
14
|
+
html = %{<input attr0="value0" type="hidden" attr1="value1" name="test" attr2="value2" value="test_data" attr3="value3" />}
|
15
|
+
cut = described_class.new(html)
|
16
|
+
|
17
|
+
cut.tokens.size.should == 1
|
18
|
+
cut.tokens['test'].should == 'test_data'
|
19
|
+
end
|
20
|
+
|
21
|
+
it "parses multiple hidden input fields" do
|
22
|
+
html = %{
|
23
|
+
<input type="hidden" name="test" value="test_data" />
|
24
|
+
<input type="hidden" name="foo" value="bar" />
|
25
|
+
<some other="tag"></some>
|
26
|
+
<input type="hidden" name="dee" value="dum" />
|
27
|
+
}
|
28
|
+
cut = described_class.new(html)
|
29
|
+
|
30
|
+
cut.tokens.size.should == 3
|
31
|
+
cut.tokens['test'].should == 'test_data'
|
32
|
+
cut.tokens['foo'].should == 'bar'
|
33
|
+
cut.tokens['dee'].should == 'dum'
|
34
|
+
end
|
35
|
+
|
36
|
+
it "handles single quotes, too" do
|
37
|
+
html = %{<input type='hidden' name='test' value='test_data' />}
|
38
|
+
cut = described_class.new(html)
|
39
|
+
|
40
|
+
cut.tokens.size.should == 1
|
41
|
+
cut.tokens['test'].should == 'test_data'
|
42
|
+
end
|
43
|
+
|
44
|
+
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
require 'horseman/response'
|
2
|
+
|
3
|
+
describe Horseman::Response do
|
4
|
+
include Mocks
|
5
|
+
|
6
|
+
subject { described_class.new(html) }
|
7
|
+
|
8
|
+
it "parses forms" do
|
9
|
+
subject.forms.count.should eq 2
|
10
|
+
subject.forms[0].id.should eq 'form1'
|
11
|
+
subject.forms[1].id.should eq 'form2'
|
12
|
+
end
|
13
|
+
end
|
data/spec/mocks.rb
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
require 'horseman/connection'
|
2
|
+
|
3
|
+
module Mocks
|
4
|
+
|
5
|
+
def html
|
6
|
+
%{
|
7
|
+
<html>
|
8
|
+
<head></head>
|
9
|
+
<body>
|
10
|
+
<form id="form1">
|
11
|
+
<input type="text" name="name1" value="value1" />
|
12
|
+
<input type="submit" value="OK" />
|
13
|
+
</form>
|
14
|
+
<form id="form2">
|
15
|
+
<input type="text" name="name2" value="value2" />
|
16
|
+
<input type="submit" value="OK" />
|
17
|
+
</form>
|
18
|
+
</body>
|
19
|
+
</html>
|
20
|
+
}
|
21
|
+
end
|
22
|
+
|
23
|
+
def cookies
|
24
|
+
['name1=value1; Domain=www.example.com; Path=/path; Expires=Sun, 1-Jan-2012 00:00:00 GMT',
|
25
|
+
'name2=value2; Domain=www.example.com; Path=/path; Expires=Sun, 1-Jan-2012 00:00:00 GMT']
|
26
|
+
end
|
27
|
+
|
28
|
+
def response
|
29
|
+
m = double("HttpResponse")
|
30
|
+
m.stub(:[]) do |key|
|
31
|
+
case key
|
32
|
+
when 'set-cookie'
|
33
|
+
cookies.join(', ')
|
34
|
+
end
|
35
|
+
end
|
36
|
+
m.stub(:get_fields) do |key|
|
37
|
+
case key
|
38
|
+
when 'set-cookie'
|
39
|
+
cookies
|
40
|
+
end
|
41
|
+
end
|
42
|
+
m.stub(:body) { html }
|
43
|
+
m
|
44
|
+
end
|
45
|
+
|
46
|
+
def connection
|
47
|
+
Horseman::Connection.any_instance.stub(:exec_request) { response }
|
48
|
+
Horseman::Connection.new
|
49
|
+
end
|
50
|
+
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require 'mocks'
|
metadata
ADDED
@@ -0,0 +1,89 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: horseman
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Jarrod Peace
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-01-06 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: nokogiri
|
16
|
+
requirement: &70095638639800 !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: 1.5.0
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: *70095638639800
|
25
|
+
description: Headless HTTP crawler/scraper for ASP.NET WebForms applications
|
26
|
+
email: peace.jarrod@gmail.com
|
27
|
+
executables: []
|
28
|
+
extensions: []
|
29
|
+
extra_rdoc_files:
|
30
|
+
- README.rdoc
|
31
|
+
- lib/horseman.rb
|
32
|
+
- lib/horseman/browser.rb
|
33
|
+
- lib/horseman/connection.rb
|
34
|
+
- lib/horseman/cookies.rb
|
35
|
+
- lib/horseman/hidden_fields.rb
|
36
|
+
- lib/horseman/response.rb
|
37
|
+
- lib/horseman/version.rb
|
38
|
+
files:
|
39
|
+
- Gemfile
|
40
|
+
- Gemfile.lock
|
41
|
+
- Manifest
|
42
|
+
- README.rdoc
|
43
|
+
- Rakefile
|
44
|
+
- horseman.gemspec
|
45
|
+
- lib/horseman.rb
|
46
|
+
- lib/horseman/browser.rb
|
47
|
+
- lib/horseman/connection.rb
|
48
|
+
- lib/horseman/cookies.rb
|
49
|
+
- lib/horseman/hidden_fields.rb
|
50
|
+
- lib/horseman/response.rb
|
51
|
+
- lib/horseman/version.rb
|
52
|
+
- spec/horseman/browser_spec.rb
|
53
|
+
- spec/horseman/connection_spec.rb
|
54
|
+
- spec/horseman/cookies_spec.rb
|
55
|
+
- spec/horseman/hidden_fields_spec.rb
|
56
|
+
- spec/horseman/response_spec.rb
|
57
|
+
- spec/mocks.rb
|
58
|
+
- spec/spec_helper.rb
|
59
|
+
homepage: http://jarrodpeace.com
|
60
|
+
licenses: []
|
61
|
+
post_install_message:
|
62
|
+
rdoc_options:
|
63
|
+
- --line-numbers
|
64
|
+
- --inline-source
|
65
|
+
- --title
|
66
|
+
- Horseman
|
67
|
+
- --main
|
68
|
+
- README.rdoc
|
69
|
+
require_paths:
|
70
|
+
- lib
|
71
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
72
|
+
none: false
|
73
|
+
requirements:
|
74
|
+
- - ! '>='
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
version: '0'
|
77
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
78
|
+
none: false
|
79
|
+
requirements:
|
80
|
+
- - ! '>='
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '1.2'
|
83
|
+
requirements: []
|
84
|
+
rubyforge_project: horseman
|
85
|
+
rubygems_version: 1.8.10
|
86
|
+
signing_key:
|
87
|
+
specification_version: 3
|
88
|
+
summary: Headless HTTP crawler/scraper for ASP.NET WebForms applications
|
89
|
+
test_files: []
|