ronin-web 0.3.0.pre2 → 0.3.0.rc1
Sign up to get free protection for your applications and to get access to all the features.
- data/ChangeLog.md +3 -0
- data/Gemfile +1 -1
- data/README.md +2 -1
- data/Rakefile +4 -4
- data/bin/ronin-web +2 -2
- data/data/ronin/web/user_agents.yml +247 -0
- data/gemspec.yml +3 -6
- data/lib/ronin/network/mixins/web.rb +3 -1
- data/lib/ronin/web/config.rb +34 -0
- data/lib/ronin/web/mechanize.rb +81 -0
- data/lib/ronin/web/spider.rb +7 -2
- data/lib/ronin/web/user_agents.rb +196 -0
- data/lib/ronin/web/version.rb +1 -1
- data/lib/ronin/web/web.rb +61 -74
- data/ronin-web.gemspec +129 -13
- data/spec/web/helpers/rack_app.rb +1 -8
- data/spec/web/mechanize_spec.rb +62 -0
- data/spec/web/user_agents_spec.rb +56 -0
- data/spec/web/web_spec.rb +2 -58
- metadata +14 -6
data/ronin-web.gemspec
CHANGED
@@ -1,15 +1,131 @@
|
|
1
|
-
#
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'yaml'
|
4
|
+
|
5
|
+
Gem::Specification.new do |gemspec|
|
6
|
+
root = File.dirname(__FILE__)
|
7
|
+
lib_dir = File.join(root,'lib')
|
8
|
+
files = if File.directory?('.git')
|
9
|
+
`git ls-files`.split($/)
|
10
|
+
elsif File.directory?('.hg')
|
11
|
+
`hg manifest`.split($/)
|
12
|
+
elsif File.directory?('.svn')
|
13
|
+
`svn ls -R`.split($/).select { |path| File.file?(path) }
|
14
|
+
else
|
15
|
+
Dir['{**/}{.*,*}'].select { |path| File.file?(path) }
|
16
|
+
end
|
17
|
+
|
18
|
+
filter_files = lambda { |paths|
|
19
|
+
case paths
|
20
|
+
when Array
|
21
|
+
(files & paths)
|
22
|
+
when String
|
23
|
+
(files & Dir[paths])
|
24
|
+
end
|
25
|
+
}
|
26
|
+
|
27
|
+
version = {
|
28
|
+
:file => 'ronin/web/version',
|
29
|
+
:constant => 'Ronin::Web::VERSION'
|
30
|
+
}
|
31
|
+
|
32
|
+
defaults = {
|
33
|
+
'name' => File.basename(root),
|
34
|
+
'files' => files,
|
35
|
+
'executables' => filter_files['bin/*'].map { |path| File.basename(path) },
|
36
|
+
'test_files' => filter_files['{test/{**/}*_test.rb,spec/{**/}*_spec.rb}'],
|
37
|
+
'extra_doc_files' => filter_files['*.{txt,rdoc,md,markdown,tt,textile}'],
|
38
|
+
}
|
39
|
+
|
40
|
+
metadata = defaults.merge(YAML.load_file('gemspec.yml'))
|
41
|
+
|
42
|
+
gemspec.name = metadata.fetch('name',defaults[:name])
|
43
|
+
gemspec.version = if metadata['version']
|
44
|
+
metadata['version']
|
45
|
+
else
|
46
|
+
$LOAD_PATH << lib_dir unless $LOAD_PATH.include?(lib_dir)
|
47
|
+
|
48
|
+
require version[:file]
|
49
|
+
eval(version[:constant])
|
50
|
+
end
|
51
|
+
|
52
|
+
gemspec.summary = metadata.fetch('summary',metadata['description'])
|
53
|
+
gemspec.description = metadata.fetch('description',metadata['summary'])
|
54
|
+
|
55
|
+
case metadata['license']
|
56
|
+
when Array
|
57
|
+
gemspec.licenses = metadata['license']
|
58
|
+
when String
|
59
|
+
gemspec.license = metadata['license']
|
60
|
+
end
|
61
|
+
|
62
|
+
case metadata['authors']
|
63
|
+
when Array
|
64
|
+
gemspec.authors = metadata['authors']
|
65
|
+
when String
|
66
|
+
gemspec.author = metadata['authors']
|
67
|
+
end
|
68
|
+
|
69
|
+
gemspec.email = metadata['email']
|
70
|
+
gemspec.homepage = metadata['homepage']
|
71
|
+
|
72
|
+
case metadata['require_paths']
|
73
|
+
when Array
|
74
|
+
gemspec.require_paths = metadata['require_paths']
|
75
|
+
when String
|
76
|
+
gemspec.require_path = metadata['require_paths']
|
77
|
+
end
|
78
|
+
|
79
|
+
gemspec.files = filter_files[metadata['files']]
|
80
|
+
|
81
|
+
gemspec.executables = metadata['executables']
|
82
|
+
gemspec.extensions = metadata['extensions']
|
83
|
+
|
84
|
+
if Gem::VERSION < '1.7.'
|
85
|
+
gemspec.default_executable = gemspec.executables.first
|
86
|
+
end
|
87
|
+
|
88
|
+
gemspec.test_files = filter_files[metadata['test_files']]
|
89
|
+
|
90
|
+
unless gemspec.files.include?('.document')
|
91
|
+
gemspec.extra_rdoc_files = metadata['extra_doc_files']
|
92
|
+
end
|
93
|
+
|
94
|
+
gemspec.post_install_message = metadata['post_install_message']
|
95
|
+
gemspec.requirements = metadata['requirements']
|
96
|
+
|
97
|
+
if gemspec.respond_to?(:required_ruby_version=)
|
98
|
+
gemspec.required_ruby_version = metadata['required_ruby_version']
|
99
|
+
end
|
100
|
+
|
101
|
+
if gemspec.respond_to?(:required_rubygems_version=)
|
102
|
+
gemspec.required_rubygems_version = metadata['required_ruby_version']
|
103
|
+
end
|
104
|
+
|
105
|
+
parse_versions = lambda { |versions|
|
106
|
+
case versions
|
107
|
+
when Array
|
108
|
+
versions.map { |v| v.to_s }
|
109
|
+
when String
|
110
|
+
versions.split(/,\s*/)
|
111
|
+
end
|
112
|
+
}
|
113
|
+
|
114
|
+
if metadata['dependencies']
|
115
|
+
metadata['dependencies'].each do |name,versions|
|
116
|
+
gemspec.add_dependency(name,parse_versions[versions])
|
117
|
+
end
|
118
|
+
end
|
119
|
+
|
120
|
+
if metadata['runtime_dependencies']
|
121
|
+
metadata['runtime_dependencies'].each do |name,versions|
|
122
|
+
gemspec.add_runtime_dependency(name,parse_versions[versions])
|
123
|
+
end
|
124
|
+
end
|
125
|
+
|
126
|
+
if metadata['development_dependencies']
|
127
|
+
metadata['development_dependencies'].each do |name,versions|
|
128
|
+
gemspec.add_development_dependency(name,parse_versions[versions])
|
129
|
+
end
|
14
130
|
end
|
15
131
|
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'ronin/web/mechanize'
|
3
|
+
|
4
|
+
describe Web::Mechanize do
|
5
|
+
describe "#initialize" do
|
6
|
+
describe ":user_agent" do
|
7
|
+
before(:all) do
|
8
|
+
Web.user_agent = 'test'
|
9
|
+
end
|
10
|
+
|
11
|
+
it "should default to Web.user_agent" do
|
12
|
+
described_class.new.user_agent.should == 'test'
|
13
|
+
end
|
14
|
+
|
15
|
+
it "should support using a custom User-Agent string" do
|
16
|
+
agent = described_class.new(:user_agent => 'test2')
|
17
|
+
|
18
|
+
agent.user_agent.should == 'test2'
|
19
|
+
end
|
20
|
+
|
21
|
+
it "should support using a custom User-Agent alias" do
|
22
|
+
agent = described_class.new(:user_agent_alias => 'iPhone')
|
23
|
+
|
24
|
+
agent.user_agent.should == "Mozilla/5.0 (iPhone; U; CPU like Mac OS X; en) AppleWebKit/420+ (KHTML, like Gecko) Version/3.0 Mobile/1C28 Safari/419.3"
|
25
|
+
end
|
26
|
+
|
27
|
+
after(:all) do
|
28
|
+
Web.user_agent = nil
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
describe ":proxy" do
|
33
|
+
let(:host) { '127.0.0.1' }
|
34
|
+
let(:port) { 8080 }
|
35
|
+
let(:proxy) {
|
36
|
+
Network::HTTP::Proxy.new(:host => host, :port => port)
|
37
|
+
}
|
38
|
+
|
39
|
+
before(:all) do
|
40
|
+
Web.proxy = {:host => 'www.example.com', :port => port}
|
41
|
+
end
|
42
|
+
|
43
|
+
it "should default to Web.proxy" do
|
44
|
+
agent = described_class.new
|
45
|
+
|
46
|
+
agent.proxy_addr.should == Web.proxy.host
|
47
|
+
agent.proxy_port.should == Web.proxy.port
|
48
|
+
end
|
49
|
+
|
50
|
+
it "should support using custom proxies" do
|
51
|
+
agent = described_class.new(:proxy => proxy)
|
52
|
+
|
53
|
+
agent.proxy_addr.should == host
|
54
|
+
agent.proxy_port.should == port
|
55
|
+
end
|
56
|
+
|
57
|
+
after(:all) do
|
58
|
+
Web.proxy = nil
|
59
|
+
end
|
60
|
+
end
|
61
|
+
end
|
62
|
+
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'ronin/web/user_agents'
|
3
|
+
|
4
|
+
describe Web::UserAgents do
|
5
|
+
it "should list the categories of User-Agent strings" do
|
6
|
+
subject.categories.should_not be_empty
|
7
|
+
end
|
8
|
+
|
9
|
+
describe "#[]" do
|
10
|
+
context "with Symbol" do
|
11
|
+
it "should select User-Agent strings by group name" do
|
12
|
+
subject[:ie].should_not be_nil
|
13
|
+
end
|
14
|
+
|
15
|
+
it "should return nil if the group exists" do
|
16
|
+
subject[:foobarbaz].should be_nil
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
context "with String" do
|
21
|
+
it "should select User-Agent strings by substring" do
|
22
|
+
subject['MSIE'].should_not be_nil
|
23
|
+
end
|
24
|
+
|
25
|
+
it "should return nil if no User-Agent matches the substring" do
|
26
|
+
subject['FooBarBaz'].should be_nil
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
context "with Regexp" do
|
31
|
+
it "should select User-Agent strings by Regexp" do
|
32
|
+
subject[/AppleWebKit/i].should_not be_nil
|
33
|
+
end
|
34
|
+
|
35
|
+
it "should return nil if no User-Agent matches the Regexp" do
|
36
|
+
subject[/FooBarBaz/i].should be_nil
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
describe "#fetch" do
|
42
|
+
it "should fetch a User-Agent string" do
|
43
|
+
subject.fetch(:ie).should_not be_nil
|
44
|
+
end
|
45
|
+
|
46
|
+
it "should raise an ArgumentError if no match was found" do
|
47
|
+
lambda {
|
48
|
+
subject.fetch(:foobarbaz)
|
49
|
+
}.should raise_error(ArgumentError)
|
50
|
+
end
|
51
|
+
|
52
|
+
it "should return the default value if no match was found" do
|
53
|
+
subject.fetch(:foobarbaz,'default').should == 'default'
|
54
|
+
end
|
55
|
+
end
|
56
|
+
end
|
data/spec/web/web_spec.rb
CHANGED
@@ -81,64 +81,8 @@ describe Web do
|
|
81
81
|
end
|
82
82
|
|
83
83
|
describe "agent" do
|
84
|
-
it "should
|
85
|
-
Web.agent.
|
86
|
-
end
|
87
|
-
|
88
|
-
describe ":user_agent" do
|
89
|
-
before(:all) do
|
90
|
-
Web.user_agent = 'test'
|
91
|
-
end
|
92
|
-
|
93
|
-
it "should default to Web.user_agent" do
|
94
|
-
Web.agent.user_agent.should == 'test'
|
95
|
-
end
|
96
|
-
|
97
|
-
it "should support using a custom User-Agent string" do
|
98
|
-
agent = Web.agent(:user_agent => 'test2')
|
99
|
-
|
100
|
-
agent.user_agent.should == 'test2'
|
101
|
-
end
|
102
|
-
|
103
|
-
it "should support using a custom User-Agent alias" do
|
104
|
-
agent = Web.agent(:user_agent_alias => 'iPhone')
|
105
|
-
|
106
|
-
agent.user_agent.should == "Mozilla/5.0 (iPhone; U; CPU like Mac OS X; en) AppleWebKit/420+ (KHTML, like Gecko) Version/3.0 Mobile/1C28 Safari/419.3"
|
107
|
-
end
|
108
|
-
|
109
|
-
after(:all) do
|
110
|
-
Web.user_agent = nil
|
111
|
-
end
|
112
|
-
end
|
113
|
-
|
114
|
-
describe ":proxy" do
|
115
|
-
let(:host) { '127.0.0.1' }
|
116
|
-
let(:port) { 8080 }
|
117
|
-
|
118
|
-
before(:all) do
|
119
|
-
Web.proxy = {:host => 'www.example.com', :port => port}
|
120
|
-
end
|
121
|
-
|
122
|
-
it "should default to Web.proxy" do
|
123
|
-
agent = Web.agent
|
124
|
-
|
125
|
-
agent.proxy_addr.should == Web.proxy.host
|
126
|
-
agent.proxy_port.should == Web.proxy.port
|
127
|
-
end
|
128
|
-
|
129
|
-
it "should support using custom proxies" do
|
130
|
-
agent = Web.agent(:proxy => Network::HTTP::Proxy.new(
|
131
|
-
:host => host,
|
132
|
-
:port => port
|
133
|
-
))
|
134
|
-
|
135
|
-
agent.proxy_addr.should == host
|
136
|
-
agent.proxy_port.should == port
|
137
|
-
end
|
138
|
-
|
139
|
-
after(:all) do
|
140
|
-
Web.proxy = nil
|
141
|
-
end
|
84
|
+
it "should be persistent" do
|
85
|
+
Web.agent.object_id.should == Web.agent.object_id
|
142
86
|
end
|
143
87
|
end
|
144
88
|
|
metadata
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
name: ronin-web
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease: 6
|
5
|
-
version: 0.3.0.pre2
|
5
|
+
version: 0.3.0.rc1
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- Postmodern
|
@@ -10,7 +10,7 @@ autorequire:
|
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
12
|
|
13
|
-
date: 2011-
|
13
|
+
date: 2011-07-08 00:00:00 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: nokogiri
|
@@ -74,7 +74,7 @@ dependencies:
|
|
74
74
|
requirements:
|
75
75
|
- - ~>
|
76
76
|
- !ruby/object:Gem::Version
|
77
|
-
version: 0.3
|
77
|
+
version: "0.3"
|
78
78
|
type: :runtime
|
79
79
|
prerelease: false
|
80
80
|
version_requirements: *id006
|
@@ -85,7 +85,7 @@ dependencies:
|
|
85
85
|
requirements:
|
86
86
|
- - ~>
|
87
87
|
- !ruby/object:Gem::Version
|
88
|
-
version: 0.2
|
88
|
+
version: "0.2"
|
89
89
|
type: :runtime
|
90
90
|
prerelease: false
|
91
91
|
version_requirements: *id007
|
@@ -96,7 +96,7 @@ dependencies:
|
|
96
96
|
requirements:
|
97
97
|
- - ~>
|
98
98
|
- !ruby/object:Gem::Version
|
99
|
-
version: 1.1
|
99
|
+
version: "1.1"
|
100
100
|
type: :runtime
|
101
101
|
prerelease: false
|
102
102
|
version_requirements: *id008
|
@@ -144,9 +144,11 @@ files:
|
|
144
144
|
- README.md
|
145
145
|
- Rakefile
|
146
146
|
- bin/ronin-web
|
147
|
+
- data/ronin/web/user_agents.yml
|
147
148
|
- gemspec.yml
|
148
149
|
- lib/ronin/network/mixins/web.rb
|
149
150
|
- lib/ronin/web.rb
|
151
|
+
- lib/ronin/web/config.rb
|
150
152
|
- lib/ronin/web/extensions.rb
|
151
153
|
- lib/ronin/web/extensions/nokogiri.rb
|
152
154
|
- lib/ronin/web/extensions/nokogiri/xml.rb
|
@@ -155,6 +157,7 @@ files:
|
|
155
157
|
- lib/ronin/web/extensions/nokogiri/xml/element.rb
|
156
158
|
- lib/ronin/web/extensions/nokogiri/xml/node.rb
|
157
159
|
- lib/ronin/web/extensions/nokogiri/xml/text.rb
|
160
|
+
- lib/ronin/web/mechanize.rb
|
158
161
|
- lib/ronin/web/middleware.rb
|
159
162
|
- lib/ronin/web/middleware/base.rb
|
160
163
|
- lib/ronin/web/middleware/directories.rb
|
@@ -182,6 +185,7 @@ files:
|
|
182
185
|
- lib/ronin/web/server/base.rb
|
183
186
|
- lib/ronin/web/server/web.rb
|
184
187
|
- lib/ronin/web/spider.rb
|
188
|
+
- lib/ronin/web/user_agents.rb
|
185
189
|
- lib/ronin/web/version.rb
|
186
190
|
- lib/ronin/web/web.rb
|
187
191
|
- ronin-web.gemspec
|
@@ -197,6 +201,7 @@ files:
|
|
197
201
|
- spec/web/helpers/root/test2/test2.txt
|
198
202
|
- spec/web/helpers/root/test3.txt
|
199
203
|
- spec/web/helpers/root/test3/test3.txt
|
204
|
+
- spec/web/mechanize_spec.rb
|
200
205
|
- spec/web/middleware/directories_spec.rb
|
201
206
|
- spec/web/middleware/files_spec.rb
|
202
207
|
- spec/web/middleware/filters/campaign_filter_spec.rb
|
@@ -215,6 +220,7 @@ files:
|
|
215
220
|
- spec/web/server/classes/public2/static2.txt
|
216
221
|
- spec/web/server/classes/sub_app.rb
|
217
222
|
- spec/web/server/classes/test_app.rb
|
223
|
+
- spec/web/user_agents_spec.rb
|
218
224
|
- spec/web/web_spec.rb
|
219
225
|
homepage: http://github.com/ronin-ruby/ronin-web
|
220
226
|
licenses:
|
@@ -239,7 +245,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
239
245
|
requirements: []
|
240
246
|
|
241
247
|
rubyforge_project: ronin-web
|
242
|
-
rubygems_version: 1.8.
|
248
|
+
rubygems_version: 1.8.5
|
243
249
|
signing_key:
|
244
250
|
specification_version: 3
|
245
251
|
summary: A Ruby library for Ronin that provides support for web scraping and spidering functionality.
|
@@ -259,4 +265,6 @@ test_files:
|
|
259
265
|
- spec/web/middleware/response_spec.rb
|
260
266
|
- spec/web/middleware/proxy_spec.rb
|
261
267
|
- spec/web/extensions/nokogiri_spec.rb
|
268
|
+
- spec/web/user_agents_spec.rb
|
269
|
+
- spec/web/mechanize_spec.rb
|
262
270
|
- spec/web/web_spec.rb
|