ronin-web 0.3.0.pre2 → 0.3.0.rc1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ChangeLog.md +3 -0
- data/Gemfile +1 -1
- data/README.md +2 -1
- data/Rakefile +4 -4
- data/bin/ronin-web +2 -2
- data/data/ronin/web/user_agents.yml +247 -0
- data/gemspec.yml +3 -6
- data/lib/ronin/network/mixins/web.rb +3 -1
- data/lib/ronin/web/config.rb +34 -0
- data/lib/ronin/web/mechanize.rb +81 -0
- data/lib/ronin/web/spider.rb +7 -2
- data/lib/ronin/web/user_agents.rb +196 -0
- data/lib/ronin/web/version.rb +1 -1
- data/lib/ronin/web/web.rb +61 -74
- data/ronin-web.gemspec +129 -13
- data/spec/web/helpers/rack_app.rb +1 -8
- data/spec/web/mechanize_spec.rb +62 -0
- data/spec/web/user_agents_spec.rb +56 -0
- data/spec/web/web_spec.rb +2 -58
- metadata +14 -6
data/ronin-web.gemspec
CHANGED
@@ -1,15 +1,131 @@
|
|
1
|
-
#
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'yaml'
|
4
|
+
|
5
|
+
Gem::Specification.new do |gemspec|
|
6
|
+
root = File.dirname(__FILE__)
|
7
|
+
lib_dir = File.join(root,'lib')
|
8
|
+
files = if File.directory?('.git')
|
9
|
+
`git ls-files`.split($/)
|
10
|
+
elsif File.directory?('.hg')
|
11
|
+
`hg manifest`.split($/)
|
12
|
+
elsif File.directory?('.svn')
|
13
|
+
`svn ls -R`.split($/).select { |path| File.file?(path) }
|
14
|
+
else
|
15
|
+
Dir['{**/}{.*,*}'].select { |path| File.file?(path) }
|
16
|
+
end
|
17
|
+
|
18
|
+
filter_files = lambda { |paths|
|
19
|
+
case paths
|
20
|
+
when Array
|
21
|
+
(files & paths)
|
22
|
+
when String
|
23
|
+
(files & Dir[paths])
|
24
|
+
end
|
25
|
+
}
|
26
|
+
|
27
|
+
version = {
|
28
|
+
:file => 'ronin/web/version',
|
29
|
+
:constant => 'Ronin::Web::VERSION'
|
30
|
+
}
|
31
|
+
|
32
|
+
defaults = {
|
33
|
+
'name' => File.basename(root),
|
34
|
+
'files' => files,
|
35
|
+
'executables' => filter_files['bin/*'].map { |path| File.basename(path) },
|
36
|
+
'test_files' => filter_files['{test/{**/}*_test.rb,spec/{**/}*_spec.rb}'],
|
37
|
+
'extra_doc_files' => filter_files['*.{txt,rdoc,md,markdown,tt,textile}'],
|
38
|
+
}
|
39
|
+
|
40
|
+
metadata = defaults.merge(YAML.load_file('gemspec.yml'))
|
41
|
+
|
42
|
+
gemspec.name = metadata.fetch('name',defaults[:name])
|
43
|
+
gemspec.version = if metadata['version']
|
44
|
+
metadata['version']
|
45
|
+
else
|
46
|
+
$LOAD_PATH << lib_dir unless $LOAD_PATH.include?(lib_dir)
|
47
|
+
|
48
|
+
require version[:file]
|
49
|
+
eval(version[:constant])
|
50
|
+
end
|
51
|
+
|
52
|
+
gemspec.summary = metadata.fetch('summary',metadata['description'])
|
53
|
+
gemspec.description = metadata.fetch('description',metadata['summary'])
|
54
|
+
|
55
|
+
case metadata['license']
|
56
|
+
when Array
|
57
|
+
gemspec.licenses = metadata['license']
|
58
|
+
when String
|
59
|
+
gemspec.license = metadata['license']
|
60
|
+
end
|
61
|
+
|
62
|
+
case metadata['authors']
|
63
|
+
when Array
|
64
|
+
gemspec.authors = metadata['authors']
|
65
|
+
when String
|
66
|
+
gemspec.author = metadata['authors']
|
67
|
+
end
|
68
|
+
|
69
|
+
gemspec.email = metadata['email']
|
70
|
+
gemspec.homepage = metadata['homepage']
|
71
|
+
|
72
|
+
case metadata['require_paths']
|
73
|
+
when Array
|
74
|
+
gemspec.require_paths = metadata['require_paths']
|
75
|
+
when String
|
76
|
+
gemspec.require_path = metadata['require_paths']
|
77
|
+
end
|
78
|
+
|
79
|
+
gemspec.files = filter_files[metadata['files']]
|
80
|
+
|
81
|
+
gemspec.executables = metadata['executables']
|
82
|
+
gemspec.extensions = metadata['extensions']
|
83
|
+
|
84
|
+
if Gem::VERSION < '1.7.'
|
85
|
+
gemspec.default_executable = gemspec.executables.first
|
86
|
+
end
|
87
|
+
|
88
|
+
gemspec.test_files = filter_files[metadata['test_files']]
|
89
|
+
|
90
|
+
unless gemspec.files.include?('.document')
|
91
|
+
gemspec.extra_rdoc_files = metadata['extra_doc_files']
|
92
|
+
end
|
93
|
+
|
94
|
+
gemspec.post_install_message = metadata['post_install_message']
|
95
|
+
gemspec.requirements = metadata['requirements']
|
96
|
+
|
97
|
+
if gemspec.respond_to?(:required_ruby_version=)
|
98
|
+
gemspec.required_ruby_version = metadata['required_ruby_version']
|
99
|
+
end
|
100
|
+
|
101
|
+
if gemspec.respond_to?(:required_rubygems_version=)
|
102
|
+
gemspec.required_rubygems_version = metadata['required_ruby_version']
|
103
|
+
end
|
104
|
+
|
105
|
+
parse_versions = lambda { |versions|
|
106
|
+
case versions
|
107
|
+
when Array
|
108
|
+
versions.map { |v| v.to_s }
|
109
|
+
when String
|
110
|
+
versions.split(/,\s*/)
|
111
|
+
end
|
112
|
+
}
|
113
|
+
|
114
|
+
if metadata['dependencies']
|
115
|
+
metadata['dependencies'].each do |name,versions|
|
116
|
+
gemspec.add_dependency(name,parse_versions[versions])
|
117
|
+
end
|
118
|
+
end
|
119
|
+
|
120
|
+
if metadata['runtime_dependencies']
|
121
|
+
metadata['runtime_dependencies'].each do |name,versions|
|
122
|
+
gemspec.add_runtime_dependency(name,parse_versions[versions])
|
123
|
+
end
|
124
|
+
end
|
125
|
+
|
126
|
+
if metadata['development_dependencies']
|
127
|
+
metadata['development_dependencies'].each do |name,versions|
|
128
|
+
gemspec.add_development_dependency(name,parse_versions[versions])
|
129
|
+
end
|
14
130
|
end
|
15
131
|
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'ronin/web/mechanize'
|
3
|
+
|
4
|
+
describe Web::Mechanize do
|
5
|
+
describe "#initialize" do
|
6
|
+
describe ":user_agent" do
|
7
|
+
before(:all) do
|
8
|
+
Web.user_agent = 'test'
|
9
|
+
end
|
10
|
+
|
11
|
+
it "should default to Web.user_agent" do
|
12
|
+
described_class.new.user_agent.should == 'test'
|
13
|
+
end
|
14
|
+
|
15
|
+
it "should support using a custom User-Agent string" do
|
16
|
+
agent = described_class.new(:user_agent => 'test2')
|
17
|
+
|
18
|
+
agent.user_agent.should == 'test2'
|
19
|
+
end
|
20
|
+
|
21
|
+
it "should support using a custom User-Agent alias" do
|
22
|
+
agent = described_class.new(:user_agent_alias => 'iPhone')
|
23
|
+
|
24
|
+
agent.user_agent.should == "Mozilla/5.0 (iPhone; U; CPU like Mac OS X; en) AppleWebKit/420+ (KHTML, like Gecko) Version/3.0 Mobile/1C28 Safari/419.3"
|
25
|
+
end
|
26
|
+
|
27
|
+
after(:all) do
|
28
|
+
Web.user_agent = nil
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
describe ":proxy" do
|
33
|
+
let(:host) { '127.0.0.1' }
|
34
|
+
let(:port) { 8080 }
|
35
|
+
let(:proxy) {
|
36
|
+
Network::HTTP::Proxy.new(:host => host, :port => port)
|
37
|
+
}
|
38
|
+
|
39
|
+
before(:all) do
|
40
|
+
Web.proxy = {:host => 'www.example.com', :port => port}
|
41
|
+
end
|
42
|
+
|
43
|
+
it "should default to Web.proxy" do
|
44
|
+
agent = described_class.new
|
45
|
+
|
46
|
+
agent.proxy_addr.should == Web.proxy.host
|
47
|
+
agent.proxy_port.should == Web.proxy.port
|
48
|
+
end
|
49
|
+
|
50
|
+
it "should support using custom proxies" do
|
51
|
+
agent = described_class.new(:proxy => proxy)
|
52
|
+
|
53
|
+
agent.proxy_addr.should == host
|
54
|
+
agent.proxy_port.should == port
|
55
|
+
end
|
56
|
+
|
57
|
+
after(:all) do
|
58
|
+
Web.proxy = nil
|
59
|
+
end
|
60
|
+
end
|
61
|
+
end
|
62
|
+
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'ronin/web/user_agents'
|
3
|
+
|
4
|
+
describe Web::UserAgents do
|
5
|
+
it "should list the categories of User-Agent strings" do
|
6
|
+
subject.categories.should_not be_empty
|
7
|
+
end
|
8
|
+
|
9
|
+
describe "#[]" do
|
10
|
+
context "with Symbol" do
|
11
|
+
it "should select User-Agent strings by group name" do
|
12
|
+
subject[:ie].should_not be_nil
|
13
|
+
end
|
14
|
+
|
15
|
+
it "should return nil if the group exists" do
|
16
|
+
subject[:foobarbaz].should be_nil
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
context "with String" do
|
21
|
+
it "should select User-Agent strings by substring" do
|
22
|
+
subject['MSIE'].should_not be_nil
|
23
|
+
end
|
24
|
+
|
25
|
+
it "should return nil if no User-Agent matches the substring" do
|
26
|
+
subject['FooBarBaz'].should be_nil
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
context "with Regexp" do
|
31
|
+
it "should select User-Agent strings by Regexp" do
|
32
|
+
subject[/AppleWebKit/i].should_not be_nil
|
33
|
+
end
|
34
|
+
|
35
|
+
it "should return nil if no User-Agent matches the Regexp" do
|
36
|
+
subject[/FooBarBaz/i].should be_nil
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
describe "#fetch" do
|
42
|
+
it "should fetch a User-Agent string" do
|
43
|
+
subject.fetch(:ie).should_not be_nil
|
44
|
+
end
|
45
|
+
|
46
|
+
it "should raise an ArgumentError if no match was found" do
|
47
|
+
lambda {
|
48
|
+
subject.fetch(:foobarbaz)
|
49
|
+
}.should raise_error(ArgumentError)
|
50
|
+
end
|
51
|
+
|
52
|
+
it "should return the default value if no match was found" do
|
53
|
+
subject.fetch(:foobarbaz,'default').should == 'default'
|
54
|
+
end
|
55
|
+
end
|
56
|
+
end
|
data/spec/web/web_spec.rb
CHANGED
@@ -81,64 +81,8 @@ describe Web do
|
|
81
81
|
end
|
82
82
|
|
83
83
|
describe "agent" do
|
84
|
-
it "should
|
85
|
-
Web.agent.
|
86
|
-
end
|
87
|
-
|
88
|
-
describe ":user_agent" do
|
89
|
-
before(:all) do
|
90
|
-
Web.user_agent = 'test'
|
91
|
-
end
|
92
|
-
|
93
|
-
it "should default to Web.user_agent" do
|
94
|
-
Web.agent.user_agent.should == 'test'
|
95
|
-
end
|
96
|
-
|
97
|
-
it "should support using a custom User-Agent string" do
|
98
|
-
agent = Web.agent(:user_agent => 'test2')
|
99
|
-
|
100
|
-
agent.user_agent.should == 'test2'
|
101
|
-
end
|
102
|
-
|
103
|
-
it "should support using a custom User-Agent alias" do
|
104
|
-
agent = Web.agent(:user_agent_alias => 'iPhone')
|
105
|
-
|
106
|
-
agent.user_agent.should == "Mozilla/5.0 (iPhone; U; CPU like Mac OS X; en) AppleWebKit/420+ (KHTML, like Gecko) Version/3.0 Mobile/1C28 Safari/419.3"
|
107
|
-
end
|
108
|
-
|
109
|
-
after(:all) do
|
110
|
-
Web.user_agent = nil
|
111
|
-
end
|
112
|
-
end
|
113
|
-
|
114
|
-
describe ":proxy" do
|
115
|
-
let(:host) { '127.0.0.1' }
|
116
|
-
let(:port) { 8080 }
|
117
|
-
|
118
|
-
before(:all) do
|
119
|
-
Web.proxy = {:host => 'www.example.com', :port => port}
|
120
|
-
end
|
121
|
-
|
122
|
-
it "should default to Web.proxy" do
|
123
|
-
agent = Web.agent
|
124
|
-
|
125
|
-
agent.proxy_addr.should == Web.proxy.host
|
126
|
-
agent.proxy_port.should == Web.proxy.port
|
127
|
-
end
|
128
|
-
|
129
|
-
it "should support using custom proxies" do
|
130
|
-
agent = Web.agent(:proxy => Network::HTTP::Proxy.new(
|
131
|
-
:host => host,
|
132
|
-
:port => port
|
133
|
-
))
|
134
|
-
|
135
|
-
agent.proxy_addr.should == host
|
136
|
-
agent.proxy_port.should == port
|
137
|
-
end
|
138
|
-
|
139
|
-
after(:all) do
|
140
|
-
Web.proxy = nil
|
141
|
-
end
|
84
|
+
it "should be persistent" do
|
85
|
+
Web.agent.object_id.should == Web.agent.object_id
|
142
86
|
end
|
143
87
|
end
|
144
88
|
|
metadata
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
name: ronin-web
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease: 6
|
5
|
-
version: 0.3.0.pre2
|
5
|
+
version: 0.3.0.rc1
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- Postmodern
|
@@ -10,7 +10,7 @@ autorequire:
|
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
12
|
|
13
|
-
date: 2011-
|
13
|
+
date: 2011-07-08 00:00:00 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: nokogiri
|
@@ -74,7 +74,7 @@ dependencies:
|
|
74
74
|
requirements:
|
75
75
|
- - ~>
|
76
76
|
- !ruby/object:Gem::Version
|
77
|
-
version: 0.3
|
77
|
+
version: "0.3"
|
78
78
|
type: :runtime
|
79
79
|
prerelease: false
|
80
80
|
version_requirements: *id006
|
@@ -85,7 +85,7 @@ dependencies:
|
|
85
85
|
requirements:
|
86
86
|
- - ~>
|
87
87
|
- !ruby/object:Gem::Version
|
88
|
-
version: 0.2
|
88
|
+
version: "0.2"
|
89
89
|
type: :runtime
|
90
90
|
prerelease: false
|
91
91
|
version_requirements: *id007
|
@@ -96,7 +96,7 @@ dependencies:
|
|
96
96
|
requirements:
|
97
97
|
- - ~>
|
98
98
|
- !ruby/object:Gem::Version
|
99
|
-
version: 1.1
|
99
|
+
version: "1.1"
|
100
100
|
type: :runtime
|
101
101
|
prerelease: false
|
102
102
|
version_requirements: *id008
|
@@ -144,9 +144,11 @@ files:
|
|
144
144
|
- README.md
|
145
145
|
- Rakefile
|
146
146
|
- bin/ronin-web
|
147
|
+
- data/ronin/web/user_agents.yml
|
147
148
|
- gemspec.yml
|
148
149
|
- lib/ronin/network/mixins/web.rb
|
149
150
|
- lib/ronin/web.rb
|
151
|
+
- lib/ronin/web/config.rb
|
150
152
|
- lib/ronin/web/extensions.rb
|
151
153
|
- lib/ronin/web/extensions/nokogiri.rb
|
152
154
|
- lib/ronin/web/extensions/nokogiri/xml.rb
|
@@ -155,6 +157,7 @@ files:
|
|
155
157
|
- lib/ronin/web/extensions/nokogiri/xml/element.rb
|
156
158
|
- lib/ronin/web/extensions/nokogiri/xml/node.rb
|
157
159
|
- lib/ronin/web/extensions/nokogiri/xml/text.rb
|
160
|
+
- lib/ronin/web/mechanize.rb
|
158
161
|
- lib/ronin/web/middleware.rb
|
159
162
|
- lib/ronin/web/middleware/base.rb
|
160
163
|
- lib/ronin/web/middleware/directories.rb
|
@@ -182,6 +185,7 @@ files:
|
|
182
185
|
- lib/ronin/web/server/base.rb
|
183
186
|
- lib/ronin/web/server/web.rb
|
184
187
|
- lib/ronin/web/spider.rb
|
188
|
+
- lib/ronin/web/user_agents.rb
|
185
189
|
- lib/ronin/web/version.rb
|
186
190
|
- lib/ronin/web/web.rb
|
187
191
|
- ronin-web.gemspec
|
@@ -197,6 +201,7 @@ files:
|
|
197
201
|
- spec/web/helpers/root/test2/test2.txt
|
198
202
|
- spec/web/helpers/root/test3.txt
|
199
203
|
- spec/web/helpers/root/test3/test3.txt
|
204
|
+
- spec/web/mechanize_spec.rb
|
200
205
|
- spec/web/middleware/directories_spec.rb
|
201
206
|
- spec/web/middleware/files_spec.rb
|
202
207
|
- spec/web/middleware/filters/campaign_filter_spec.rb
|
@@ -215,6 +220,7 @@ files:
|
|
215
220
|
- spec/web/server/classes/public2/static2.txt
|
216
221
|
- spec/web/server/classes/sub_app.rb
|
217
222
|
- spec/web/server/classes/test_app.rb
|
223
|
+
- spec/web/user_agents_spec.rb
|
218
224
|
- spec/web/web_spec.rb
|
219
225
|
homepage: http://github.com/ronin-ruby/ronin-web
|
220
226
|
licenses:
|
@@ -239,7 +245,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
239
245
|
requirements: []
|
240
246
|
|
241
247
|
rubyforge_project: ronin-web
|
242
|
-
rubygems_version: 1.8.
|
248
|
+
rubygems_version: 1.8.5
|
243
249
|
signing_key:
|
244
250
|
specification_version: 3
|
245
251
|
summary: A Ruby library for Ronin that provides support for web scraping and spidering functionality.
|
@@ -259,4 +265,6 @@ test_files:
|
|
259
265
|
- spec/web/middleware/response_spec.rb
|
260
266
|
- spec/web/middleware/proxy_spec.rb
|
261
267
|
- spec/web/extensions/nokogiri_spec.rb
|
268
|
+
- spec/web/user_agents_spec.rb
|
269
|
+
- spec/web/mechanize_spec.rb
|
262
270
|
- spec/web/web_spec.rb
|