ronin-web-spider 0.1.0.beta1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.document +5 -0
- data/.github/workflows/ruby.yml +31 -0
- data/.gitignore +13 -0
- data/.rspec +1 -0
- data/.ruby-version +1 -0
- data/.yardopts +1 -0
- data/COPYING.txt +165 -0
- data/ChangeLog.md +19 -0
- data/Gemfile +31 -0
- data/README.md +139 -0
- data/Rakefile +31 -0
- data/gemspec.yml +27 -0
- data/lib/ronin/web/spider/agent.rb +302 -0
- data/lib/ronin/web/spider/archive.rb +116 -0
- data/lib/ronin/web/spider/exceptions.rb +36 -0
- data/lib/ronin/web/spider/git_archive.rb +194 -0
- data/lib/ronin/web/spider/version.rb +27 -0
- data/lib/ronin/web/spider.rb +115 -0
- data/ronin-web-spider.gemspec +61 -0
- data/spec/agent_spec.rb +585 -0
- data/spec/archive_spec.rb +91 -0
- data/spec/example_app.rb +27 -0
- data/spec/git_archive_spec.rb +137 -0
- data/spec/spec_helper.rb +4 -0
- data/spec/spider_spec.rb +252 -0
- metadata +122 -0
@@ -0,0 +1,115 @@
|
|
1
|
+
#
|
2
|
+
# ronin-web-spider - A collection of common web spidering routines.
|
3
|
+
#
|
4
|
+
# Copyright (c) 2006-2022 Hal Brodigan (postmodern.mod3 at gmail.com)
|
5
|
+
#
|
6
|
+
# ronin-web-spider is free software: you can redistribute it and/or modify
|
7
|
+
# it under the terms of the GNU Lesser General Public License as published
|
8
|
+
# by the Free Software Foundation, either version 3 of the License, or
|
9
|
+
# (at your option) any later version.
|
10
|
+
#
|
11
|
+
# ronin-web-spider is distributed in the hope that it will be useful,
|
12
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
+
# GNU Lesser General Public License for more details.
|
15
|
+
#
|
16
|
+
# You should have received a copy of the GNU Lesser General Public License
|
17
|
+
# along with ronin-web-spider. If not, see <https://www.gnu.org/licenses/>.
|
18
|
+
#
|
19
|
+
|
20
|
+
require 'ronin/web/spider/agent'
|
21
|
+
require 'ronin/web/spider/version'
|
22
|
+
|
23
|
+
module Ronin
  module Web
    #
    # Module-level shortcuts that forward directly to the class methods of
    # the same name on {Agent}.
    #
    module Spider
      class << self
        #
        # Creates a new agent and begins spidering at the given URL.
        #
        # @param [URI::HTTP, String] url
        #   The URL where spidering should start.
        #
        # @param [Hash{Symbol => Object}] kwargs
        #   Additional keyword arguments. See {Agent#initialize}.
        #
        # @yield [agent]
        #   If a block is given, it will be passed the newly created agent
        #   before it begins spidering.
        #
        # @yieldparam [Agent] agent
        #   The newly created agent.
        #
        # @see https://rubydoc.info/gems/spidr/Spidr/Agent#start_at-class_method
        #
        def start_at(url, **kwargs, &block)
          Agent.start_at(url, **kwargs, &block)
        end

        #
        # Creates a new agent and spiders the given host.
        #
        # @param [String] name
        #   The host-name to spider.
        #
        # @param [Hash{Symbol => Object}] kwargs
        #   Additional keyword arguments. See {Agent#initialize}.
        #
        # @yield [agent]
        #   If a block is given, it will be passed the newly created agent
        #   before it begins spidering.
        #
        # @yieldparam [Agent] agent
        #   The newly created agent.
        #
        # @see https://rubydoc.info/gems/spidr/Spidr/Agent#host-class_method
        #
        def host(name, **kwargs, &block)
          Agent.host(name, **kwargs, &block)
        end

        #
        # Creates a new agent and spiders the web-site located at the given
        # URL.
        #
        # @param [URI::HTTP, String] url
        #   The web-site to spider.
        #
        # @param [Hash{Symbol => Object}] kwargs
        #   Additional keyword arguments. See {Agent#initialize}.
        #
        # @yield [agent]
        #   If a block is given, it will be passed the newly created agent
        #   before it begins spidering.
        #
        # @yieldparam [Agent] agent
        #   The newly created agent.
        #
        # @see https://rubydoc.info/gems/spidr/Spidr/Agent#site-class_method
        #
        def site(url, **kwargs, &block)
          Agent.site(url, **kwargs, &block)
        end

        #
        # Creates a new agent and spiders the entire domain.
        #
        # @param [String] name
        #   The top-level domain to spider.
        #
        # @param [Hash{Symbol => Object}] kwargs
        #   Additional keyword arguments. See {Agent#initialize}.
        #
        # @yield [agent]
        #   If a block is given, it will be passed the newly created agent
        #   before it begins spidering.
        #
        # @yieldparam [Agent] agent
        #   The newly created agent.
        #
        # @see https://rubydoc.info/gems/spidr/Spidr/Agent#domain-class_method
        #
        def domain(name, **kwargs, &block)
          Agent.domain(name, **kwargs, &block)
        end
      end
    end
  end
end
|
@@ -0,0 +1,61 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'yaml'
|
4
|
+
|
5
|
+
# Builds the gem specification from the values stored in `gemspec.yml`
# (read relative to the current working directory).
Gem::Specification.new do |gem|
  config = YAML.load_file('gemspec.yml')

  # 'name' is mandatory; fetch raises KeyError when it is missing.
  gem.name = config.fetch('name')

  # When gemspec.yml has no 'version' key, load it from the library itself.
  gem.version = config.fetch('version') do
    lib_dir = File.join(File.dirname(__FILE__), 'lib')
    $LOAD_PATH.push(lib_dir) unless $LOAD_PATH.include?(lib_dir)

    require 'ronin/web/spider/version'
    Ronin::Web::Spider::VERSION
  end

  gem.summary     = config['summary']
  gem.description = config['description']
  gem.licenses    = Array(config['license'])
  gem.authors     = Array(config['authors'])
  gem.email       = config['email']
  gem.homepage    = config['homepage']

  # Only override the metadata when gemspec.yml actually provides some.
  metadata = config['metadata']
  gem.metadata = metadata if metadata

  # Returns the entries of gem.files that also match the given glob
  # pattern(s); accepts a single pattern String or an Array of patterns.
  listed = ->(patterns) { gem.files & Dir[*patterns] }

  # Start from the files listed by git, optionally narrow them to the
  # 'files' patterns, then append any generated files.
  gem.files = `git ls-files`.split($/)
  gem.files = listed.call(config['files']) if config['files']
  gem.files += Array(config['generated_files'])

  # Without an explicit 'executables' list, use the base names of the
  # listed files under bin/.
  gem.executables = config.fetch('executables') do
    listed.call('bin/*').map { |executable| File.basename(executable) }
  end

  gem.extensions       = listed.call(config['extensions'] || 'ext/**/extconf.rb')
  gem.test_files       = listed.call(config['test_files'] || 'spec/{**/}*_spec.rb')
  gem.extra_rdoc_files = listed.call(config['extra_doc_files'] || '*.{txt,md}')

  # Without an explicit 'require_paths', use whichever of ext/ and lib/
  # exist as directories.
  require_paths = config.fetch('require_paths') do
    %w[ext lib].select { |dir| File.directory?(dir) }
  end
  gem.require_paths = Array(require_paths)

  # Attributes copied from gemspec.yml unchanged (nil when the key is absent).
  %w[
    requirements
    required_ruby_version
    required_rubygems_version
    post_install_message
  ].each do |attribute|
    gem.public_send("#{attribute}=", config[attribute])
  end

  # Each dependency section maps a gem name to a comma-separated String of
  # version requirements, which is split into an Array before being added.
  {
    'dependencies'             => :add_dependency,
    'development_dependencies' => :add_development_dependency
  }.each do |section, adder|
    (config[section] || {}).each do |name, versions|
      gem.public_send(adder, name, versions.split(/,\s*/))
    end
  end
end
|