ronin-web-spider 0.1.0.beta1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.document +5 -0
- data/.github/workflows/ruby.yml +31 -0
- data/.gitignore +13 -0
- data/.rspec +1 -0
- data/.ruby-version +1 -0
- data/.yardopts +1 -0
- data/COPYING.txt +165 -0
- data/ChangeLog.md +19 -0
- data/Gemfile +31 -0
- data/README.md +139 -0
- data/Rakefile +31 -0
- data/gemspec.yml +27 -0
- data/lib/ronin/web/spider/agent.rb +302 -0
- data/lib/ronin/web/spider/archive.rb +116 -0
- data/lib/ronin/web/spider/exceptions.rb +36 -0
- data/lib/ronin/web/spider/git_archive.rb +194 -0
- data/lib/ronin/web/spider/version.rb +27 -0
- data/lib/ronin/web/spider.rb +115 -0
- data/ronin-web-spider.gemspec +61 -0
- data/spec/agent_spec.rb +585 -0
- data/spec/archive_spec.rb +91 -0
- data/spec/example_app.rb +27 -0
- data/spec/git_archive_spec.rb +137 -0
- data/spec/spec_helper.rb +4 -0
- data/spec/spider_spec.rb +252 -0
- metadata +122 -0
@@ -0,0 +1,115 @@
|
|
1
|
+
#
|
2
|
+
# ronin-web-spider - A collection of common web spidering routines.
|
3
|
+
#
|
4
|
+
# Copyright (c) 2006-2022 Hal Brodigan (postmodern.mod3 at gmail.com)
|
5
|
+
#
|
6
|
+
# ronin-web-spider is free software: you can redistribute it and/or modify
|
7
|
+
# it under the terms of the GNU Lesser General Public License as published
|
8
|
+
# by the Free Software Foundation, either version 3 of the License, or
|
9
|
+
# (at your option) any later version.
|
10
|
+
#
|
11
|
+
# ronin-web-spider is distributed in the hope that it will be useful,
|
12
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
+
# GNU Lesser General Public License for more details.
|
15
|
+
#
|
16
|
+
# You should have received a copy of the GNU Lesser General Public License
|
17
|
+
# along with ronin-web-spider. If not, see <https://www.gnu.org/licenses/>.
|
18
|
+
#
|
19
|
+
|
20
|
+
require 'ronin/web/spider/agent'
|
21
|
+
require 'ronin/web/spider/version'
|
22
|
+
|
23
|
+
module Ronin
  module Web
    module Spider
      class << self
        #
        # Spiders outward from a single starting URL using a new agent.
        # Forwards all arguments to {Agent.start_at}.
        #
        # @param [URI::HTTP, String] url
        #   The URL where spidering begins.
        #
        # @param [Hash{Symbol => Object}] kwargs
        #   Extra keyword arguments, passed through untouched.
        #   See {Agent#initialize}.
        #
        # @yield [agent]
        #   An optional block which receives the new agent before it starts
        #   spidering.
        #
        # @yieldparam [Agent] agent
        #   The freshly created agent.
        #
        # @see https://rubydoc.info/gems/spidr/Spidr/Agent#start_at-class_method
        #
        def start_at(url, **kwargs, &block)
          Agent.start_at(url, **kwargs, &block)
        end

        #
        # Spiders a single host using a new agent.
        # Forwards all arguments to {Agent.host}.
        #
        # @param [String] name
        #   The host-name to spider.
        #
        # @param [Hash{Symbol => Object}] kwargs
        #   Extra keyword arguments, passed through untouched.
        #   See {Agent#initialize}.
        #
        # @yield [agent]
        #   An optional block which receives the new agent before it starts
        #   spidering.
        #
        # @yieldparam [Agent] agent
        #   The freshly created agent.
        #
        # @see https://rubydoc.info/gems/spidr/Spidr/Agent#host-class_method
        #
        def host(name, **kwargs, &block)
          Agent.host(name, **kwargs, &block)
        end

        #
        # Spiders the web-site found at the given URL using a new agent.
        # Forwards all arguments to {Agent.site}.
        #
        # @param [URI::HTTP, String] url
        #   The web-site to spider.
        #
        # @param [Hash{Symbol => Object}] kwargs
        #   Extra keyword arguments, passed through untouched.
        #   See {Agent#initialize}.
        #
        # @yield [agent]
        #   An optional block which receives the new agent before it starts
        #   spidering.
        #
        # @yieldparam [Agent] agent
        #   The freshly created agent.
        #
        # @see https://rubydoc.info/gems/spidr/Spidr/Agent#site-class_method
        #
        def site(url, **kwargs, &block)
          Agent.site(url, **kwargs, &block)
        end

        #
        # Spiders an entire domain using a new agent.
        # Forwards all arguments to {Agent.domain}.
        #
        # @param [String] name
        #   The top-level domain to spider.
        #
        # @param [Hash{Symbol => Object}] kwargs
        #   Extra keyword arguments, passed through untouched.
        #   See {Agent#initialize}.
        #
        # @yield [agent]
        #   An optional block which receives the new agent before it starts
        #   spidering.
        #
        # @yieldparam [Agent] agent
        #   The freshly created agent.
        #
        # @see https://rubydoc.info/gems/spidr/Spidr/Agent#domain-class_method
        #
        def domain(name, **kwargs, &block)
          Agent.domain(name, **kwargs, &block)
        end
      end
    end
  end
end
|
@@ -0,0 +1,61 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'yaml'
|
4
|
+
|
5
|
+
# Builds the gem specification from the values stored in gemspec.yml
# (resolved relative to the current working directory). Keys missing from
# the YAML file fall back to the defaults computed below.
Gem::Specification.new do |gem|
  gemspec = YAML.load_file('gemspec.yml')

  gem.name    = gemspec.fetch('name')
  gem.version = gemspec.fetch('version') do
    # No explicit version in gemspec.yml: load the VERSION constant from the
    # library itself, making sure lib/ is on the load path first.
    lib_dir = File.join(File.dirname(__FILE__), 'lib')
    $LOAD_PATH << lib_dir unless $LOAD_PATH.include?(lib_dir)

    require 'ronin/web/spider/version'
    Ronin::Web::Spider::VERSION
  end

  gem.summary     = gemspec['summary']
  gem.description = gemspec['description']
  gem.licenses    = Array(gemspec['license'])
  gem.authors     = Array(gemspec['authors'])
  gem.email       = gemspec['email']
  gem.homepage    = gemspec['homepage']
  gem.metadata    = gemspec['metadata'] if gemspec['metadata']

  # Expands glob pattern(s) but keeps only paths already listed in gem.files.
  # Evaluated lazily, so it always sees the current value of gem.files.
  glob = ->(patterns) { gem.files & Dir[*patterns] }

  # `git ls-files` prints one path per line; split on a literal newline
  # rather than the mutable `$/` global.
  gem.files  = `git ls-files`.split("\n")
  gem.files  = glob[gemspec['files']] if gemspec['files']
  gem.files += Array(gemspec['generated_files'])

  gem.executables = gemspec.fetch('executables') do
    glob['bin/*'].map { |path| File.basename(path) }
  end

  gem.extensions       = glob[gemspec['extensions']      || 'ext/**/extconf.rb']
  gem.test_files       = glob[gemspec['test_files']      || 'spec/{**/}*_spec.rb']
  gem.extra_rdoc_files = glob[gemspec['extra_doc_files'] || '*.{txt,md}']

  gem.require_paths = Array(gemspec.fetch('require_paths') {
    %w[ext lib].select { |dir| File.directory?(dir) }
  })

  gem.requirements              = gemspec['requirements']
  gem.required_ruby_version     = gemspec['required_ruby_version']
  gem.required_rubygems_version = gemspec['required_rubygems_version']
  gem.post_install_message      = gemspec['post_install_message']

  # Normalizes a dependency's version entry into a list of requirement
  # strings. Accepts a comma-separated String ("~> 1.0, >= 1.0.2"), an Array
  # of such strings, or nil (a dependency listed without any constraint),
  # which yields an empty list instead of raising NoMethodError.
  split = lambda do |versions|
    Array(versions).flat_map { |version| version.to_s.split(/,\s*/) }
  end

  # Registers every name => versions pair found under `key` in gemspec.yml
  # using the given Gem::Specification method.
  add_dependencies = lambda do |key, adder|
    (gemspec[key] || {}).each do |name, versions|
      gem.public_send(adder, name, *split[versions])
    end
  end

  add_dependencies['dependencies', :add_dependency]
  add_dependencies['development_dependencies', :add_development_dependency]
end
|