browser_crawler 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +13 -0
- data/.rspec +2 -0
- data/.rubocop.yml +10 -0
- data/.travis.yml +29 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +277 -0
- data/Rakefile +7 -0
- data/bin/console +10 -0
- data/bin/crawl +51 -0
- data/bin/setup +8 -0
- data/browser_crawler.gemspec +47 -0
- data/lib/browser_crawler.rb +12 -0
- data/lib/browser_crawler/dsl/js_helpers.rb +13 -0
- data/lib/browser_crawler/dsl/sign_in.rb +37 -0
- data/lib/browser_crawler/engine.rb +156 -0
- data/lib/browser_crawler/engine_utilities/crawl_manager.rb +100 -0
- data/lib/browser_crawler/engine_utilities/inspect_page_process.rb +74 -0
- data/lib/browser_crawler/engine_utilities/link_inspector.rb +31 -0
- data/lib/browser_crawler/engine_utilities/link_scanner.rb +38 -0
- data/lib/browser_crawler/engine_utilities/page_inspector.rb +65 -0
- data/lib/browser_crawler/errors/invalid_hooks_type.rb +12 -0
- data/lib/browser_crawler/followups/screenshots_indexer.rb +40 -0
- data/lib/browser_crawler/followups/templates/index.html.erb +69 -0
- data/lib/browser_crawler/followups/wraith_integrator.rb +41 -0
- data/lib/browser_crawler/hooks_container.rb +31 -0
- data/lib/browser_crawler/hooks_operator.rb +44 -0
- data/lib/browser_crawler/options.rb +86 -0
- data/lib/browser_crawler/report_factory.rb +22 -0
- data/lib/browser_crawler/reports/csv_report.rb +75 -0
- data/lib/browser_crawler/reports/store.rb +114 -0
- data/lib/browser_crawler/reports/yaml_report.rb +15 -0
- data/lib/browser_crawler/screenshot_operator.rb +47 -0
- data/lib/browser_crawler/support/capybara.rb +20 -0
- data/lib/browser_crawler/url_tools.rb +32 -0
- data/lib/browser_crawler/version.rb +3 -0
- metadata +244 -0
data/lib/browser_crawler/report_factory.rb ADDED
@@ -0,0 +1,22 @@
require 'fileutils'
require_relative 'reports/csv_report'
require_relative 'reports/yaml_report'

module BrowserCrawler
  # Saves store data to a YAML or CSV report file.
  module ReportFactory
    module_function

    REPORT_MATCHER = {
      yaml: Reports::YamlReport,
      csv: Reports::CsvReport
    }.freeze

    def save(store:, type:, save_folder_path:)
      FileUtils.mkdir_p(save_folder_path)
      # Fail fast (KeyError) when an unknown report type is requested.
      REPORT_MATCHER.fetch(type)
                    .new(store: store)
                    .export(save_folder_path: save_folder_path)
    end
  end
end
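For reference, a minimal usage sketch of the factory under the signatures above (the tmp/reports path and the recorded page are illustrative; Store is defined in reports/store.rb below):

require 'browser_crawler/report_factory'
require 'browser_crawler/reports/store'

store = BrowserCrawler::Reports::Store.new
store.record_page_visit(page: '/',
                        extracted_links: ['https://example.com/about'],
                        code: 200)

# Writes tmp/reports/crawler_report.yaml; type: :csv selects the CSV report.
BrowserCrawler::ReportFactory.save(store: store,
                                   type: :yaml,
                                   save_folder_path: 'tmp/reports')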
data/lib/browser_crawler/reports/csv_report.rb ADDED
@@ -0,0 +1,75 @@
require 'csv'
require 'uri'

module BrowserCrawler
  module Reports
    # Saves a store to a CSV file.
    class CsvReport
      def initialize(store:)
        @store = store
      end

      def export(save_folder_path:)
        CSV.open("#{save_folder_path}/crawler_report.csv", 'wb') do |csv|
          csv << csv_header

          @store.pages.each do |page, crawler_result|
            save_to_csv(csv, page, crawler_result)
          end
        end
      end

      private

      # Keep only http(s) links; other schemes are not reported.
      def filter_links(links)
        return nil if links.nil?

        links.select do |link|
          link =~ /\A#{URI.regexp(%w[http https])}\z/
        end
      end

      def save_to_row(page, crawler_result, link = nil)
        [page,
         link,
         crawler_result[:external],
         humanize_code(crawler_result[:code]),
         crawler_result[:code]]
      end

      # One row per extracted link, or a single link-less row.
      def save_to_csv(csv, page, crawler_result)
        extracted_links = filter_links(crawler_result[:extracted_links])

        if extracted_links.nil? || extracted_links.empty?
          csv << save_to_row(page, crawler_result)
          return
        end

        extracted_links.each do |link|
          csv << save_to_row(page, crawler_result, link)
        end
      end

      def csv_header
        ['pages',
         'extracted links',
         'is external',
         'http status',
         'http code']
      end

      def humanize_code(code)
        case code.to_i
        when 200..225 then :active
        when 401 then :unauthorized
        when 301..308 then :redirect
        else
          :broken
        end
      end
    end
  end
end
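Given the export logic above, each visited page yields one row per filtered http(s) link, or a single row with an empty link column. A sketch of the output for the store from the previous example (values illustrative):

BrowserCrawler::Reports::CsvReport.new(store: store)
                                  .export(save_folder_path: 'tmp/reports')

# tmp/reports/crawler_report.csv:
#   pages,extracted links,is external,http status,http code
#   /,https://example.com/about,false,active,200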
data/lib/browser_crawler/reports/store.rb ADDED
@@ -0,0 +1,114 @@
module BrowserCrawler
  module Reports
    # Value object that accumulates crawling results.
    # Example:
    # {
    #   pages: {
    #     '/':
    #       {
    #         screenshot: 'file1.png',
    #         error: nil,
    #         extracted_links: ['http://welcome[pdf]', 'http://support']
    #       },
    #     'welcome':
    #       {
    #         screenshot: 'file2.png',
    #         error: 'Invalid URI',
    #         extracted_links: nil
    #       }
    #   },
    #   metadata: {
    #     custom_attribute: 'Sample report title'
    #   },
    #   unrecognized_links: ['mailto://', 'javascript://'],
    #   crawler_error: {
    #     'http://welcome.page' => {
    #       message: 'Something has a wrong type',
    #       backtrace: ['/call:10', '/sum: 11']
    #     }
    #   },
    #   started_at: 12345,
    #   finished_at: 123456
    # }
    class Store
      attr_reader :pages, :metadata, :unrecognized_links, :crawler_error
      attr_accessor :error

      def initialize(pages: {},
                     metadata: {},
                     started_at: nil,
                     finished_at: nil)
        @pages = pages
        @metadata = metadata
        @started_at = started_at
        @finished_at = finished_at
        @crawler_error = {}
        @unrecognized_links = []
      end

      def start(url:)
        @pages.clear
        @started_at = Time.now
        @metadata[:url] = url
      end

      def finish
        @finished_at = Time.now
      end

      def to_h
        {}.merge(pages: @pages)
          .merge(@metadata)
          .merge(
            unrecognized_links: @unrecognized_links,
            crawler_error: @crawler_error,
            started_at: @started_at,
            finished_at: @finished_at,
            links_count: count_all_links
          )
      end

      def record_unrecognized_link(link)
        return if @unrecognized_links.include?(link)

        @unrecognized_links << link
      end

      def record_page_visit(page:,
                            extracted_links: nil,
                            screenshot_filename: nil,
                            error: nil,
                            external: false,
                            code: nil)
        @pages[page] = {
          screenshot: screenshot_filename,
          error: error,
          extracted_links: extracted_links,
          code: code,
          external: external
        }
      end

      def record_crawler_error(link:, error:)
        @crawler_error[link] = {
          message: error.message,
          backtrace: error.backtrace
        }
      end

      def visited_pages
        @pages.keys
      end

      private

      # Nil-safe total of extracted links across all visited pages.
      def count_all_links
        @pages.sum { |_, data| data&.dig(:extracted_links)&.size.to_i }
      end
    end
  end
end
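A sketch of the store lifecycle as the crawler engine would drive it, inferred from the methods above (URLs and the error are illustrative):

require 'browser_crawler/reports/store'

store = BrowserCrawler::Reports::Store.new
store.start(url: 'https://example.com')

store.record_page_visit(page: '/',
                        extracted_links: ['https://example.com/about'],
                        code: 200)
store.record_unrecognized_link('mailto:hi@example.com')
store.record_crawler_error(link: 'https://example.com/broken',
                           error: StandardError.new('timeout'))

store.finish
store.to_h
# => { pages: {...}, url: 'https://example.com',
#      unrecognized_links: ['mailto:hi@example.com'], links_count: 1, ... }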
data/lib/browser_crawler/reports/yaml_report.rb ADDED
@@ -0,0 +1,15 @@
require 'yaml'

module BrowserCrawler
  module Reports
    # Saves a store to a YAML file.
    class YamlReport
      def initialize(store:)
        @store = store
      end

      def export(save_folder_path:)
        File.write("#{save_folder_path}/crawler_report.yaml",
                   @store.to_h.to_yaml)
      end
    end
  end
end
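Direct use mirrors the CSV report (illustrative; normally reached via ReportFactory with type: :yaml):

BrowserCrawler::Reports::YamlReport.new(store: store)
                                   .export(save_folder_path: 'tmp/reports')
# Writes tmp/reports/crawler_report.yaml containing store.to_h.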
data/lib/browser_crawler/screenshot_operator.rb ADDED
@@ -0,0 +1,47 @@
require_relative 'url_tools'

module BrowserCrawler
  # Controls operations on screenshots.
  class ScreenshotOperator
    attr_reader :format, :save_screenshots, :filename_base, :screenshots_folder

    def initialize(save_screenshots: false,
                   save_screenshots_to: nil,
                   format: 'png',
                   filename: nil)
      @screenshots_folder = save_screenshots_to
      @format = format
      @save_screenshots = save_screenshots
      @filename_base = filename || 'screenshot'
    end

    def save_screenshots?
      [screenshots_folder, save_screenshots].any?
    end

    def file_path(url: nil)
      "#{save_path}/#{filename(url: url)}"
    end

    def filename(url: nil)
      if !filename_base_default? || url.nil?
        "#{filename_prefix}_#{filename_base}.#{format}"
      else
        # Derive the name from the URL path: '/' becomes '%', dots are dropped.
        path = UrlTools.uri(url: url)&.path&.gsub('/', '%')&.gsub('.', '')
        "#{filename_prefix}_#{path}.#{format}"
      end
    end

    private

    def filename_base_default?
      filename_base == 'screenshot'
    end

    def save_path
      screenshots_folder || File.join(Dir.pwd, 'tmp', 'screenshots')
    end

    def filename_prefix
      Time.now.getutc.to_s.tr(' ', '_')
    end
  end
end
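Based on the branching in #filename, the operator names files in two ways (timestamps below are illustrative):

require 'browser_crawler/screenshot_operator'

op = BrowserCrawler::ScreenshotOperator.new(save_screenshots: true)
op.save_screenshots? # => true
op.filename(url: 'https://example.com/users/list.html')
# => "2019-08-23_12:00:00_UTC_%users%listhtml.png"

named = BrowserCrawler::ScreenshotOperator.new(filename: 'home')
named.filename
# => "2019-08-23_12:00:00_UTC_home.png" (a custom base wins over the URL)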
data/lib/browser_crawler/support/capybara.rb ADDED
@@ -0,0 +1,20 @@
require 'capybara'
require 'capybara/cuprite'

# Registers a new Chrome (Cuprite) driver for Capybara.
module Capybara
  module_function

  def register_chrome_driver(name, options: {})
    options[:browser_options] ||= {
      '--headless' => nil, '--disable-gpu' => nil,
      '--disable-extensions' => nil, '--no-sandbox' => nil
    }

    Capybara.register_driver name do |app|
      ::Capybara::Cuprite::Driver.new(app, options)
    end
  end
end
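A usage sketch; the driver name is arbitrary, and window_size is a standard Cuprite option passed through to the driver:

require 'browser_crawler/support/capybara'

Capybara.register_chrome_driver(:headless_chrome,
                                options: { window_size: [1280, 1024] })
Capybara.default_driver = :headless_chrome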
data/lib/browser_crawler/url_tools.rb ADDED
@@ -0,0 +1,32 @@
require 'uri'

module BrowserCrawler
  module UrlTools
    def uri(url:)
      uri!(url: url)
    rescue URI::InvalidURIError
      nil
    end

    def uri!(url:)
      string_url = url.to_s
      raise URI::InvalidURIError unless string_url =~ /\A#{URI.regexp(%w[http https])}\z/

      URI(string_url)
    end

    # Normalizes a URI to a string, dropping default ports and trailing slashes.
    def full_url(uri:)
      path_query = get_path_query(uri: uri)
      if uri.port == 80 || uri.port == 443
        "#{uri.scheme}://#{uri.host}#{uri.path}#{path_query}"
      else
        "#{uri.scheme}://#{uri.host}:#{uri.port}#{uri.path}#{path_query}"
      end.sub(%r{(/)+$}, '')
    end

    def get_path_query(uri:)
      uri_query = uri.query
      uri_query.nil? || uri_query == '' ? nil : "?#{uri_query}"
    end

    module_function :uri, :uri!, :full_url, :get_path_query
  end
end
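The helpers above behave as follows on a few sample inputs (illustrative):

require 'browser_crawler/url_tools'

BrowserCrawler::UrlTools.uri(url: 'not a url')
# => nil (uri! would raise URI::InvalidURIError instead)

BrowserCrawler::UrlTools.full_url(uri: URI('https://example.com:443/docs/'))
# => "https://example.com/docs" (default port and trailing slash dropped)

BrowserCrawler::UrlTools.full_url(uri: URI('http://example.com:3000/a?b=1'))
# => "http://example.com:3000/a?b=1" (non-default port kept)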
metadata ADDED
@@ -0,0 +1,244 @@
--- !ruby/object:Gem::Specification
name: browser_crawler
version: !ruby/object:Gem::Version
  version: 0.4.0
platform: ruby
authors:
- Dmytro Samodurov
- Artem Rumiantcev
- Denys Ivanchuk
- Sergiy Tyatin
autorequire:
bindir: bin
cert_chain: []
date: 2019-08-23 00:00:00.000000000 Z
dependencies:
- !ruby/object:Gem::Dependency
  name: activesupport
  requirement: !ruby/object:Gem::Requirement
    requirements:
    - - "~>"
      - !ruby/object:Gem::Version
        version: '5.2'
    - - ">="
      - !ruby/object:Gem::Version
        version: 5.2.2
  type: :runtime
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
    requirements:
    - - "~>"
      - !ruby/object:Gem::Version
        version: '5.2'
    - - ">="
      - !ruby/object:Gem::Version
        version: 5.2.2
- !ruby/object:Gem::Dependency
  name: capybara
  requirement: !ruby/object:Gem::Requirement
    requirements:
    - - ">="
      - !ruby/object:Gem::Version
        version: 3.24.0
    - - "~>"
      - !ruby/object:Gem::Version
        version: '3.24'
  type: :runtime
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
    requirements:
    - - ">="
      - !ruby/object:Gem::Version
        version: 3.24.0
    - - "~>"
      - !ruby/object:Gem::Version
        version: '3.24'
- !ruby/object:Gem::Dependency
  name: chromedriver-helper
  requirement: !ruby/object:Gem::Requirement
    requirements:
    - - ">="
      - !ruby/object:Gem::Version
        version: 2.1.0
    - - "~>"
      - !ruby/object:Gem::Version
        version: '2.1'
  type: :runtime
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
    requirements:
    - - ">="
      - !ruby/object:Gem::Version
        version: 2.1.0
    - - "~>"
      - !ruby/object:Gem::Version
        version: '2.1'
- !ruby/object:Gem::Dependency
  name: cuprite
  requirement: !ruby/object:Gem::Requirement
    requirements:
    - - "~>"
      - !ruby/object:Gem::Version
        version: 0.6.0
  type: :runtime
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
    requirements:
    - - "~>"
      - !ruby/object:Gem::Version
        version: 0.6.0
- !ruby/object:Gem::Dependency
  name: bundler
  requirement: !ruby/object:Gem::Requirement
    requirements:
    - - ">="
      - !ruby/object:Gem::Version
        version: 1.17.2
    - - "~>"
      - !ruby/object:Gem::Version
        version: 1.17.2
  type: :development
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
    requirements:
    - - ">="
      - !ruby/object:Gem::Version
        version: 1.17.2
    - - "~>"
      - !ruby/object:Gem::Version
        version: 1.17.2
- !ruby/object:Gem::Dependency
  name: pry-byebug
  requirement: !ruby/object:Gem::Requirement
    requirements:
    - - ">="
      - !ruby/object:Gem::Version
        version: '3.6'
    - - "~>"
      - !ruby/object:Gem::Version
        version: '3.6'
  type: :development
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
    requirements:
    - - ">="
      - !ruby/object:Gem::Version
        version: '3.6'
    - - "~>"
      - !ruby/object:Gem::Version
        version: '3.6'
- !ruby/object:Gem::Dependency
  name: rake
  requirement: !ruby/object:Gem::Requirement
    requirements:
    - - "~>"
      - !ruby/object:Gem::Version
        version: '10.0'
  type: :development
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
    requirements:
    - - "~>"
      - !ruby/object:Gem::Version
        version: '10.0'
- !ruby/object:Gem::Dependency
  name: rspec
  requirement: !ruby/object:Gem::Requirement
    requirements:
    - - "~>"
      - !ruby/object:Gem::Version
        version: '3.0'
  type: :development
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
    requirements:
    - - "~>"
      - !ruby/object:Gem::Version
        version: '3.0'
- !ruby/object:Gem::Dependency
  name: rubocop
  requirement: !ruby/object:Gem::Requirement
    requirements:
    - - "~>"
      - !ruby/object:Gem::Version
        version: '0.66'
  type: :development
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
    requirements:
    - - "~>"
      - !ruby/object:Gem::Version
        version: '0.66'
description: ''
email:
- dimasamodurov@gmail.com
- tema.place@gmail.com
executables:
- console
- crawl
- setup
extensions: []
extra_rdoc_files: []
files:
- ".gitignore"
- ".rspec"
- ".rubocop.yml"
- ".travis.yml"
- Gemfile
- LICENSE.txt
- README.md
- Rakefile
- bin/console
- bin/crawl
- bin/setup
- browser_crawler.gemspec
- lib/browser_crawler.rb
- lib/browser_crawler/dsl/js_helpers.rb
- lib/browser_crawler/dsl/sign_in.rb
- lib/browser_crawler/engine.rb
- lib/browser_crawler/engine_utilities/crawl_manager.rb
- lib/browser_crawler/engine_utilities/inspect_page_process.rb
- lib/browser_crawler/engine_utilities/link_inspector.rb
- lib/browser_crawler/engine_utilities/link_scanner.rb
- lib/browser_crawler/engine_utilities/page_inspector.rb
- lib/browser_crawler/errors/invalid_hooks_type.rb
- lib/browser_crawler/followups/screenshots_indexer.rb
- lib/browser_crawler/followups/templates/index.html.erb
- lib/browser_crawler/followups/wraith_integrator.rb
- lib/browser_crawler/hooks_container.rb
- lib/browser_crawler/hooks_operator.rb
- lib/browser_crawler/options.rb
- lib/browser_crawler/report_factory.rb
- lib/browser_crawler/reports/csv_report.rb
- lib/browser_crawler/reports/store.rb
- lib/browser_crawler/reports/yaml_report.rb
- lib/browser_crawler/screenshot_operator.rb
- lib/browser_crawler/support/capybara.rb
- lib/browser_crawler/url_tools.rb
- lib/browser_crawler/version.rb
homepage: https://github.com/DimaSamodurov/browser_crawler
licenses:
- MIT
metadata:
  homepage_uri: https://github.com/DimaSamodurov/browser_crawler
  source_code_uri: https://github.com/DimaSamodurov/browser_crawler
post_install_message:
rdoc_options: []
require_paths:
- lib
required_ruby_version: !ruby/object:Gem::Requirement
  requirements:
  - - ">="
    - !ruby/object:Gem::Version
      version: 2.5.0
required_rubygems_version: !ruby/object:Gem::Requirement
  requirements:
  - - ">="
    - !ruby/object:Gem::Version
      version: '0'
requirements: []
rubygems_version: 3.0.1
signing_key:
specification_version: 4
summary: Simple site crawler using Capybara
test_files: []