recluse 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +12 -0
- data/.rubocop.yml +2 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +235 -0
- data/Rakefile +48 -0
- data/exe/recluse +5 -0
- data/lib/recluse.rb +7 -0
- data/lib/recluse/cli/blacklist.rb +59 -0
- data/lib/recluse/cli/main.rb +287 -0
- data/lib/recluse/cli/profile.rb +117 -0
- data/lib/recluse/cli/roots.rb +59 -0
- data/lib/recluse/cli/whitelist.rb +59 -0
- data/lib/recluse/hashtree.rb +172 -0
- data/lib/recluse/info.rb +9 -0
- data/lib/recluse/link.rb +89 -0
- data/lib/recluse/profile.rb +292 -0
- data/lib/recluse/result.rb +42 -0
- data/lib/recluse/statuscode.rb +91 -0
- data/recluse.gemspec +34 -0
- metadata +233 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 9bbde60ecb2e16552e859d1bb4ccf9bf75f7c52f
|
4
|
+
data.tar.gz: 7783561aaa9cdb24f40be6c5a328102b44b76e48
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: eb9b94b6e6b15abf67b23b8ac27ea0206ad6dfb053377f68d441236e2dfc5bcece512e3b3ed21ff51ef3ea637f35f510801279f6714462fa1a5331d3dd5bf930
|
7
|
+
data.tar.gz: b2566d865fa51d7bd365fa92ce25807954fc8cd9330b8b75cbb5c46b242b583d57f6f78714a00687ce43dd8349be3c808439a150a07c8f8b8f0a19db626e3ab0
|
data/.gitignore
ADDED
data/.rubocop.yml
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2016 James Anthony Bruno
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,235 @@
|
|
1
|
+
# Recluse
|
2
|
+
|
3
|
+
**Recluse** is a web crawler meant to ease quality assurance. Currently, it has three crawling tests:
|
4
|
+
|
5
|
+
- **Status**—checks the [HTTP status codes](https://en.wikipedia.org/wiki/List_of_HTTP_status_codes) of links on the site. Good for detecting broken links.
|
6
|
+
- **Find**—finds pages with links matching the pattern. Good for ensuring that references to a page are removed or renamed.
|
7
|
+
- **Assert**—checks pages for the existence of HTML elements. Good for asserting that things are consistent across pages.
|
8
|
+
|
9
|
+
## Installation
|
10
|
+
|
11
|
+
Add this line to your application's Gemfile:
|
12
|
+
|
13
|
+
```ruby
|
14
|
+
gem 'recluse'
|
15
|
+
```
|
16
|
+
|
17
|
+
And then execute:
|
18
|
+
|
19
|
+
$ bundle
|
20
|
+
|
21
|
+
Or install it yourself as:
|
22
|
+
|
23
|
+
$ gem install recluse
|
24
|
+
|
25
|
+
## Profiles
|
26
|
+
|
27
|
+
Recluse depends on creating profiles for your sites. This way, the configuration can be reusable for frequent quality assurance checks. Profiles are saved as YAML files (.yaml) in `~/.recluse/` and have the following format:
|
28
|
+
|
29
|
+
```yaml
|
30
|
+
---
|
31
|
+
name: profile_name
|
32
|
+
roots:
|
33
|
+
- http://example.com/
|
34
|
+
- http://anotherroot.biz/subdir
|
35
|
+
email: email@domain.com
|
36
|
+
blacklist:
|
37
|
+
- http://example.com/dontgohere/*
|
38
|
+
whitelist:
|
39
|
+
- http://example.com/dontgohere/unlessitshere/*
|
40
|
+
internal_only: false
|
41
|
+
scheme_squash: false
|
42
|
+
redirect: false
|
43
|
+
```
|
44
|
+
|
45
|
+
### Profile options
|
46
|
+
|
47
|
+
| Name | Required | Type | Default | Description |
|
48
|
+
|------|----------|------|---------|-------------|
|
49
|
+
| name | Yes | String | | The name of your profile for identification. Should also match the filename (i.e., `site` has filename `site.yaml`). |
|
50
|
+
| roots | Yes | Array of URLs | | The roots to start from for spidering. Will spider all subdirectories and files. |
|
51
|
+
| email | Yes | String | | Your email. This is for identification of who is crawling a web page in case a system administrator has issues with it. |
|
52
|
+
| blacklist | No | Array of globs | Empty array | [Glob patterns](https://en.wikipedia.org/wiki/Glob_(programming)) of sites not to spider. Useful to keep Recluse focused only on the important stuff. |
|
53
|
+
| whitelist | No | Array of globs | Empty array | [Glob patterns](https://en.wikipedia.org/wiki/Glob_(programming)) of sites to spider, even if they are blacklisted. |
|
54
|
+
| internal_only | No | Boolean | `false` | If true, Recluse will not follow external links. If false, external links are also checked, but only in **status** mode (**find** and **assert** always stay on internal pages). |
|
55
|
+
| scheme_squash | No | Boolean | `false` | Treats "http" URLs the same as "https". This way, Recluse will not redundantly spider secure and nonsecure duplicates of the same page. |
|
56
|
+
| redirect | No | Boolean | `false` | Follow the redirect to the resulting page if true. |
|
57
|
+
|
58
|
+
## Use
|
59
|
+
|
60
|
+
After installation, the `recluse` executable should be available for your command line.
|
61
|
+
|
62
|
+
### Tests
|
63
|
+
|
64
|
+
#### Status
|
65
|
+
|
66
|
+
Spiders through the profile and reports the [HTTP status codes](https://en.wikipedia.org/wiki/List_of_HTTP_status_codes) of the links. If the profile is not internal only, external links will also have their statuses checked.
|
67
|
+
|
68
|
+
$ recluse status csv_path profile1 [profile2] ... [options]
|
69
|
+
|
70
|
+
| Argument | Alias | Required | Type | Default | Description |
|
71
|
+
|----------|-------|----------|------|---------|-------------|
|
72
|
+
| csv_path | | Yes | String | | The path of where to save results. Results are saved as CSV (comma-separated values). |
|
73
|
+
| profiles | | Yes | Array of profile names | | List of profiles to check. More than one profile can be checked in one run. |
|
74
|
+
| group_by | `--group-by`<br/>`-g` | No | One of `none` or `url` | `none` | What to group by in the result output. If `none`, there will be a row for each pair of checked URL and the page it was found on. If `url`, there will be one row for each URL, and the page cell will have a list of every page the URL was found on. |
|
75
|
+
| include | `--include`<br/>`-i` | No | Array of status codes | Include all | Include these status codes in the results. Can be a specific number (ex: `200`) or a wildcard (ex: `2xx`). You can also include `idk` for pages that result in errors that prevent status code detection. |
|
76
|
+
| exclude | `--exclude`<br/>`-x` | No | Array of status codes | Exclude none | Exclude these status codes from the results. Same format as including. |
|
77
|
+
|
78
|
+
##### Output format
|
79
|
+
|
80
|
+
```csv
|
81
|
+
Status code,URL,On page,With error
|
82
|
+
```
|
83
|
+
|
84
|
+
#### Find
|
85
|
+
|
86
|
+
Spiders through the profiles and checks if a link matching one of the provided patterns is found. Will only go over internal pages.
|
87
|
+
|
88
|
+
$ recluse find csv_path profile1 [profile2] ... --globs pattern1 [pattern2] ... [options]
|
89
|
+
|
90
|
+
| Argument | Alias | Required | Type | Default | Description |
|
91
|
+
|----------|-------|----------|------|---------|-------------|
|
92
|
+
| csv_path | | Yes | String | | The path of where to save results. Results are saved as CSV (comma-separated values). |
|
93
|
+
| profiles | | Yes | Array of profile names | | List of profiles to check. More than one profile can be checked in one run. |
|
94
|
+
| globs | `--globs`<br/>`-G` | Yes | Array of globs | | [Glob patterns](https://en.wikipedia.org/wiki/Glob_(programming)) to find as URLs of links on the page. |
|
95
|
+
| group_by | `--group-by`<br/>`-g` | No | One of `none`, `url`, or `page` | `none` | What to group by in the result output. If `none`, there will be a row for each pair of checked URL and the page it was found on. If `url`, there will be one row for each URL, and the page cell will have a list of every page the URL was found on. If `page`, there will be one row for each page, and the URL cell will list every matching URL found on the page. |
|
96
|
+
|
97
|
+
##### Output format
|
98
|
+
|
99
|
+
###### Group by `none` or `url`
|
100
|
+
|
101
|
+
```csv
|
102
|
+
Matching URLs,Pages
|
103
|
+
```
|
104
|
+
|
105
|
+
###### Group by `page`
|
106
|
+
|
107
|
+
```csv
|
108
|
+
Page,Matching URLs
|
109
|
+
```
|
110
|
+
|
111
|
+
#### Assert
|
112
|
+
|
113
|
+
Asserts the existence of an HTML element using [CSS-style selectors](https://developer.mozilla.org/en-US/docs/Web/Guide/CSS/Getting_started/Selectors). Will only check internal pages.
|
114
|
+
|
115
|
+
$ recluse assert csv_path profile1 [profile2] ... --exists selector1 [selector2] ...
|
116
|
+
|
117
|
+
| Argument | Alias | Required | Type | Default | Description |
|
118
|
+
|----------|-------|----------|------|---------|-------------|
|
119
|
+
| csv_path | | Yes | String | | The path of where to save results. Results are saved as CSV (comma-separated values). |
|
120
|
+
| profiles | | Yes | Array of profile names | | List of profiles to check. More than one profile can be checked in one run. |
|
121
|
+
| true | `--true`<br/>`--report-true-only` | No | Boolean | `false` | Report only true assertions. Reports both true and false assertions by default. |
|
122
|
+
| false | `--false`<br/>`--report-false-only` | No | Boolean | `false` | Report only false assertions. Reports both true and false assertions by default. |
|
123
|
+
| exists | `--exists`<br/>`-e` | Yes | Array of CSS selectors | | CSS selectors to assert the existence of on each spidered page. |
|
124
|
+
|
125
|
+
##### Output format
|
126
|
+
|
127
|
+
```csv
|
128
|
+
Selector,Exists,On page
|
129
|
+
```
|
130
|
+
|
131
|
+
### Profile management
|
132
|
+
|
133
|
+
#### Where
|
134
|
+
|
135
|
+
Path where the profiles are stored for manual edits.
|
136
|
+
|
137
|
+
$ recluse where
|
138
|
+
|
139
|
+
#### Creation
|
140
|
+
|
141
|
+
Create a profile.
|
142
|
+
|
143
|
+
$ recluse profile create [options] name email root1 [root2] ...
|
144
|
+
|
145
|
+
For further description of the arguments, check the **Profile options** section.
|
146
|
+
|
147
|
+
| Argument | Alias | Required | Type | Default |
|
148
|
+
|----------|-------|----------|------|---------|
|
149
|
+
| name | | Yes | String | |
|
150
|
+
| email | | Yes | String | |
|
151
|
+
| roots | | Yes | Array of strings | |
|
152
|
+
| blacklist | `--blacklist` | No | Array of globs | Empty array |
|
153
|
+
| whitelist | `--whitelist` | No | Array of globs | Empty array |
|
154
|
+
| internal_only | `--internal-only`<br/>`--no-internal-only` | No | Boolean | `false` |
|
155
|
+
| scheme_squash | `--scheme-squash`<br/>`--no-scheme-squash` | No | Boolean | `false` |
|
156
|
+
| redirect | `--redirect`<br/>`--no-redirect` | No | Boolean | `false` |
|
157
|
+
|
158
|
+
#### Edit
|
159
|
+
|
160
|
+
Edit profile options. Any option not provided will stay as it was.
|
161
|
+
|
162
|
+
$ recluse profile edit name [options]
|
163
|
+
|
164
|
+
| Argument | Alias | Required | Type |
|
165
|
+
|----------|-------|----------|------|
|
166
|
+
| name | | Yes | String |
|
167
|
+
| email | `--email` | No | String |
|
168
|
+
| roots | `--roots` | No | Array of strings |
|
169
|
+
| blacklist | `--blacklist` | No | Array of globs |
|
170
|
+
| whitelist | `--whitelist` | No | Array of globs |
|
171
|
+
| internal_only | `--internal-only`<br/>`--no-internal-only` | No | Boolean |
|
172
|
+
| scheme_squash | `--scheme-squash`<br/>`--no-scheme-squash` | No | Boolean |
|
173
|
+
| redirect | `--redirect`<br/>`--no-redirect` | No | Boolean |
|
174
|
+
|
175
|
+
##### Blacklist, whitelist, and roots
|
176
|
+
|
177
|
+
More powerful editing of the blacklist, whitelist, and roots lists. The examples below use `blacklist`, but the same subcommands work for all three list types (`blacklist`, `whitelist`, and `roots`). Note, however, that a profile with no roots will not run.
|
178
|
+
|
179
|
+
###### Add
|
180
|
+
|
181
|
+
Add patterns/roots to the profile's list.
|
182
|
+
|
183
|
+
$ recluse profile blacklist add name new_thing1 [new_thing2] ...
|
184
|
+
|
185
|
+
###### Remove
|
186
|
+
|
187
|
+
Remove patterns/roots from the profile's list.
|
188
|
+
|
189
|
+
$ recluse profile blacklist remove name thing1 [thing2] ...
|
190
|
+
|
191
|
+
###### Clear
|
192
|
+
|
193
|
+
Remove all patterns/roots from the profile's list.
|
194
|
+
|
195
|
+
$ recluse profile blacklist clear name
|
196
|
+
|
197
|
+
###### List
|
198
|
+
|
199
|
+
List the patterns/roots in the profile's list.
|
200
|
+
|
201
|
+
$ recluse profile blacklist list name
|
202
|
+
|
203
|
+
#### Remove
|
204
|
+
|
205
|
+
Delete a profile.
|
206
|
+
|
207
|
+
$ recluse profile remove name
|
208
|
+
|
209
|
+
#### Rename
|
210
|
+
|
211
|
+
Rename a profile.
|
212
|
+
|
213
|
+
$ recluse profile rename old_name new_name
|
214
|
+
|
215
|
+
#### List
|
216
|
+
|
217
|
+
List all profiles.
|
218
|
+
|
219
|
+
$ recluse profile list
|
220
|
+
|
221
|
+
#### Info
|
222
|
+
|
223
|
+
List the YAML info of the profile.
|
224
|
+
|
225
|
+
$ recluse profile info name
|
226
|
+
|
227
|
+
## Contributing
|
228
|
+
|
229
|
+
Bug reports and pull requests are welcome on GitHub.
|
230
|
+
|
231
|
+
|
232
|
+
## License
|
233
|
+
|
234
|
+
The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
|
235
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
require 'bundler/gem_tasks'
require 'rake/testtask'
require 'fileutils'

# Bare `rake` runs the test suite. (No :spec task is defined anywhere in this
# Rakefile, so pointing the default at :spec made `rake` fail immediately.)
task default: :test

task :rubocop do
  # Passing a block to `sh` stops Rake from aborting when RuboCop exits
  # non-zero; the result is deliberately ignored.
  sh('rubocop -aSE') { |_ok, _res| }
end

namespace :test do
  desc 'Setup test environment'
  task :before do
    $stderr.puts 'Starting server'
    out_log = './test/site/logs/httpd-out.log'
    err_log = './test/site/logs/httpd-err.log'
    [out_log, err_log].each do |log|
      # The logs directory may be missing on a fresh checkout.
      FileUtils.mkdir_p(File.dirname(log))
      FileUtils.touch(log) unless File.exist?(log)
    end

    # Serve the fixture site on port 9533 for the duration of the test run.
    @server = Process.spawn(
      'ruby -run -e httpd ./test/site/ -p 9533',
      in: :close,
      out: out_log,
      err: err_log
    )
    # Give the server a moment to start accepting connections.
    sleep 1
  end

  Rake::TestTask.new(:run) do |t|
    t.pattern = 'test/*.rb'
    t.verbose = false
  end

  desc 'Teardown test environment'
  task :after do
    $stderr.puts 'Stopping server'
    # Nothing to stop if test:before never got as far as spawning the server.
    next if @server.nil?
    begin
      Process.kill 'TERM', @server
      # Reap the child so it does not linger as a zombie.
      Process.wait @server
    rescue Errno::ESRCH, Errno::ECHILD
      # The server already exited or was already reaped; nothing left to do.
      nil
    end
    @server = nil
  end
end

# Runs the suite against the fixture server, always tearing the server down
# afterwards, even when tests fail.
task :test do
  Rake::Task['test:before'].invoke
  begin
    Rake::Task['test:run'].invoke
  ensure
    Rake::Task['test:after'].invoke
  end
end
|
data/exe/recluse
ADDED
data/lib/recluse.rb
ADDED
@@ -0,0 +1,59 @@
|
|
1
|
+
require 'thor'
|
2
|
+
require 'user_config'
|
3
|
+
|
4
|
+
module Recluse
  module CLI
    ##
    # Blacklist related commands.
    class Blacklist < Thor #:nodoc: all
      no_commands do
        ##
        # Looks up the saved profile +name+ (stored as <tt>name.yaml</tt> in
        # the <tt>.recluse</tt> user config directory) and returns it.
        # Prints a message and exits with status -1 when the profile file
        # does not exist.
        def load_profile(name)
          uconf = UserConfig.new '.recluse'
          unless uconf.exist?("#{name}.yaml")
            puts "Profile #{name} doesn't exist"
            exit(-1)
          end
          uconf["#{name}.yaml"]
        end
      end

      desc 'add profile pattern1 [pattern2] ...', 'add glob patterns to blacklist'
      ##
      # Appends +patterns+ to the profile's blacklist and saves the profile.
      def add(name, *patterns)
        profile = load_profile(name)
        # Array() also covers a 'blacklist' key that is present but empty (nil).
        profile['blacklist'] = Array(profile['blacklist']) + patterns
        profile.save
      end

      desc 'remove profile pattern1 [pattern2] ...', 'remove glob patterns from blacklist'
      ##
      # Removes every occurrence of +patterns+ from the profile's blacklist.
      # Does nothing (and does not re-save) when the profile has no blacklist.
      def remove(name, *patterns)
        profile = load_profile(name)
        return unless profile.key?('blacklist')
        profile['blacklist'] = Array(profile['blacklist']) - patterns
        profile.save
      end

      desc 'clear profile', 'remove all patterns in the blacklist'
      ##
      # Replaces the profile's blacklist with an empty list and saves.
      def clear(name)
        profile = load_profile(name)
        profile['blacklist'] = []
        profile.save
      end

      desc 'list profile', 'list patterns in blacklist'
      ##
      # Prints each blacklist pattern of the profile on its own line.
      def list(name)
        profile = load_profile(name)
        return unless profile.key?('blacklist')
        Array(profile['blacklist']).each { |pattern| puts pattern }
      end
    end
  end
end
|
@@ -0,0 +1,287 @@
|
|
1
|
+
require 'thor'
|
2
|
+
require 'recluse/profile'
|
3
|
+
require 'recluse/statuscode'
|
4
|
+
require 'recluse/cli/profile'
|
5
|
+
require 'csv'
|
6
|
+
require 'user_config'
|
7
|
+
require 'set'
|
8
|
+
|
9
|
+
module Recluse
|
10
|
+
##
|
11
|
+
# Command-line interface segments.
|
12
|
+
module CLI
|
13
|
+
##
|
14
|
+
# Main commands.
|
15
|
+
class Main < Thor #:nodoc: all
|
16
|
+
no_commands do
|
17
|
+
##
# Percentage of +num+ out of +den+, rounded to two decimal places.
#
# Returns 0.0 when +den+ is zero, so summaries for empty result sets print
# "0.0%" rather than "NaN%" or "Infinity%".
def perc(num, den)
  return 0.0 if den.zero?
  (num * 100.0 / den).round(2)
end
|
20
|
+
|
21
|
+
##
# Writes the outcome of a find run to +csv_path+ and prints a short summary.
#
# +group_by+ picks the CSV layout:
# [:page] one row per page that has matches; matching URLs joined by newlines.
# [:none] one row per (matching URL, page) pair.
# [:url]  one row per matching URL; the pages it appears on joined by newlines.
#
# Any other value writes no file; only the summary is printed.
def find_save(profile, csv_path, group_by: :none)
  puts "\nSaving report..."
  url_total = 0
  page_total = 0
  if group_by == :page
    by_page = profile.results.parents
    CSV.open(csv_path, 'w+') do |out|
      out << ['Page', 'Matching URLs']
      by_page.each do |page, urls|
        unless urls.empty?
          url_total += urls.length
          out << [page, urls.join("\n")]
          page_total += 1
        end
      end
    end
  elsif group_by == :none
    by_page = profile.results.parents
    CSV.open(csv_path, 'w+') do |out|
      out << ['Matching URL', 'Page']
      by_page.each do |page, urls|
        url_total += urls.length
        urls.each { |url| out << [url, page] }
        page_total += 1 unless urls.empty?
      end
    end
  elsif group_by == :url
    by_url = profile.results.children
    CSV.open(csv_path, 'w+') do |out|
      out << ['Matching URL', 'Pages']
      seen_pages = Set.new
      by_url.each do |url, details|
        url_total += 1
        next if details[:parents].empty?
        out << [url, details[:parents].join("\n")]
        # Collect distinct pages so each page is counted once.
        seen_pages.merge(details[:parents])
      end
      page_total = seen_pages.length
    end
  end
  total = profile.results.parents.keys.length
  puts "Total pages:\t#{total}"
  puts "Matched URLs:\t#{url_total}"
  puts "Pages with matches:\t#{page_total}\t#{perc page_total, total}%"
end
|
69
|
+
|
70
|
+
##
# Writes the outcome of a status run to +csv_path+ and prints per-status totals.
#
# +group_by+ is :url (one row per URL, pages joined by newlines) or :none
# (one row per URL/page pair). A result is written only when its status
# matches at least one entry of +includes+ and no entry of +excludes+;
# every status is still counted in the printed summary.
def status_save(profile, csv_path, group_by: :none, includes: [], excludes: [])
  puts 'Saving report...'
  tallies = Hash.new(0.0)
  page_label, write_rows =
    case group_by
    when :url
      [
        'On pages',
        proc { |csv, status, child, parents, error| csv << [status, child, parents.join("\n"), error || ''] }
      ]
    when :none
      [
        'On page',
        proc do |csv, status, child, parents, error|
          parents.each { |parent| csv << [status, child, parent, error || ''] }
        end
      ]
    end
  reportable = proc do |code|
    included = includes.any? { |include_code| include_code.equal?(code) }
    included && excludes.none? { |exclude_code| exclude_code.equal?(code) }
  end
  report = profile.results.children
  CSV.open(csv_path, 'w+') do |csv|
    csv << ['Status code', 'URL', page_label, 'With error']
    report.each do |child, info|
      val = info[:value]
      # A missing value means the URL was queued but never resolved.
      status, error = val.nil? ? %w(idk Incomplete) : [val.code, val.error]
      write_rows.call(csv, status, child, info[:parents], error) if reportable.call(status)
      tallies[status] += 1.0
    end
  end
  puts "Total:\t#{report.length}"
  tallies.each do |code, count|
    label = reportable.call(code) ? 'Reported' : 'Unreported'
    puts "#{code}:\t#{count.to_i}\t#{perc count, report.length}%\t#{label}"
  end
end
|
118
|
+
|
119
|
+
##
# Writes the outcome of an assert run to +csv_path+ and prints, per selector,
# how many pages matched, did not match, or have no recorded result.
#
# +report_vals+ lists the assertion outcomes (+true+ and/or +false+) that get
# a CSV row; all outcomes are counted in the summary regardless.
def assert_save(profile, csv_path, report_vals)
  puts 'Saving report...'
  report = profile.results.children
  tallies = {}
  CSV.open(csv_path, 'w+') do |csv|
    csv << ['Selector', 'Exists', 'On page']
    report.each do |page, info|
      outcomes = info[:value]
      # Pages with no recorded value are skipped here and show up as "Unknown".
      unless outcomes.nil?
        outcomes.each do |selector, exists|
          tallies[selector] ||= { 'true' => 0, 'false' => 0 }
          tallies[selector][exists.to_s] += 1
          csv << [selector, exists.to_s, page] if report_vals.include?(exists)
        end
      end
    end
  end
  puts "Total pages:\t#{report.keys.length}"
  tallies.each do |selector, tally|
    puts "#{selector}:"
    puts "- True: #{tally['true']}\t#{perc tally['true'], report.keys.length}%"
    puts "- False: #{tally['false']}\t#{perc tally['false'], report.keys.length}%"
    unknown = report.keys.length - tally['false'] - tally['true']
    puts "- Unknown: #{unknown}\t#{perc unknown, report.keys.length}%"
  end
end
|
144
|
+
end
|
145
|
+
|
146
|
+
method_option(
  :group_by,
  type: :string, aliases: '-g', default: 'none', enum: %w(none url),
  desc: 'Group by key'
)
method_option(
  :include,
  type: :array, aliases: '-i', default: ['xxx'],
  desc: "Include these status code results. Can be numbers or wildcards (4xx). 'idk' is a Recluse status code for when the status cannot be determined for the page."
)
method_option(
  :exclude,
  type: :array, aliases: '-x', default: [],
  desc: "Exclude these status code results. Can be numbers or wildcards (4xx). 'idk' is a Recluse status code for when the status cannot be determined for the page."
)
desc('status csv_path profile1 [profile2] ...', 'runs report on link statuses')
##
# Runs the status test over each named profile in turn, carrying the
# accumulated results forward from one profile to the next, then saves the
# report to +csv_path+. An INT or TERM signal during the run saves whatever
# has been collected so far and exits.
def status(csv_path, *profiles)
  if profiles.empty?
    puts 'No profile provided'
    exit(-1)
  end
  profile_queue =
    begin
      profiles.map { |profile_name| Recluse::Profile.load(profile_name) }
    rescue ProfileError => e
      puts e
      exit(-1)
    end
  profile = profile_queue[0]
  if options['group_by'] == 'page'
    puts 'Page grouping only available with --find.'
    exit(-1)
  end
  begin
    includes = options[:include].map { |code| Recluse::StatusCode.new(code) }
    if includes.empty?
      puts 'No status codes'
      exit(-1)
    end
    excludes = options[:exclude].map { |code| Recluse::StatusCode.new(code) }
  rescue StatusCodeError => e
    puts e
    exit(-1)
  end
  # Must stay a proc (not a lambda): it is called both as a signal handler
  # and directly with no arguments.
  ending = proc do
    status_save(
      profile, csv_path,
      group_by: options['group_by'].to_sym, includes: includes, excludes: excludes
    )
    exit
  end
  %w(INT TERM).each { |sig| Signal.trap(sig, &ending) }
  last_index = profile_queue.length - 1
  profile_queue.each_index do |idx|
    # Every profile after the first continues from its predecessor's results.
    profile.results = profile_queue[idx - 1].results unless idx.zero?
    profile.status
    profile = profile_queue[idx + 1] if idx < last_index
  end
  %w(INT TERM).each { |sig| Signal.trap(sig, 'DEFAULT') }
  ending.call
end
|
194
|
+
method_option(
  :globs,
  type: :array, aliases: '-G', required: true, banner: 'GLOB',
  desc: 'Find links matching any of the globs'
)
method_option(
  :group_by,
  type: :string, aliases: '-g', default: 'none', enum: %w(none url page),
  desc: 'Group by key'
)
desc('find csv_path profile1 [profile2] ... --globs glob1 [glob2] ...', 'find matching links')
##
# Runs the find test (links matching any of the --globs patterns) over each
# named profile in turn, carrying results forward between profiles, then
# saves the report to +csv_path+. An INT or TERM signal during the run saves
# what has been collected so far and exits.
def find(csv_path, *profiles)
  if profiles.empty?
    puts 'No profile provided'
    exit(-1)
  end
  profile_queue =
    begin
      profiles.map { |profile_name| Recluse::Profile.load(profile_name) }
    rescue ProfileError => e
      puts e
      exit(-1)
    end
  profile = profile_queue[0]
  # Reject a --globs list that holds nothing but blank strings.
  if options['globs'].all? { |glob| glob.strip.empty? }
    puts 'No glob patterns provided for --globs option'
    exit(-1)
  end
  # Must stay a proc (not a lambda): it is called both as a signal handler
  # and directly with no arguments.
  ending = proc do
    find_save(profile, csv_path, group_by: options['group_by'].to_sym)
    exit
  end
  %w(INT TERM).each { |sig| Signal.trap(sig, &ending) }
  last_index = profile_queue.length - 1
  profile_queue.each_index do |idx|
    # Every profile after the first continues from its predecessor's results.
    profile.results = profile_queue[idx - 1].results unless idx.zero?
    profile.find(options['globs'])
    profile = profile_queue[idx + 1] if idx < last_index
  end
  %w(INT TERM).each { |sig| Signal.trap(sig, 'DEFAULT') }
  ending.call
end
|
231
|
+
method_option :exists, type: :array, aliases: '-e', required: true, banner: 'SELECTOR', desc: 'Assert existence of HTML elements matching CSS selector'
method_option :report_true_only, type: :boolean, aliases: '--true', default: false, desc: 'Report only true assertions. Default is to report both true and false.'
method_option :report_false_only, type: :boolean, aliases: '--false', default: false, desc: 'Report only false assertions. Default is to report both true and false.'
desc 'assert csv_path profile1 [profile2] ... [options] --exists selector1 [selector2] ...', 'assert HTML element existence'
##
# Runs the assert test (existence of elements matching the --exists CSS
# selectors) over each named profile in turn, carrying results forward
# between profiles, then saves the report to +csv_path+. An INT or TERM
# signal during the run saves what has been collected so far and exits.
#
# --true / --false restrict the CSV to true or false assertions only; giving
# both or neither reports everything.
def assert(csv_path, *profiles)
  if profiles.empty?
    puts 'No profile provided'
    exit(-1)
  end
  begin
    profile_queue = profiles.map { |profile_name| Recluse::Profile.load profile_name }
  rescue ProfileError => e
    puts e
    exit(-1)
  end
  profile = profile_queue[0]
  has_selectors = options['exists'].any? { |selector| !selector.strip.empty? }
  unless has_selectors
    puts 'No selector patterns provided for --exists option'
    exit(-1)
  end
  report = []
  if options[:report_false_only] == options[:report_true_only]
    # Both flags or neither: no filtering.
    report = [true, false]
  elsif options[:report_true_only]
    report = [true]
  elsif options[:report_false_only]
    # Previously read `option[...]`, which raised NameError whenever only
    # --false was given.
    report = [false]
  end
  # Must stay a proc (not a lambda): it is called both as a signal handler
  # and directly with no arguments.
  ending = proc do
    assert_save profile, csv_path, report
    exit
  end

  %w(INT TERM).each do |sig|
    Signal.trap sig, &ending
  end
  (0...profile_queue.length).each do |i|
    # Every profile after the first continues from its predecessor's results.
    profile.results = profile_queue[i - 1].results unless i.zero?
    profile.assert options['exists']
    profile = profile_queue[i + 1] if i + 1 < profile_queue.length
  end
  %w(INT TERM).each do |sig|
    Signal.trap sig, 'DEFAULT'
  end
  ending.call
end
|
278
|
+
desc('where', 'location of profiles')
##
# Prints the directory of the '.recluse' user configuration, where profile
# files are kept.
def where
  puts UserConfig.new('.recluse').directory
end

# Expose the profile management commands as the `profile` subcommand.
desc('profile [subcommand] [options]', 'profile editor')
subcommand('profile', Profile)
|
285
|
+
end
|
286
|
+
end
|
287
|
+
end
|