crawlscope 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +31 -0
  3. data/LICENSE.txt +21 -0
  4. data/README.md +323 -0
  5. data/exe/crawlscope +6 -0
  6. data/lib/crawlscope/audit.rb +128 -0
  7. data/lib/crawlscope/browser.rb +88 -0
  8. data/lib/crawlscope/cli.rb +245 -0
  9. data/lib/crawlscope/configuration.rb +123 -0
  10. data/lib/crawlscope/crawler.rb +28 -0
  11. data/lib/crawlscope/http.rb +77 -0
  12. data/lib/crawlscope/issue.rb +17 -0
  13. data/lib/crawlscope/issue_collection.rb +41 -0
  14. data/lib/crawlscope/page.rb +23 -0
  15. data/lib/crawlscope/railtie.rb +9 -0
  16. data/lib/crawlscope/reporter.rb +33 -0
  17. data/lib/crawlscope/result.rb +9 -0
  18. data/lib/crawlscope/rule_registry.rb +39 -0
  19. data/lib/crawlscope/rules/links.rb +220 -0
  20. data/lib/crawlscope/rules/metadata.rb +93 -0
  21. data/lib/crawlscope/rules/structured_data.rb +58 -0
  22. data/lib/crawlscope/rules/uniqueness.rb +88 -0
  23. data/lib/crawlscope/schema_registry.rb +431 -0
  24. data/lib/crawlscope/sitemap.rb +67 -0
  25. data/lib/crawlscope/structured_data/audit.rb +150 -0
  26. data/lib/crawlscope/structured_data/document.rb +93 -0
  27. data/lib/crawlscope/structured_data/report.rb +77 -0
  28. data/lib/crawlscope/structured_data/reporter.rb +73 -0
  29. data/lib/crawlscope/structured_data/writer.rb +26 -0
  30. data/lib/crawlscope/task.rb +131 -0
  31. data/lib/crawlscope/url.rb +43 -0
  32. data/lib/crawlscope/version.rb +5 -0
  33. data/lib/crawlscope.rb +34 -0
  34. data/lib/tasks/crawlscope_tasks.rake +44 -0
  35. data/test/crawlscope/audit_test.rb +165 -0
  36. data/test/crawlscope/cli_test.rb +157 -0
  37. data/test/crawlscope/configuration_test.rb +45 -0
  38. data/test/crawlscope/links_rule_test.rb +87 -0
  39. data/test/crawlscope/loader_test.rb +11 -0
  40. data/test/crawlscope/reporter_test.rb +50 -0
  41. data/test/crawlscope/schema_registry_test.rb +89 -0
  42. data/test/crawlscope/sitemap_test.rb +51 -0
  43. data/test/crawlscope/structured_data_audit_test.rb +118 -0
  44. data/test/crawlscope/structured_data_document_test.rb +28 -0
  45. data/test/crawlscope/structured_data_report_test.rb +37 -0
  46. data/test/crawlscope/structured_data_reporter_test.rb +32 -0
  47. data/test/crawlscope/structured_data_rule_test.rb +78 -0
  48. data/test/crawlscope/structured_data_writer_test.rb +32 -0
  49. data/test/crawlscope/task_test.rb +206 -0
  50. data/test/crawlscope/uniqueness_rule_test.rb +46 -0
  51. data/test/test_helper.rb +23 -0
  52. metadata +271 -0
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "stringio"
4
+ require "test_helper"
5
+
6
# Checks the text that Crawlscope::StructuredData::Reporter writes for an
# audit result containing a single schema validation error.
class CrawlscopeStructuredDataReporterTest < Minitest::Test
  # The output must flag the failure, list the field-level error and mention
  # the report path the reporter was constructed with.
  def test_reports_failures_and_report_path
    failing_entry = Crawlscope::StructuredData::Audit::Entry.new(
      url: "https://example.com/article",
      status: 200,
      structured_items: [{source: "json-ld", data: {"@type" => "Article"}}],
      errors: [{type: "Article", source: "json-ld", errors: [{field: "headline", issue: "is required"}]}],
      fetch_error: nil,
      content_type: "text/html",
      skipped_reason: nil
    )
    result = Crawlscope::StructuredData::Audit::Result.new(entries: [failing_entry])
    io = StringIO.new
    reporter = Crawlscope::StructuredData::Reporter.new(io: io, report_path: "/tmp/structured_data_report.json")

    reporter.report(result)

    output = io.string
    expected_fragments = [
      "VALIDATION FAILED",
      "VALIDATION ERRORS (1):",
      "headline: is required",
      "/tmp/structured_data_report.json"
    ]
    expected_fragments.each { |fragment| assert_includes output, fragment }
  end
end
@@ -0,0 +1,78 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "test_helper"
4
+
5
# Runs Crawlscope::Rules::StructuredData against small HTML fixtures and
# checks which issue codes end up in the issue collection.
class CrawlscopeStructuredDataRuleTest < Minitest::Test
  # An Article without a headline should yield exactly one schema error
  # whose message mentions the missing field.
  def test_reports_schema_errors_for_invalid_article_markup
    markup = <<~HTML
      <html>
      <head>
      <script type="application/ld+json">
      {"@context":"https://schema.org","@type":"Article"}
      </script>
      </head>
      <body>
      <main><h1>Article</h1></main>
      </body>
      </html>
    HTML

    issues = audit_markup(markup)

    assert_equal [:structured_data_schema_error], issues.to_a.map(&:code)
    assert_includes issues.to_a.first.message, "headline"
  end

  # A JSON-LD script with an unterminated object should yield exactly one
  # parse error.
  def test_reports_parse_errors_for_invalid_json_ld
    markup = <<~HTML
      <html>
      <head>
      <script type="application/ld+json">
      {"@context":"https://schema.org","@type":"Article"
      </script>
      </head>
      </html>
    HTML

    issues = audit_markup(markup)

    assert_equal [:structured_data_parse_error], issues.to_a.map(&:code)
  end

  private

  # Wraps +markup+ in a page, runs the rule with the default schema registry
  # and returns the issue collection the rule wrote into.
  def audit_markup(markup)
    issues = Crawlscope::IssueCollection.new
    rule = Crawlscope::Rules::StructuredData.new
    fixture = build_page(url: "https://example.com/articles/test", body: markup)

    rule.call(
      urls: [fixture.url],
      pages: [fixture],
      issues: issues,
      context: {schema_registry: Crawlscope::SchemaRegistry.default}
    )

    issues
  end

  # Builds a successful text/html page whose original, final and normalized
  # URLs are all +url+, with +body+ parsed by Nokogiri.
  def build_page(url:, body:)
    Crawlscope::Page.new(
      url: url,
      normalized_url: url,
      final_url: url,
      normalized_final_url: url,
      status: 200,
      headers: {"content-type" => "text/html"},
      body: body,
      doc: Nokogiri::HTML(body)
    )
  end
end
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+ require "test_helper"
5
+
6
# Verifies that Crawlscope::StructuredData::Writer serialises an audit result
# to a JSON file at the requested path.
class CrawlscopeStructuredDataWriterTest < Minitest::Test
  # Writes a one-entry result into a throwaway directory and inspects the
  # parsed JSON payload.
  def test_writes_json_report
    article_url = "https://example.com/article"
    entry = Crawlscope::StructuredData::Audit::Entry.new(
      url: article_url,
      status: 200,
      structured_items: [{source: "json-ld", data: {"@type" => "Article"}}],
      errors: [],
      fetch_error: nil,
      content_type: "text/html",
      skipped_reason: nil
    )
    result = Crawlscope::StructuredData::Audit::Result.new(entries: [entry])

    tmp_dir = nil
    begin
      tmp_dir = Dir.mktmpdir
      path = File.join(tmp_dir, "structured_data_report.json")
      writer = Crawlscope::StructuredData::Writer.new(path: path)

      writer.write(result)

      payload = JSON.parse(File.read(path))
      assert payload["generated_at"]
      article_summary = payload["results"][article_url]
      assert_equal 1, article_summary["json_ld_count"]
    ensure
      # Always drop the scratch directory, even when an assertion fails.
      FileUtils.rm_rf(tmp_dir) if tmp_dir
    end
  end
end
@@ -0,0 +1,206 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "test_helper"
4
+
5
# Tests for Crawlscope::Task. The sitemap-audit path (#validate) is driven
# through hand-rolled fakes; the JSON-LD path (#validate_ldjson) runs the real
# audit against a fake browser and a temporary report file.
class CrawlscopeTaskTest < Minitest::Test
  # Stand-in audit result that always reports success.
  FakeResult = Data.define(:reported) do
    def ok?
      true
    end
  end

  # Reporter double that remembers the result passed to #report.
  class FakeReporter
    attr_reader :result

    def report(result)
      @result = result
    end
  end

  # Audit double whose #call returns the canned result it was built with.
  class FakeAudit
    def initialize(result:)
      @result = result
    end

    def call
      @result
    end
  end

  # Configuration double for #validate: exposes base_url/sitemap_path and
  # records the keyword arguments its #audit factory receives.
  class FakeConfiguration
    attr_reader :base_url, :received_arguments, :sitemap_path

    def initialize(result:, base_url: "https://example.com", sitemap_path: "/tmp/sitemap.xml")
      @result = result
      @base_url = base_url
      @sitemap_path = sitemap_path
    end

    def audit(base_url:, sitemap_path:, rule_names:)
      @received_arguments = {
        base_url: base_url,
        sitemap_path: sitemap_path,
        rule_names: rule_names
      }

      FakeAudit.new(result: @result)
    end
  end

  # Configuration double for #validate_ldjson: browser renderer, fixed
  # timeouts, the default schema registry and an injectable output stream.
  class LdjsonConfiguration
    attr_reader :output

    def initialize(output:, browser:)
      @output = output
      @browser = browser
    end

    # Returns a lambda so the browser is only handed out when requested.
    def browser_factory
      -> { @browser }
    end

    def network_idle_timeout_seconds
      5
    end

    def renderer
      :browser
    end

    def schema_registry
      Crawlscope::SchemaRegistry.default
    end

    def scroll_page?
      false
    end

    def timeout_seconds
      20
    end
  end

  # Browser double that serves one canned page for any URL and records
  # whether #close was called.
  class FakeBrowser
    attr_reader :closed

    def initialize(page:)
      @page = page
      @closed = false
    end

    def close
      @closed = true
    end

    def fetch(_url)
      @page
    end
  end

  # rule_names must be forwarded untouched, and the audit result must be both
  # reported and returned.
  def test_validate_passes_rule_names_to_configuration_audit
    result = FakeResult.new(reported: true)
    configuration = FakeConfiguration.new(result: result)
    reporter = FakeReporter.new

    task = Crawlscope::Task.new(configuration: configuration, reporter: reporter)
    returned_result = task.validate(rule_names: "links")

    assert_equal(
      {
        base_url: "https://example.com",
        sitemap_path: "/tmp/sitemap.xml",
        rule_names: "links"
      },
      configuration.received_arguments
    )
    assert_same result, reporter.result
    assert_same result, returned_result
  end

  # Without a configured sitemap the task should fall back to
  # "<base_url>/sitemap.xml".
  def test_validate_defaults_to_base_url_sitemap_when_not_configured
    result = FakeResult.new(reported: true)
    configuration = FakeConfiguration.new(result: result, base_url: "https://example.com", sitemap_path: nil)
    reporter = FakeReporter.new

    Crawlscope::Task.new(configuration: configuration, reporter: reporter).validate

    assert_equal(
      {
        base_url: "https://example.com",
        sitemap_path: "https://example.com/sitemap.xml",
        rule_names: nil
      },
      configuration.received_arguments
    )
  end

  # For a localhost base URL the task should pick up public/sitemap.xml from
  # the current working directory.
  def test_validate_prefers_local_sitemap_for_localhost
    result = FakeResult.new(reported: true)
    configuration = FakeConfiguration.new(result: result, base_url: "http://localhost:3000", sitemap_path: nil)
    reporter = FakeReporter.new
    tmp_dir = Dir.mktmpdir
    # The temp root can be a symlink (e.g. /var -> /private/var on macOS).
    # The test gives the task no path other than the working directory, so
    # the expected value is built from the canonical path that the working
    # directory resolves to after chdir.
    project_root = File.realpath(tmp_dir)
    sitemap_path = File.join(project_root, "public", "sitemap.xml")
    FileUtils.mkdir_p(File.dirname(sitemap_path))
    File.write(sitemap_path, "<urlset></urlset>")

    Dir.chdir(project_root) do
      Crawlscope::Task.new(configuration: configuration, reporter: reporter).validate
    end

    assert_equal(
      {
        base_url: "http://localhost:3000",
        sitemap_path: sitemap_path,
        rule_names: nil
      },
      configuration.received_arguments
    )
  ensure
    FileUtils.rm_rf(tmp_dir) if tmp_dir
  end

  # End-to-end JSON-LD validation: a valid WebSite document must pass, the
  # browser must be closed, the report file written and the summary printed.
  def test_validate_ldjson_uses_real_audit_and_writes_report
    body = <<~HTML
      <html>
      <head>
      <script type="application/ld+json">
      {"@type":"WebSite","name":"Example","url":"https://example.com"}
      </script>
      </head>
      </html>
    HTML
    page = Crawlscope::Page.new(
      url: "https://example.com",
      normalized_url: "https://example.com",
      final_url: "https://example.com",
      normalized_final_url: "https://example.com",
      status: 200,
      headers: {"content-type" => "text/html"},
      body: body,
      doc: Nokogiri::HTML(body)
    )
    browser = FakeBrowser.new(page: page)
    output = StringIO.new
    configuration = LdjsonConfiguration.new(output: output, browser: browser)
    report_dir = Dir.mktmpdir
    report_path = File.join(report_dir, "structured-data.json")

    result = Crawlscope::Task.new(configuration: configuration).validate_ldjson(
      urls: [page.url],
      debug: true,
      report_path: report_path,
      summary: true
    )

    assert result.ok?
    assert browser.closed
    assert File.exist?(report_path)
    assert_includes File.read(report_path), "https://example.com"
    assert_includes output.string, "JavaScript mode enabled (Ferrum)"
    assert_includes output.string, "Validating JSON-LD on 1 URL(s)"
    assert_includes output.string, "All valid!"
    assert_includes output.string, "All 1 URLs passed validation."
  ensure
    FileUtils.rm_rf(report_dir) if report_dir
  end
end
@@ -0,0 +1,46 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "test_helper"
4
+
5
# Feeds two pages with identical title, description and main content to
# Crawlscope::Rules::Uniqueness and checks the duplicate issues it raises.
class CrawlscopeUniquenessRuleTest < Minitest::Test
  # Two identical pages must produce one issue per duplicated aspect.
  def test_reports_duplicate_title_description_and_content
    issues = Crawlscope::IssueCollection.new
    rule = Crawlscope::Rules::Uniqueness.new
    pages = %w[a b].map { |slug| identical_page(url: "https://example.com/#{slug}") }
    urls = pages.map(&:url)

    rule.call(urls: urls, pages: pages, issues: issues, context: {})

    expected_codes = %i[duplicate_content_fingerprint duplicate_meta_description duplicate_title].sort
    reported_codes = issues.to_a.map(&:code).sort
    assert_equal expected_codes, reported_codes
  end

  private

  # Builds a successful text/html page at +url+ whose markup is the same for
  # every call: fixed title, fixed description and repeated filler text.
  def identical_page(url:)
    filler = ("Useful content " * 30).strip
    markup = <<~HTML
      <html>
      <head>
      <title>Example Title</title>
      <meta name="description" content="Example description">
      </head>
      <body>
      <main>#{filler}</main>
      </body>
      </html>
    HTML
    parsed = Nokogiri::HTML(markup)

    Crawlscope::Page.new(
      url: url,
      normalized_url: url,
      final_url: url,
      normalized_final_url: url,
      status: 200,
      headers: {"content-type" => "text/html"},
      body: markup,
      doc: parsed
    )
  end
end
@@ -0,0 +1,23 @@
1
+ # frozen_string_literal: true
2
+
3
+ $LOAD_PATH.unshift File.expand_path("../lib", __dir__)
4
+
5
+ require "simplecov"
6
+
7
# Start coverage tracking before the library under test is required.
SimpleCov.start do
  # Leave the test files themselves out of the coverage figures.
  add_filter "/test/"
  # Track branch coverage in addition to line coverage.
  enable_coverage :branch
end
11
+
12
+ require "minitest/autorun"
13
+ require "tmpdir"
14
+ require "date"
15
+ require "fileutils"
16
+ require "nokogiri"
17
+ require "webmock/minitest"
18
+
19
+ require "crawlscope"
20
+
21
# Only touch the validator setting when json-schema has actually been loaded.
JSON::Validator.use_multi_json = false if defined?(JSON::Validator)
metadata ADDED
@@ -0,0 +1,271 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: crawlscope
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Paulo Fidalgo
8
+ - Ethos Link
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 1980-01-02 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: concurrent-ruby
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '1.3'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '1.3'
27
+ - !ruby/object:Gem::Dependency
28
+ name: faraday
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '2.0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '2.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: faraday-follow_redirects
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0.3'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0.3'
55
+ - !ruby/object:Gem::Dependency
56
+ name: json-schema
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '5.0'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '5.0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: nokogiri
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '1.16'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '1.16'
83
+ - !ruby/object:Gem::Dependency
84
+ name: railties
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '7.1'
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '7.1'
97
+ - !ruby/object:Gem::Dependency
98
+ name: zeitwerk
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '2.6'
104
+ type: :runtime
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '2.6'
111
+ - !ruby/object:Gem::Dependency
112
+ name: minitest
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - "~>"
116
+ - !ruby/object:Gem::Version
117
+ version: '5.0'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - "~>"
123
+ - !ruby/object:Gem::Version
124
+ version: '5.0'
125
+ - !ruby/object:Gem::Dependency
126
+ name: rake
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - "~>"
130
+ - !ruby/object:Gem::Version
131
+ version: '13.0'
132
+ type: :development
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - "~>"
137
+ - !ruby/object:Gem::Version
138
+ version: '13.0'
139
+ - !ruby/object:Gem::Dependency
140
+ name: simplecov
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - "~>"
144
+ - !ruby/object:Gem::Version
145
+ version: '0.22'
146
+ type: :development
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - "~>"
151
+ - !ruby/object:Gem::Version
152
+ version: '0.22'
153
+ - !ruby/object:Gem::Dependency
154
+ name: standard
155
+ requirement: !ruby/object:Gem::Requirement
156
+ requirements:
157
+ - - "~>"
158
+ - !ruby/object:Gem::Version
159
+ version: '1.0'
160
+ type: :development
161
+ prerelease: false
162
+ version_requirements: !ruby/object:Gem::Requirement
163
+ requirements:
164
+ - - "~>"
165
+ - !ruby/object:Gem::Version
166
+ version: '1.0'
167
+ - !ruby/object:Gem::Dependency
168
+ name: webmock
169
+ requirement: !ruby/object:Gem::Requirement
170
+ requirements:
171
+ - - "~>"
172
+ - !ruby/object:Gem::Version
173
+ version: '3.0'
174
+ type: :development
175
+ prerelease: false
176
+ version_requirements: !ruby/object:Gem::Requirement
177
+ requirements:
178
+ - - "~>"
179
+ - !ruby/object:Gem::Version
180
+ version: '3.0'
181
+ description: A small Ruby gem for sitemap-driven SEO validation with structured issues,
182
+ configurable rules and schema registries, optional browser rendering, and Rails
183
+ rake task integration.
184
+ email:
185
+ - devel@ethos-link.com
186
+ executables:
187
+ - crawlscope
188
+ extensions: []
189
+ extra_rdoc_files: []
190
+ files:
191
+ - CHANGELOG.md
192
+ - LICENSE.txt
193
+ - README.md
194
+ - exe/crawlscope
195
+ - lib/crawlscope.rb
196
+ - lib/crawlscope/audit.rb
197
+ - lib/crawlscope/browser.rb
198
+ - lib/crawlscope/cli.rb
199
+ - lib/crawlscope/configuration.rb
200
+ - lib/crawlscope/crawler.rb
201
+ - lib/crawlscope/http.rb
202
+ - lib/crawlscope/issue.rb
203
+ - lib/crawlscope/issue_collection.rb
204
+ - lib/crawlscope/page.rb
205
+ - lib/crawlscope/railtie.rb
206
+ - lib/crawlscope/reporter.rb
207
+ - lib/crawlscope/result.rb
208
+ - lib/crawlscope/rule_registry.rb
209
+ - lib/crawlscope/rules/links.rb
210
+ - lib/crawlscope/rules/metadata.rb
211
+ - lib/crawlscope/rules/structured_data.rb
212
+ - lib/crawlscope/rules/uniqueness.rb
213
+ - lib/crawlscope/schema_registry.rb
214
+ - lib/crawlscope/sitemap.rb
215
+ - lib/crawlscope/structured_data/audit.rb
216
+ - lib/crawlscope/structured_data/document.rb
217
+ - lib/crawlscope/structured_data/report.rb
218
+ - lib/crawlscope/structured_data/reporter.rb
219
+ - lib/crawlscope/structured_data/writer.rb
220
+ - lib/crawlscope/task.rb
221
+ - lib/crawlscope/url.rb
222
+ - lib/crawlscope/version.rb
223
+ - lib/tasks/crawlscope_tasks.rake
224
+ - test/crawlscope/audit_test.rb
225
+ - test/crawlscope/cli_test.rb
226
+ - test/crawlscope/configuration_test.rb
227
+ - test/crawlscope/links_rule_test.rb
228
+ - test/crawlscope/loader_test.rb
229
+ - test/crawlscope/reporter_test.rb
230
+ - test/crawlscope/schema_registry_test.rb
231
+ - test/crawlscope/sitemap_test.rb
232
+ - test/crawlscope/structured_data_audit_test.rb
233
+ - test/crawlscope/structured_data_document_test.rb
234
+ - test/crawlscope/structured_data_report_test.rb
235
+ - test/crawlscope/structured_data_reporter_test.rb
236
+ - test/crawlscope/structured_data_rule_test.rb
237
+ - test/crawlscope/structured_data_writer_test.rb
238
+ - test/crawlscope/task_test.rb
239
+ - test/crawlscope/uniqueness_rule_test.rb
240
+ - test/test_helper.rb
241
+ homepage: https://www.ethos-link.com/opensource/crawlscope
242
+ licenses:
243
+ - MIT
244
+ metadata:
245
+ homepage_uri: https://www.ethos-link.com/opensource/crawlscope
246
+ source_code_uri: https://github.com/ethos-link/crawlscope
247
+ bug_tracker_uri: https://github.com/ethos-link/crawlscope/issues
248
+ changelog_uri: https://github.com/ethos-link/crawlscope/blob/main/CHANGELOG.md
249
+ documentation_uri: https://github.com/ethos-link/crawlscope/blob/main/README.md
250
+ funding_uri: https://www.reviato.com/
251
+ github_repo: ssh://github.com/ethos-link/crawlscope
252
+ allowed_push_host: https://rubygems.org
253
+ rubygems_mfa_required: 'true'
254
+ rdoc_options: []
255
+ require_paths:
256
+ - lib
257
+ required_ruby_version: !ruby/object:Gem::Requirement
258
+ requirements:
259
+ - - ">="
260
+ - !ruby/object:Gem::Version
261
+ version: 3.2.0
262
+ required_rubygems_version: !ruby/object:Gem::Requirement
263
+ requirements:
264
+ - - ">="
265
+ - !ruby/object:Gem::Version
266
+ version: '0'
267
+ requirements: []
268
+ rubygems_version: 4.0.6
269
+ specification_version: 4
270
+ summary: Audit sitemap URLs for metadata, structured data, uniqueness, and links
271
+ test_files: []