RubyGems - crawlscope - Versions diffs - 0.2.0 → 0.4.0 - Mend

crawlscope 0.2.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27)

checksums.yaml +4 -4
data/CHANGELOG.md +67 -0
data/README.md +46 -9
data/lib/crawlscope/cli.rb +5 -0
data/lib/crawlscope/crawl.rb +6 -0
data/lib/crawlscope/document_text.rb +40 -0
data/lib/crawlscope/rule_registry.rb +3 -1
data/lib/crawlscope/rules/content_quality.rb +99 -0
data/lib/crawlscope/rules/indexability.rb +66 -0
data/lib/crawlscope/rules/links.rb +24 -6
data/lib/crawlscope/rules/metadata.rb +57 -11
data/lib/crawlscope/rules/structured_data.rb +47 -0
data/lib/crawlscope/rules/uniqueness.rb +76 -4
data/lib/crawlscope/schemas.rb +52 -1
data/lib/crawlscope/version.rb +1 -1
data/lib/tasks/crawlscope_tasks.rake +11 -1
data/test/crawlscope/cli_test.rb +19 -5
data/test/crawlscope/configuration_test.rb +8 -1
data/test/crawlscope/content_quality_rule_test.rb +68 -0
data/test/crawlscope/crawl_test.rb +23 -3
data/test/crawlscope/indexability_rule_test.rb +96 -0
data/test/crawlscope/links_rule_test.rb +39 -0
data/test/crawlscope/metadata_rule_test.rb +77 -0
data/test/crawlscope/structured_data_rule_test.rb +91 -0
data/test/crawlscope/uniqueness_rule_test.rb +43 -2
data/test/release_task_test.rb +86 -0
metadata +9 -2

data/lib/crawlscope/rules/structured_data.rb CHANGED

@@ -3,6 +3,8 @@
 module Crawlscope
   module Rules
     class StructuredData
+      CAREER_DETAIL_PATH = %r{/careers/[^/]+/?\z}
       attr_reader :code
       def initialize
@@ -65,6 +67,51 @@ module Crawlscope
             details: {errors: errors, source: source}
           )
         end
+        validate_job_posting_count(page, items, issues)
+      end
+      def validate_job_posting_count(page, items, issues)
+        job_postings = items.select { |item| structured_data_types(item.data).include?("JobPosting") }
+        return if job_postings.size == 1
+        if job_postings.size > 1
+          issues.add(
+            code: :multiple_job_postings,
+            severity: :warning,
+            category: :structured_data,
+            url: page.url,
+            message: "multiple JobPosting structured data blocks found",
+            details: {count: job_postings.size}
+          )
+        elsif career_detail_page?(page.url)
+          issues.add(
+            code: :missing_job_posting,
+            severity: :warning,
+            category: :structured_data,
+            url: page.url,
+            message: "career detail page missing JobPosting structured data",
+            details: {expected_type: "JobPosting"}
+          )
+        end
+      end
+      def structured_data_types(data)
+        return [] unless data.is_a?(Hash)
+        types = Array(data["@type"]).map(&:to_s)
+        if data["@graph"].is_a?(Array)
+          types.concat(data["@graph"].flat_map { |entry| structured_data_types(entry) })
+        end
+        types
+      end
+      def career_detail_page?(url)
+        URI(url).path.match?(CAREER_DETAIL_PATH)
+      rescue URI::InvalidURIError
+        false
       end
     end
   end

data/lib/crawlscope/rules/uniqueness.rb CHANGED

@@ -5,10 +5,24 @@ require "digest"
 module Crawlscope
   module Rules
     class Uniqueness
+      MINIMUM_SHINGLES = 10
+      MAX_NEAR_DUPLICATE_PAGES = 250
+      NEAR_DUPLICATE_THRESHOLD = 0.9
+      SHINGLE_SIZE = 5
       attr_reader :code
-      def initialize
+      def initialize(
+        near_duplicate_threshold: NEAR_DUPLICATE_THRESHOLD,
+        max_near_duplicate_pages: MAX_NEAR_DUPLICATE_PAGES,
+        minimum_shingles: MINIMUM_SHINGLES,
+        shingle_size: SHINGLE_SIZE
+      )
         @code = :uniqueness
+        @max_near_duplicate_pages = max_near_duplicate_pages
+        @minimum_shingles = minimum_shingles
+        @near_duplicate_threshold = near_duplicate_threshold
+        @shingle_size = shingle_size
       end
       def call(urls:, pages:, issues:, context:)
@@ -19,14 +33,13 @@ module Crawlscope
         end
         validate_duplicates(page_summaries, issues)
+        validate_near_duplicates(page_summaries, issues)
       end
       private
       def content_fingerprint_digest(doc)
-        text = doc.at_css("main")&.text.to_s
-        text = doc.at_css("body")&.text.to_s if text.empty?
-        normalized = text.gsub(/\s+/, " ").strip
+        normalized = DocumentText.text_for(doc)
         return if normalized.length < 200
         Digest::SHA256.hexdigest(normalized)
@@ -41,9 +54,12 @@ module Crawlscope
       end
       def summary_for(page)
+        tokens = DocumentText.tokens(DocumentText.text_for(page.doc))
         {
           content_fingerprint_digest: content_fingerprint_digest(page.doc),
           description: page.doc.at_css('meta[name="description"]')&.[]("content").to_s.strip,
+          shingles: shingles_for(tokens),
           title: page.doc.at_css("title")&.text.to_s.strip,
           url: page.url
         }
@@ -83,6 +99,62 @@ module Crawlscope
           )
         end
       end
+      def shingles_for(tokens)
+        return [] if tokens.size < @shingle_size
+        tokens.each_cons(@shingle_size).map { |items| items.join(" ") }.uniq
+      end
+      def validate_near_duplicates(page_summaries, issues)
+        if near_duplicate_scan_limit_exceeded?(page_summaries)
+          issues.add(
+            code: :near_duplicate_scan_skipped,
+            severity: :warning,
+            category: :uniqueness,
+            url: nil,
+            message: "near duplicate scan skipped for #{page_summaries.size} pages",
+            details: {max_pages: @max_near_duplicate_pages, page_count: page_summaries.size}
+          )
+          return
+        end
+        page_summaries.combination(2) do |left, right|
+          next if same_content_fingerprint?(left, right)
+          next if left[:shingles].size < @minimum_shingles || right[:shingles].size < @minimum_shingles
+          similarity = shingle_similarity(left[:shingles], right[:shingles])
+          next if similarity < @near_duplicate_threshold
+          urls = [left[:url], right[:url]]
+          issues.add(
+            code: :near_duplicate_content,
+            severity: :warning,
+            category: :uniqueness,
+            url: nil,
+            message: "near duplicate page content (#{format("%.2f", similarity)}) => #{urls.join(", ")}",
+            details: {similarity: similarity.round(3), threshold: @near_duplicate_threshold, urls: urls}
+          )
+        end
+      end
+      def near_duplicate_scan_limit_exceeded?(page_summaries)
+        !@max_near_duplicate_pages.nil? && page_summaries.size > @max_near_duplicate_pages
+      end
+      def same_content_fingerprint?(left, right)
+        !left[:content_fingerprint_digest].nil? &&
+          left[:content_fingerprint_digest] == right[:content_fingerprint_digest]
+      end
+      def shingle_similarity(left, right)
+        intersection_size = (left & right).size
+        smaller_set_size = [left.size, right.size].min
+        return 0.0 if smaller_set_size.zero?
+        intersection_size.to_f / smaller_set_size
+      end
     end
   end
 end

data/lib/crawlscope/schemas.rb CHANGED

@@ -330,6 +330,56 @@ module Crawlscope
       }
     }.freeze
+    JOB_POSTING = {
+      type: "object",
+      additionalProperties: true,
+      required: ["@type", "title", "description", "datePosted", "hiringOrganization"],
+      properties: {
+        "@context" => {enum: ["https://schema.org", "https://schema.org/"]},
+        "@type" => {const: "JobPosting"},
+        :title => {type: "string"},
+        :description => {type: "string"},
+        :identifier => {type: "object"},
+        :datePosted => {type: "string"},
+        :validThrough => {type: "string"},
+        :employmentType => {
+          anyOf: [
+            {type: "string"},
+            {type: "array", minItems: 1, items: {type: "string"}}
+          ]
+        },
+        :directApply => {type: "boolean"},
+        :hiringOrganization => {
+          type: "object",
+          required: ["@type", "name"],
+          properties: {
+            "@type" => {const: "Organization"},
+            :name => {type: "string"},
+            :sameAs => {type: "string", format: "uri"},
+            :logo => {type: "string", format: "uri"}
+          }
+        },
+        :applicantLocationRequirements => {
+          anyOf: [
+            {type: "object"},
+            {type: "array", minItems: 1, items: {type: "object"}}
+          ]
+        },
+        :jobLocationType => {type: "string"},
+        :jobLocation => {
+          anyOf: [
+            {type: "object"},
+            {type: "array", minItems: 1, items: {type: "object"}}
+          ]
+        },
+        :baseSalary => {type: "object"}
+      },
+      anyOf: [
+        {required: ["jobLocation"]},
+        {required: ["jobLocationType", "applicantLocationRequirements"]}
+      ]
+    }.freeze
     def self.schemas
       {
         "FAQPage" => FAQ_PAGE,
@@ -348,7 +398,8 @@ module Crawlscope
         "Recipe" => RECIPE,
         "Event" => EVENT,
         "VideoObject" => VIDEO_OBJECT,
-        "WebPage" => WEB_PAGE
+        "WebPage" => WEB_PAGE,
+        "JobPosting" => JOB_POSTING
       }
     end
   end

data/lib/crawlscope/version.rb CHANGED

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module Crawlscope
-  VERSION = "0.2.0"
+  VERSION = "0.4.0"
 end

data/lib/tasks/crawlscope_tasks.rake CHANGED

@@ -5,11 +5,16 @@ namespace :crawlscope do
   end
   namespace :validate do
-    desc "Directly validate JSON-LD on one or more URLs. ENV: URL (required, semicolon-separated), DEBUG=1, JS=1, TIMEOUT, NETWORK_IDLE_TIMEOUT, REPORT_PATH, SUMMARY=1"
+    desc "Directly validate JSON-LD on one or more URLs. ENV: URL (semicolon-separated), DEBUG=1, JS=1, TIMEOUT, NETWORK_IDLE_TIMEOUT, REPORT_PATH, SUMMARY=1"
     task ldjson: :environment do
       Crawlscope::RakeTasks.ldjson
     end
+    desc "Validate URLs with the indexability rule. ENV: URL, SITEMAP, JS=1"
+    task indexability: :environment do
+      Crawlscope::RakeTasks.validate_rule("indexability")
+    end
     desc "Validate URLs with the metadata rule. ENV: URL, SITEMAP, JS=1"
     task metadata: :environment do
       Crawlscope::RakeTasks.validate_rule("metadata")
@@ -25,6 +30,11 @@ namespace :crawlscope do
       Crawlscope::RakeTasks.validate_rule("uniqueness")
     end
+    desc "Validate URLs with the content_quality rule. ENV: URL, SITEMAP, JS=1"
+    task content_quality: :environment do
+      Crawlscope::RakeTasks.validate_rule("content_quality")
+    end
     desc "Validate URLs with the links rule. ENV: URL, SITEMAP, JS=1"
     task links: :environment do
       Crawlscope::RakeTasks.validate_rule("links")

data/test/crawlscope/cli_test.rb CHANGED

@@ -4,9 +4,10 @@ require "test_helper"
 class CrawlscopeCliTest < Minitest::Test
   class FakeConfiguration
-    attr_accessor :concurrency, :network_idle_timeout_seconds, :output, :renderer, :timeout_seconds
+    attr_accessor :base_url, :concurrency, :network_idle_timeout_seconds, :output, :renderer, :timeout_seconds
     def initialize
+      @base_url = nil
       @concurrency = 10
       @network_idle_timeout_seconds = 5
       @renderer = :http
@@ -145,6 +146,17 @@ class CrawlscopeCliTest < Minitest::Test
     assert_empty err.string
   end
+  def test_ldjson_defaults_to_configured_base_url
+    configuration = FakeConfiguration.new
+    configuration.base_url = "https://example.com"
+    task = FakeTask.new
+    status = Crawlscope::Cli.start(["ldjson"], out: StringIO.new, err: StringIO.new, configuration: configuration, task: task)
+    assert_equal 0, status
+    assert_equal ["https://example.com"], task.json_ld_arguments[:urls]
+  end
   def test_validate_caps_default_browser_concurrency
     configuration = FakeConfiguration.new
     task = FakeTask.new
@@ -218,14 +230,16 @@ class CrawlscopeCliTest < Minitest::Test
     assert_equal 3, configuration.network_idle_timeout_seconds
   end
-  def test_ldjson_requires_urls
+  def test_ldjson_defaults_to_localhost
     out = StringIO.new
     err = StringIO.new
+    task = FakeTask.new
-    status = Crawlscope::Cli.start(["ldjson"], out: out, err: err, configuration: FakeConfiguration.new, task: FakeTask.new)
+    status = Crawlscope::Cli.start(["ldjson"], out: out, err: err, configuration: FakeConfiguration.new, task: task)
-    assert_equal 1, status
-    assert_includes err.string, "Crawlscope URL is not configured"
+    assert_equal 0, status
+    assert_equal ["http://localhost:3000"], task.json_ld_arguments[:urls]
+    assert_empty err.string
   end
   def test_invalid_integer_option_returns_error

data/test/crawlscope/configuration_test.rb CHANGED

@@ -20,7 +20,14 @@ class CrawlscopeConfigurationTest < Minitest::Test
     assert_equal "https://example.com", audit.instance_variable_get(:@base_url)
     assert_equal "/tmp/sitemap.xml", audit.instance_variable_get(:@sitemap_path)
     assert_equal 4, audit.instance_variable_get(:@concurrency)
-    assert_equal %i[metadata structured_data uniqueness links], audit.instance_variable_get(:@rules).map(&:code)
+    assert_equal %i[
+      indexability
+      metadata
+      structured_data
+      uniqueness
+      content_quality
+      links
+    ], audit.instance_variable_get(:@rules).map(&:code)
   end
   def test_audit_raises_without_base_url

data/test/crawlscope/content_quality_rule_test.rb ADDED

@@ -0,0 +1,68 @@
+# frozen_string_literal: true
+require "test_helper"
+class CrawlscopeContentQualityRuleTest < Minitest::Test
+  def test_reports_thin_visible_text_and_low_html_text_ratio
+    issues = Crawlscope::IssueCollection.new
+    page = page_with(main: "Short page <div>#{"<span></span>" * 500}</div>")
+    Crawlscope::Rules::ContentQuality.new.call(urls: [page.url], pages: [page], issues: issues)
+    codes = issues.to_a.map(&:code)
+    assert_includes codes, :thin_visible_text
+    assert_includes codes, :low_visible_text_ratio
+  end
+  def test_visible_text_ratio_ignores_markup_outside_main_content
+    issues = Crawlscope::IssueCollection.new
+    page = page_with(
+      main: Array.new(260) { |index| "word#{index}" }.join(" "),
+      head_markup: "<style>#{"body{}" * 10_000}</style>",
+      extra_markup: "<nav>#{"<a href=\"/\">Navigation</a>" * 500}</nav>"
+    )
+    Crawlscope::Rules::ContentQuality.new.call(urls: [page.url], pages: [page], issues: issues)
+    refute_includes issues.to_a.map(&:code), :low_visible_text_ratio
+  end
+  def test_reports_low_unique_token_ratio_for_repetitive_content
+    issues = Crawlscope::IssueCollection.new
+    page = page_with(main: ("hotel location service " * 100).strip)
+    Crawlscope::Rules::ContentQuality.new.call(urls: [page.url], pages: [page], issues: issues)
+    issue = issues.to_a.find { |item| item.code == :low_unique_token_ratio }
+    assert issue
+    assert_operator issue.details[:ratio], :<, issue.details[:threshold]
+  end
+  private
+  def page_with(main:, extra_markup: "", head_markup: "")
+    body = <<~HTML
+      <html>
+        <head>
+          <title>Content quality</title>
+          #{head_markup}
+        </head>
+        <body>
+          #{extra_markup}
+          <main>#{main}</main>
+        </body>
+      </html>
+    HTML
+    Crawlscope::Page.new(
+      url: "https://example.com/page",
+      normalized_url: "https://example.com/page",
+      final_url: "https://example.com/page",
+      normalized_final_url: "https://example.com/page",
+      status: 200,
+      headers: {"content-type" => "text/html"},
+      body: body,
+      doc: Nokogiri::HTML(body)
+    )
+  end
+end

data/test/crawlscope/crawl_test.rb CHANGED

@@ -31,8 +31,13 @@ class CrawlscopeCrawlTest < Minitest::Test
           <html>
             <head>
               <title>Pricing</title>
-              <meta name="description" content="Plans for hotels and restaurants">
+              <meta name="description" content="Plans for hotels and restaurants that need practical software checks, clear metadata, and dependable search previews.">
               <link rel="canonical" href="https://example.com/pricing">
+              <meta property="og:title" content="Pricing">
+              <meta property="og:description" content="Plans for hotels and restaurants that need practical software checks, clear metadata, and dependable search previews.">
+              <meta property="og:url" content="https://example.com/pricing">
+              <meta property="og:type" content="website">
+              <meta property="og:image" content="https://example.com/icon.png">
               <script type="application/ld+json">
                 {"@context":"https://schema.org","@type":"WebSite","name":"Example","url":"https://example.com"}
               </script>
@@ -40,6 +45,7 @@ class CrawlscopeCrawlTest < Minitest::Test
             <body>
               <main>
                 <h1>Pricing</h1>
+                <p>#{Array.new(260) { |index| "pricing#{index}" }.join(" ")}</p>
               </main>
             </body>
           </html>
@@ -95,7 +101,15 @@ class CrawlscopeCrawlTest < Minitest::Test
     ).call
     refute result.ok?
-    assert_equal %i[meta_description_too_long missing_canonical missing_h1 missing_structured_data title_repeats_site_name].sort, result.issues.to_a.map(&:code).uniq.sort
+    assert_equal %i[
+      incomplete_open_graph_tags
+      meta_description_too_long
+      missing_canonical
+      missing_h1
+      missing_structured_data
+      thin_visible_text
+      title_repeats_site_name
+    ].sort, result.issues.to_a.map(&:code).uniq.sort
   end
   def test_uses_browser_when_renderer_is_browser
@@ -128,8 +142,13 @@ class CrawlscopeCrawlTest < Minitest::Test
           <html>
             <head>
               <title>Pricing</title>
-              <meta name="description" content="Plans for hotels and restaurants">
+              <meta name="description" content="Plans for hotels and restaurants that need practical software checks, clear metadata, and dependable search previews.">
               <link rel="canonical" href="https://example.com/pricing">
+              <meta property="og:title" content="Pricing">
+              <meta property="og:description" content="Plans for hotels and restaurants that need practical software checks, clear metadata, and dependable search previews.">
+              <meta property="og:url" content="https://example.com/pricing">
+              <meta property="og:type" content="website">
+              <meta property="og:image" content="https://example.com/icon.png">
               <script type="application/ld+json">
                 {"@context":"https://schema.org","@type":"WebSite","name":"Example","url":"https://example.com"}
               </script>
@@ -137,6 +156,7 @@ class CrawlscopeCrawlTest < Minitest::Test
             <body>
               <main>
                 <h1>Pricing</h1>
+                <p>#{Array.new(260) { |index| "pricing#{index}" }.join(" ")}</p>
               </main>
             </body>
           </html>

data/test/crawlscope/indexability_rule_test.rb ADDED

@@ -0,0 +1,96 @@
+# frozen_string_literal: true
+require "test_helper"
+class CrawlscopeIndexabilityRuleTest < Minitest::Test
+  def test_reports_meta_noindex
+    issues = Crawlscope::IssueCollection.new
+    page = page_with(
+      body: <<~HTML
+        <html>
+          <head><meta name="robots" content="noindex, follow"></head>
+          <body><main>Visible content</main></body>
+        </html>
+      HTML
+    )
+    Crawlscope::Rules::Indexability.new.call(urls: [page.url], pages: [page], issues: issues)
+    issue = issues.to_a.fetch(0)
+    assert_equal :noindex_meta, issue.code
+    assert_equal :error, issue.severity
+    assert_equal "noindex, follow", issue.details[:content]
+  end
+  def test_reports_x_robots_tag_noindex
+    issues = Crawlscope::IssueCollection.new
+    page = page_with(headers: {"X-Robots-Tag" => "noindex"})
+    Crawlscope::Rules::Indexability.new.call(urls: [page.url], pages: [page], issues: issues)
+    issue = issues.to_a.fetch(0)
+    assert_equal :noindex_header, issue.code
+    assert_equal :error, issue.severity
+    assert_equal "noindex", issue.details[:content]
+  end
+  def test_reports_x_robots_tag_noindex_for_non_html_response
+    issues = Crawlscope::IssueCollection.new
+    page = page_with(
+      body: "%PDF-1.7",
+      doc: nil,
+      headers: {"content-type" => "application/pdf", "X-Robots-Tag" => "noindex"}
+    )
+    Crawlscope::Rules::Indexability.new.call(urls: [page.url], pages: [page], issues: issues)
+    issue = issues.to_a.fetch(0)
+    assert_equal :noindex_header, issue.code
+    assert_equal :error, issue.severity
+    assert_equal "noindex", issue.details[:content]
+  end
+  def test_reports_scoped_x_robots_tag_noindex
+    issues = Crawlscope::IssueCollection.new
+    page = page_with(headers: {"X-Robots-Tag" => "googlebot: noindex, nofollow"})
+    Crawlscope::Rules::Indexability.new.call(urls: [page.url], pages: [page], issues: issues)
+    issue = issues.to_a.fetch(0)
+    assert_equal :noindex_header, issue.code
+    assert_equal "googlebot: noindex, nofollow", issue.details[:content]
+  end
+  def test_reports_x_robots_tag_none
+    issues = Crawlscope::IssueCollection.new
+    page = page_with(headers: {"X-Robots-Tag" => "none"})
+    Crawlscope::Rules::Indexability.new.call(urls: [page.url], pages: [page], issues: issues)
+    issue = issues.to_a.fetch(0)
+    assert_equal :noindex_header, issue.code
+    assert_equal "none", issue.details[:content]
+  end
+  private
+  def page_with(body: nil, doc: :parse, headers: {"content-type" => "text/html"})
+    body ||= <<~HTML
+      <html>
+        <head><title>Indexable</title></head>
+        <body><main>Visible content</main></body>
+      </html>
+    HTML
+    Crawlscope::Page.new(
+      url: "https://example.com/page",
+      normalized_url: "https://example.com/page",
+      final_url: "https://example.com/page",
+      normalized_final_url: "https://example.com/page",
+      status: 200,
+      headers: headers,
+      body: body,
+      doc: (doc == :parse) ? Nokogiri::HTML(body) : doc
+    )
+  end
+end

data/test/crawlscope/links_rule_test.rb CHANGED

@@ -118,6 +118,45 @@ class CrawlscopeLinksRuleTest < Minitest::Test
     assert_equal "https://example.com/guide", issues.to_a.first.url
   end
+  def test_counts_root_page_links_as_inbound_links
+    issues = Crawlscope::IssueCollection.new
+    Crawlscope::Rules::Links.new.call(
+      urls: ["https://example.com/", "https://example.com/about"],
+      pages: [
+        page(url: "https://example.com/", body: "<main><a href=\"/about\">About</a></main>"),
+        page(url: "https://example.com/about", body: "<main><p>About</p></main>")
+      ],
+      issues: issues,
+      context: context(resolver: ->(target_url) { {crawled: true, error: nil, final_url: target_url, status: 200} })
+    )
+    refute_includes issues.to_a.map(&:code), :low_inbound_anchor_links
+  end
+  def test_reports_internal_links_that_redirect
+    issues = Crawlscope::IssueCollection.new
+    resolver = lambda do |target_url|
+      {
+        crawled: false,
+        error: nil,
+        final_url: "https://example.com/pricing",
+        status: 200
+      }
+    end
+    Crawlscope::Rules::Links.new.call(
+      urls: ["https://example.com/guide"],
+      pages: [page(url: "https://example.com/guide", body: "<main><a href=\"/plans\">Plans</a></main>")],
+      issues: issues,
+      context: context(resolver: resolver)
+    )
+    redirect_issue = issues.to_a.find { |issue| issue.code == :internal_link_redirects }
+    assert redirect_issue
+    assert_includes redirect_issue.message, "https://example.com/pricing"
+  end
   def test_ignores_links_that_should_not_be_crawled
     issues = Crawlscope::IssueCollection.new