RubyGems - crawlscope - Versions diffs - 0.1.0 → 0.3.0 - Mend

crawlscope 0.1.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (44)

checksums.yaml +4 -4
data/CHANGELOG.md +7 -8
data/README.md +21 -14
data/lib/crawlscope/browser.rb +8 -0
data/lib/crawlscope/cli.rb +15 -10
data/lib/crawlscope/configuration.rb +20 -5
data/lib/crawlscope/context.rb +9 -0
data/lib/crawlscope/{audit.rb → crawl.rb} +68 -58
data/lib/crawlscope/crawler.rb +19 -1
data/lib/crawlscope/http.rb +1 -1
data/lib/crawlscope/rake_tasks.rb +28 -0
data/lib/crawlscope/rules/links.rb +99 -48
data/lib/crawlscope/rules/metadata.rb +57 -11
data/lib/crawlscope/rules/structured_data.rb +61 -1
data/lib/crawlscope/run.rb +60 -0
data/lib/crawlscope/schema_registry.rb +3 -349
data/lib/crawlscope/schemas.rb +406 -0
data/lib/crawlscope/sitemap.rb +18 -6
data/lib/crawlscope/structured_data/audit.rb +7 -7
data/lib/crawlscope/structured_data/check.rb +35 -0
data/lib/crawlscope/structured_data/reporter.rb +69 -0
data/lib/crawlscope/url.rb +14 -0
data/lib/crawlscope/version.rb +1 -1
data/lib/tasks/crawlscope_tasks.rake +12 -23
data/test/crawlscope/browser_test.rb +155 -0
data/test/crawlscope/cli_test.rb +143 -7
data/test/crawlscope/configuration_test.rb +49 -0
data/test/crawlscope/{audit_test.rb → crawl_test.rb} +23 -7
data/test/crawlscope/crawler_test.rb +34 -0
data/test/crawlscope/http_test.rb +56 -0
data/test/crawlscope/links_rule_test.rb +149 -5
data/test/crawlscope/metadata_rule_test.rb +77 -0
data/test/crawlscope/rule_registry_test.rb +32 -0
data/test/crawlscope/{task_test.rb → run_test.rb} +28 -33
data/test/crawlscope/schema_registry_test.rb +19 -0
data/test/crawlscope/sitemap_test.rb +55 -0
data/test/crawlscope/structured_data_document_test.rb +36 -0
data/test/crawlscope/structured_data_report_test.rb +3 -3
data/test/crawlscope/structured_data_reporter_test.rb +2 -2
data/test/crawlscope/structured_data_rule_test.rb +111 -0
data/test/crawlscope/structured_data_writer_test.rb +2 -2
data/test/crawlscope/url_test.rb +31 -0
metadata +15 -5
data/lib/crawlscope/task.rb +0 -131

data/test/crawlscope/links_rule_test.rb CHANGED

@@ -38,19 +38,163 @@ class CrawlscopeLinksRuleTest < Minitest::Test
       urls: ["https://example.com/guide", "https://example.com/pricing"],
       pages: pages,
       issues: issues,
-      context: {
-        allowed_statuses: [200, 301, 302],
-        base_url: "https://example.com",
-        resolve_target: method(:resolve_target)
-      }
+      context: context
     )
     assert_equal [:broken_internal_link], issues.to_a.map(&:code)
     assert_includes issues.to_a.first.message, "HTTP 404"
   end
+  def test_reports_unresolved_internal_links
+    issues = Crawlscope::IssueCollection.new
+    Crawlscope::Rules::Links.new.call(
+      urls: [],
+      pages: [page(url: "https://example.com/guide", body: "<main><a href=\"/unknown\">Unknown</a></main>")],
+      issues: issues,
+      context: context(resolver: ->(_target_url) {})
+    )
+    assert_includes issues.to_a.map(&:code), :unresolved_internal_link
+    assert_includes issues.to_a.find { |issue| issue.code == :unresolved_internal_link }.message, "unable to validate internal link"
+  end
+  def test_ignores_fetch_errors_for_urls_already_crawled
+    issues = Crawlscope::IssueCollection.new
+    resolver = lambda do |target_url|
+      {
+        crawled: true,
+        error: "Timeout::Error: timed out",
+        final_url: target_url,
+        status: nil
+      }
+    end
+    Crawlscope::Rules::Links.new.call(
+      urls: [],
+      pages: [page(url: "https://example.com/guide", body: "<main><a href=\"/timeout\">Timeout</a></main>")],
+      issues: issues,
+      context: context(resolver: resolver)
+    )
+    assert_empty issues.to_a
+  end
+  def test_reports_fetch_errors_for_uncrawled_targets
+    issues = Crawlscope::IssueCollection.new
+    resolver = lambda do |target_url|
+      {
+        crawled: false,
+        error: "Timeout::Error: timed out",
+        final_url: target_url,
+        status: nil
+      }
+    end
+    Crawlscope::Rules::Links.new.call(
+      urls: [],
+      pages: [page(url: "https://example.com/guide", body: "<main><a href=\"/timeout\">Timeout</a></main>")],
+      issues: issues,
+      context: context(resolver: resolver)
+    )
+    assert_equal [:unresolved_internal_link], issues.to_a.map(&:code)
+  end
+  def test_reports_low_inbound_anchor_links
+    issues = Crawlscope::IssueCollection.new
+    Crawlscope::Rules::Links.new.call(
+      urls: ["https://example.com/guide", "https://example.com/pricing"],
+      pages: [
+        page(url: "https://example.com/guide", body: "<main><a href=\"/pricing\">Pricing</a></main>"),
+        page(url: "https://example.com/pricing", body: "<main><p>Pricing</p></main>")
+      ],
+      issues: issues,
+      context: context
+    )
+    assert_equal [:low_inbound_anchor_links], issues.to_a.map(&:code)
+    assert_equal "https://example.com/guide", issues.to_a.first.url
+  end
+  def test_counts_root_page_links_as_inbound_links
+    issues = Crawlscope::IssueCollection.new
+    Crawlscope::Rules::Links.new.call(
+      urls: ["https://example.com/", "https://example.com/about"],
+      pages: [
+        page(url: "https://example.com/", body: "<main><a href=\"/about\">About</a></main>"),
+        page(url: "https://example.com/about", body: "<main><p>About</p></main>")
+      ],
+      issues: issues,
+      context: context(resolver: ->(target_url) { {crawled: true, error: nil, final_url: target_url, status: 200} })
+    )
+    refute_includes issues.to_a.map(&:code), :low_inbound_anchor_links
+  end
+  def test_reports_internal_links_that_redirect
+    issues = Crawlscope::IssueCollection.new
+    resolver = lambda do |target_url|
+      {
+        crawled: false,
+        error: nil,
+        final_url: "https://example.com/pricing",
+        status: 200
+      }
+    end
+    Crawlscope::Rules::Links.new.call(
+      urls: ["https://example.com/guide"],
+      pages: [page(url: "https://example.com/guide", body: "<main><a href=\"/plans\">Plans</a></main>")],
+      issues: issues,
+      context: context(resolver: resolver)
+    )
+    redirect_issue = issues.to_a.find { |issue| issue.code == :internal_link_redirects }
+    assert redirect_issue
+    assert_includes redirect_issue.message, "https://example.com/pricing"
+  end
+  def test_ignores_links_that_should_not_be_crawled
+    issues = Crawlscope::IssueCollection.new
+    Crawlscope::Rules::Links.new.call(
+      urls: ["https://example.com/guide"],
+      pages: [
+        page(
+          url: "https://example.com/guide",
+          body: <<~HTML
+            <html>
+              <body>
+                <a href="#section">Jump</a>
+                <a href="mailto:test@example.com">Email</a>
+                <a href="https://other.example.com/page">External</a>
+                <a href="/rails/info">Rails</a>
+                <a href="/empty">   </a>
+              </body>
+            </html>
+          HTML
+        )
+      ],
+      issues: issues,
+      context: context
+    )
+    assert_empty issues.to_a
+  end
   private
+  def context(resolver: method(:resolve_target))
+    {
+      allowed_statuses: [200, 301, 302],
+      base_url: "https://example.com",
+      resolve_target: resolver
+    }
+  end
   def page(url:, body:)
     doc = Nokogiri::HTML(body)

data/test/crawlscope/metadata_rule_test.rb ADDED

@@ -0,0 +1,77 @@
+# frozen_string_literal: true
+require "test_helper"
+class CrawlscopeMetadataRuleTest < Minitest::Test
+  def test_reports_short_meta_description_multiple_h1_and_incomplete_open_graph
+    issues = Crawlscope::IssueCollection.new
+    Crawlscope::Rules::Metadata.new.call(
+      urls: [page.url],
+      pages: [page],
+      issues: issues
+    )
+    codes = issues.to_a.map(&:code)
+    assert_includes codes, :meta_description_too_short
+    assert_includes codes, :multiple_h1
+    assert_includes codes, :incomplete_open_graph_tags
+  end
+  def test_allows_localhost_page_with_matching_production_canonical_path
+    issues = Crawlscope::IssueCollection.new
+    local_page = page(
+      url: "http://localhost:3000/about",
+      body: <<~HTML
+        <html>
+          <head>
+            <title>About</title>
+            <meta name="description" content="A clear description that is long enough for search snippets, local validation checks, and realistic production metadata audits.">
+            <link rel="canonical" href="https://www.example.com/about">
+            <meta property="og:title" content="About">
+            <meta property="og:description" content="About page">
+            <meta property="og:url" content="https://www.example.com/about">
+            <meta property="og:type" content="website">
+            <meta property="og:image" content="https://www.example.com/icon.png">
+          </head>
+          <body><main><h1>About</h1></main></body>
+        </html>
+      HTML
+    )
+    Crawlscope::Rules::Metadata.new.call(
+      urls: [local_page.url],
+      pages: [local_page],
+      issues: issues
+    )
+    refute_includes issues.to_a.map(&:code), :canonical_mismatch
+  end
+  private
+  def page(url: "https://example.com/about", body: nil)
+    body ||= <<~HTML
+      <html>
+        <head>
+          <title>About</title>
+          <meta name="description" content="Too short">
+          <link rel="canonical" href="https://example.com/about">
+          <meta property="og:title" content="About">
+        </head>
+        <body><main><h1>About</h1><h1>Team</h1></main></body>
+      </html>
+    HTML
+    Crawlscope::Page.new(
+      url: url,
+      normalized_url: Crawlscope::Url.normalize(url, base_url: url),
+      final_url: url,
+      normalized_final_url: Crawlscope::Url.normalize(url, base_url: url),
+      status: 200,
+      headers: {"content-type" => "text/html"},
+      body: body,
+      doc: Nokogiri::HTML(body)
+    )
+  end
+end

data/test/crawlscope/rule_registry_test.rb ADDED

@@ -0,0 +1,32 @@
+# frozen_string_literal: true
+require "test_helper"
+class CrawlscopeRuleRegistryTest < Minitest::Test
+  Rule = Data.define(:code)
+  def test_rules_for_returns_defaults_when_names_are_blank
+    metadata = Rule.new(:metadata)
+    links = Rule.new(:links)
+    registry = Crawlscope::RuleRegistry.new(rules: [metadata, links], default_codes: %i[links])
+    assert_equal [links], registry.rules_for(nil)
+    assert_equal [links], registry.rules_for("")
+  end
+  def test_rules_for_accepts_csv_and_arrays
+    metadata = Rule.new(:metadata)
+    links = Rule.new(:links)
+    registry = Crawlscope::RuleRegistry.new(rules: [metadata, links])
+    assert_equal [metadata, links], registry.rules_for(["metadata, links"])
+  end
+  def test_rules_for_rejects_unknown_rules
+    registry = Crawlscope::RuleRegistry.new(rules: [Rule.new(:metadata)])
+    error = assert_raises(Crawlscope::ConfigurationError) { registry.rules_for("links") }
+    assert_equal "Unknown Crawlscope rules: links", error.message
+  end
+end

data/test/crawlscope/{task_test.rb → run_test.rb} RENAMED

@@ -2,7 +2,7 @@
 require "test_helper"
-class CrawlscopeTaskTest < Minitest::Test
+class CrawlscopeRunTest < Minitest::Test
   FakeResult = Data.define(:reported) do
     def ok?
       true
@@ -47,16 +47,29 @@ class CrawlscopeTaskTest < Minitest::Test
     end
   end
-  class LdjsonConfiguration
+  class JsonLdConfiguration
     attr_reader :output
-    def initialize(output:, browser:)
+    def initialize(output:, page:)
       @output = output
-      @browser = browser
+      @page = page
+      @closed = false
     end
     def browser_factory
-      -> { @browser }
+      -> { self }
+    end
+    def close
+      @closed = true
+    end
+    def closed?
+      @closed
+    end
+    def fetch(_url)
+      @page
     end
     def network_idle_timeout_seconds
@@ -80,30 +93,13 @@ class CrawlscopeTaskTest < Minitest::Test
     end
   end
-  class FakeBrowser
-    attr_reader :closed
-    def initialize(page:)
-      @page = page
-      @closed = false
-    end
-    def close
-      @closed = true
-    end
-    def fetch(_url)
-      @page
-    end
-  end
   def test_validate_passes_rule_names_to_configuration_audit
     result = FakeResult.new(reported: true)
     configuration = FakeConfiguration.new(result: result)
     reporter = FakeReporter.new
-    task = Crawlscope::Task.new(configuration: configuration, reporter: reporter)
-    returned_result = task.validate(rule_names: "links")
+    run = Crawlscope::Run.new(configuration: configuration, reporter: reporter)
+    returned_result = run.validate(rule_names: "links")
     assert_equal(
       {
@@ -122,7 +118,7 @@ class CrawlscopeTaskTest < Minitest::Test
     configuration = FakeConfiguration.new(result: result, base_url: "https://example.com", sitemap_path: nil)
     reporter = FakeReporter.new
-    Crawlscope::Task.new(configuration: configuration, reporter: reporter).validate
+    Crawlscope::Run.new(configuration: configuration, reporter: reporter).validate
     assert_equal(
       {
@@ -144,7 +140,7 @@ class CrawlscopeTaskTest < Minitest::Test
     File.write(sitemap_path, "<urlset></urlset>")
     Dir.chdir(tmp_dir) do
-      Crawlscope::Task.new(configuration: configuration, reporter: reporter).validate
+      Crawlscope::Run.new(configuration: configuration, reporter: reporter).validate
     end
     assert_equal(
@@ -159,7 +155,7 @@ class CrawlscopeTaskTest < Minitest::Test
     FileUtils.rm_rf(tmp_dir) if tmp_dir
   end
-  def test_validate_ldjson_uses_real_audit_and_writes_report
+  def test_validate_json_ld_reports_valid_structured_data
     body = <<~HTML
       <html>
         <head>
@@ -177,15 +173,14 @@ class CrawlscopeTaskTest < Minitest::Test
       status: 200,
       headers: {"content-type" => "text/html"},
       body: body,
-      doc: Nokogiri::HTML(body)
+      doc: nil
     )
-    browser = FakeBrowser.new(page: page)
     output = StringIO.new
-    configuration = LdjsonConfiguration.new(output: output, browser: browser)
+    configuration = JsonLdConfiguration.new(output: output, page: page)
     report_dir = Dir.mktmpdir
     report_path = File.join(report_dir, "structured-data.json")
-    result = Crawlscope::Task.new(configuration: configuration).validate_ldjson(
+    result = Crawlscope::Run.new(configuration: configuration).validate_json_ld(
       urls: [page.url],
       debug: true,
       report_path: report_path,
@@ -193,9 +188,9 @@ class CrawlscopeTaskTest < Minitest::Test
     )
     assert result.ok?
-    assert browser.closed
+    assert_predicate configuration, :closed?
     assert File.exist?(report_path)
-    assert_includes File.read(report_path), "https://example.com"
+    assert_equal ["https://example.com"], JSON.parse(File.read(report_path)).fetch("results").keys
     assert_includes output.string, "JavaScript mode enabled (Ferrum)"
     assert_includes output.string, "Validating JSON-LD on 1 URL(s)"
     assert_includes output.string, "All valid!"

data/test/crawlscope/schema_registry_test.rb CHANGED

@@ -86,4 +86,23 @@ class CrawlscopeSchemaRegistryTest < Minitest::Test
     assert_equal "Unknown Crawlscope rules: unknown", error.message
   end
+  def test_validate_accepts_arrays_graphs_unknown_types_and_non_hashes
+    registry = Crawlscope::SchemaRegistry.default
+    errors = registry.validate(
+      [
+        "ignored",
+        {"@type" => "UnknownThing"},
+        {
+          "@graph" => [
+            {"@type" => "Article"},
+            {"@type" => "WebSite", "name" => "Example"}
+          ]
+        }
+      ]
+    )
+    assert_equal ["Article"], errors.map { |error| error[:type] }
+  end
 end

data/test/crawlscope/sitemap_test.rb CHANGED

@@ -48,4 +48,59 @@ class CrawlscopeSitemapTest < Minitest::Test
     assert_equal ["https://www.example.com/features/reviews"], parser.urls(base_url: "https://www.example.com")
   end
+  def test_rebases_remote_sitemap_index_children_to_base_url
+    stub_request(:get, "http://localhost:3000/sitemap.xml")
+      .to_return(
+        status: 200,
+        body: <<~XML
+          <?xml version="1.0" encoding="UTF-8"?>
+          <sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
+            <sitemap><loc>https://www.example.com/sitemap-marketing.xml</loc></sitemap>
+          </sitemapindex>
+        XML
+      )
+    stub_request(:get, "http://localhost:3000/sitemap-marketing.xml")
+      .to_return(
+        status: 200,
+        body: <<~XML
+          <?xml version="1.0" encoding="UTF-8"?>
+          <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
+            <url><loc>https://www.example.com/features/reviews</loc></url>
+          </urlset>
+        XML
+      )
+    parser = Crawlscope::Sitemap.new(path: "http://localhost:3000/sitemap.xml")
+    assert_equal ["http://localhost:3000/features/reviews"], parser.urls(base_url: "http://localhost:3000")
+  end
+  def test_parses_local_sitemap_index_with_absolute_child_sitemap_loc
+    Dir.mktmpdir do |dir|
+      File.write(
+        File.join(dir, "sitemap.xml"),
+        <<~XML
+          <?xml version="1.0" encoding="UTF-8"?>
+          <sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
+            <sitemap><loc>https://www.example.com/sitemap-pages.xml</loc></sitemap>
+          </sitemapindex>
+        XML
+      )
+      File.write(
+        File.join(dir, "sitemap-pages.xml"),
+        <<~XML
+          <?xml version="1.0" encoding="UTF-8"?>
+          <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
+            <url><loc>https://www.example.com/features/reviews</loc></url>
+          </urlset>
+        XML
+      )
+      parser = Crawlscope::Sitemap.new(path: File.join(dir, "sitemap.xml"))
+      assert_equal ["http://localhost:3000/features/reviews"], parser.urls(base_url: "http://localhost:3000")
+    end
+  end
 end

data/test/crawlscope/structured_data_document_test.rb CHANGED

@@ -25,4 +25,40 @@ class CrawlscopeStructuredDataDocumentTest < Minitest::Test
     assert_equal ["json-ld", "microdata"], items.map(&:source)
     assert_equal "Hotel Test", document.json_ld_items.first["name"]
   end
+  def test_json_ld_handles_arrays_invalid_json_and_non_object_entries
+    html = <<~HTML
+      <script type="application/ld+json">
+        [{"@type":"WebSite","name":"Example"}, "ignored"]
+      </script>
+      <script type="application/ld+json">
+        {"@type":
+      </script>
+    HTML
+    document = Crawlscope::StructuredData::Document.new(html: html)
+    assert_equal 2, document.items.size
+    assert_equal ["WebSite"], document.json_ld_items.map { |item| item["@type"] }
+    assert_equal "Invalid JSON-LD", document.items.last.data[:error]
+  end
+  def test_microdata_extracts_common_value_attributes
+    html = <<~HTML
+      <div itemscope itemtype="https://schema.org/Event">
+        <meta itemprop="name" content="Launch">
+        <time itemprop="startDate" datetime="2026-04-24T10:00:00Z"></time>
+        <a itemprop="url" href="https://example.com/event">Event</a>
+        <data itemprop="position" value="1"></data>
+      </div>
+    HTML
+    item = Crawlscope::StructuredData::Document.new(html: html).items.first.data
+    assert_equal "Event", item["@type"]
+    assert_equal "Launch", item["name"]
+    assert_equal "2026-04-24T10:00:00Z", item["startDate"]
+    assert_equal "https://example.com/event", item["url"]
+    assert_equal "1", item["position"]
+  end
 end

data/test/crawlscope/structured_data_report_test.rb CHANGED

@@ -4,9 +4,9 @@ require "test_helper"
 class CrawlscopeStructuredDataReportTest < Minitest::Test
   def test_results_maps_validation_errors_and_skips
-    result = Crawlscope::StructuredData::Audit::Result.new(
+    result = Crawlscope::StructuredData::Audit::Outcome.new(
       entries: [
-        Crawlscope::StructuredData::Audit::Entry.new(
+        Crawlscope::StructuredData::Audit::Page.new(
           url: "https://example.com/article",
           status: 200,
           structured_items: [{source: "json-ld", data: {"@type" => "Article"}}],
@@ -15,7 +15,7 @@ class CrawlscopeStructuredDataReportTest < Minitest::Test
           content_type: "text/html",
           skipped_reason: nil
         ),
-        Crawlscope::StructuredData::Audit::Entry.new(
+        Crawlscope::StructuredData::Audit::Page.new(
           url: "https://example.com/feed.xml",
           status: 200,
           structured_items: [],

data/test/crawlscope/structured_data_reporter_test.rb CHANGED

@@ -5,9 +5,9 @@ require "test_helper"
 class CrawlscopeStructuredDataReporterTest < Minitest::Test
   def test_reports_failures_and_report_path
-    result = Crawlscope::StructuredData::Audit::Result.new(
+    result = Crawlscope::StructuredData::Audit::Outcome.new(
       entries: [
-        Crawlscope::StructuredData::Audit::Entry.new(
+        Crawlscope::StructuredData::Audit::Page.new(
           url: "https://example.com/article",
           status: 200,
           structured_items: [{source: "json-ld", data: {"@type" => "Article"}}],

data/test/crawlscope/structured_data_rule_test.rb CHANGED

@@ -59,6 +59,117 @@ class CrawlscopeStructuredDataRuleTest < Minitest::Test
     assert_equal [:structured_data_parse_error], issues.to_a.map(&:code)
   end
+  def test_reports_missing_structured_data_for_html_pages
+    issues = Crawlscope::IssueCollection.new
+    rule = Crawlscope::Rules::StructuredData.new
+    page = page(
+      url: "https://example.com/articles/test",
+      body: "<html><body><main><h1>Article</h1></main></body></html>"
+    )
+    rule.call(
+      urls: [page.url],
+      pages: [page],
+      issues: issues,
+      context: {schema_registry: Crawlscope::SchemaRegistry.default}
+    )
+    assert_equal [:missing_structured_data], issues.to_a.map(&:code)
+    assert_equal "no structured data found; add JSON-LD or microdata markup", issues.to_a.first.message
+    assert_equal ["json-ld", "microdata"], issues.to_a.first.details[:expected_sources]
+  end
+  def test_validates_job_posting_markup
+    issues = Crawlscope::IssueCollection.new
+    rule = Crawlscope::Rules::StructuredData.new
+    page = page(
+      url: "https://example.com/careers/sales-partner",
+      body: <<~HTML
+        <html>
+          <head>
+            <script type="application/ld+json">
+              {
+                "@context":"https://schema.org/",
+                "@type":"JobPosting",
+                "title":"Sales Partner",
+                "description":"A real role description.",
+                "datePosted":"2026-04-28",
+                "hiringOrganization":{"@type":"Organization","name":"Example","sameAs":"https://example.com/","logo":"https://example.com/icon.png"},
+                "jobLocationType":"TELECOMMUTE",
+                "applicantLocationRequirements":[{"@type":"Country","name":"South Africa"}]
+              }
+            </script>
+          </head>
+          <body><h1>Sales Partner</h1></body>
+        </html>
+      HTML
+    )
+    rule.call(
+      urls: [page.url],
+      pages: [page],
+      issues: issues,
+      context: {schema_registry: Crawlscope::SchemaRegistry.default}
+    )
+    assert_empty issues.to_a
+  end
+  def test_reports_schema_errors_for_invalid_job_posting_markup
+    issues = Crawlscope::IssueCollection.new
+    rule = Crawlscope::Rules::StructuredData.new
+    page = page(
+      url: "https://example.com/careers/sales-partner",
+      body: <<~HTML
+        <html>
+          <head>
+            <script type="application/ld+json">
+              {"@context":"https://schema.org","@type":"JobPosting","title":"Sales Partner"}
+            </script>
+          </head>
+          <body><h1>Sales Partner</h1></body>
+        </html>
+      HTML
+    )
+    rule.call(
+      urls: [page.url],
+      pages: [page],
+      issues: issues,
+      context: {schema_registry: Crawlscope::SchemaRegistry.default}
+    )
+    assert_equal [:structured_data_schema_error], issues.to_a.map(&:code)
+    assert_includes issues.to_a.first.message, "description"
+  end
+  def test_reports_missing_job_posting_for_career_detail_pages
+    issues = Crawlscope::IssueCollection.new
+    rule = Crawlscope::Rules::StructuredData.new
+    page = page(
+      url: "https://example.com/careers/sales-partner",
+      body: <<~HTML
+        <html>
+          <head>
+            <script type="application/ld+json">
+              {"@context":"https://schema.org","@type":"WebPage","name":"Sales Partner"}
+            </script>
+          </head>
+          <body><h1>Sales Partner</h1></body>
+        </html>
+      HTML
+    )
+    rule.call(
+      urls: [page.url],
+      pages: [page],
+      issues: issues,
+      context: {schema_registry: Crawlscope::SchemaRegistry.default}
+    )
+    assert_equal [:missing_job_posting], issues.to_a.map(&:code)
+  end
   private
   def page(url:, body:)