RubyGems - crawlscope - Versions diffs - 0.1.0 → 0.3.0 - Mend

crawlscope 0.1.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (44)

checksums.yaml +4 -4
data/CHANGELOG.md +7 -8
data/README.md +21 -14
data/lib/crawlscope/browser.rb +8 -0
data/lib/crawlscope/cli.rb +15 -10
data/lib/crawlscope/configuration.rb +20 -5
data/lib/crawlscope/context.rb +9 -0
data/lib/crawlscope/{audit.rb → crawl.rb} +68 -58
data/lib/crawlscope/crawler.rb +19 -1
data/lib/crawlscope/http.rb +1 -1
data/lib/crawlscope/rake_tasks.rb +28 -0
data/lib/crawlscope/rules/links.rb +99 -48
data/lib/crawlscope/rules/metadata.rb +57 -11
data/lib/crawlscope/rules/structured_data.rb +61 -1
data/lib/crawlscope/run.rb +60 -0
data/lib/crawlscope/schema_registry.rb +3 -349
data/lib/crawlscope/schemas.rb +406 -0
data/lib/crawlscope/sitemap.rb +18 -6
data/lib/crawlscope/structured_data/audit.rb +7 -7
data/lib/crawlscope/structured_data/check.rb +35 -0
data/lib/crawlscope/structured_data/reporter.rb +69 -0
data/lib/crawlscope/url.rb +14 -0
data/lib/crawlscope/version.rb +1 -1
data/lib/tasks/crawlscope_tasks.rake +12 -23
data/test/crawlscope/browser_test.rb +155 -0
data/test/crawlscope/cli_test.rb +143 -7
data/test/crawlscope/configuration_test.rb +49 -0
data/test/crawlscope/{audit_test.rb → crawl_test.rb} +23 -7
data/test/crawlscope/crawler_test.rb +34 -0
data/test/crawlscope/http_test.rb +56 -0
data/test/crawlscope/links_rule_test.rb +149 -5
data/test/crawlscope/metadata_rule_test.rb +77 -0
data/test/crawlscope/rule_registry_test.rb +32 -0
data/test/crawlscope/{task_test.rb → run_test.rb} +28 -33
data/test/crawlscope/schema_registry_test.rb +19 -0
data/test/crawlscope/sitemap_test.rb +55 -0
data/test/crawlscope/structured_data_document_test.rb +36 -0
data/test/crawlscope/structured_data_report_test.rb +3 -3
data/test/crawlscope/structured_data_reporter_test.rb +2 -2
data/test/crawlscope/structured_data_rule_test.rb +111 -0
data/test/crawlscope/structured_data_writer_test.rb +2 -2
data/test/crawlscope/url_test.rb +31 -0
metadata +15 -5
data/lib/crawlscope/task.rb +0 -131

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 58a83d74a7b2b8422df4f161db9d3a7fe3ff213495f0837fd29a08cc13715b86
-  data.tar.gz: 02bd5743bcaae94bfdcc169fb6fe782257527984da68b091f6b75db3420b4244
+  metadata.gz: b49aaaa6fdb5f7d5bd4dc63713d8c0090411e7063363645a900d8f59d803aaaa
+  data.tar.gz: 5dfcc35d60745c25db6faf3acaa4344e29e438c758740613d6216e2f47aeac6e
 SHA512:
-  metadata.gz: c566f6899f45633db13a8ee47ac15f5e6054a4adff087774ce17ef15c26b10340694bd395e0de0efbdb5b652cf8ea04e3cbbb452d9467fd8167143f3675d5642
-  data.tar.gz: 1c087e1f4233224ea2c6b9b14de3bf34f4007b4689cd4fa8b9a3ea7ba688f78beb12431d0ffc7b6f54cae1eead319e3ba8293cef325440c614ca191b6ebf0e8b
+  metadata.gz: 9f66627274ce2ea969b5bb9b53a339215718c37baf47393c75bcf3a528c5c73658c6a71903fdbbf9e53796aaf3680be5f99ab4151b834efbf9450e05abbab83b
+  data.tar.gz: 3cf2e2c7f251a6af7b931f00da63436eaa7e09f078d73de112852a10665cf16eefb561c7d61d6bc8b0c3c014ca0db2df217d31c00b9f0ed321565ed554574261

data/CHANGELOG.md CHANGED Viewed

@@ -5,27 +5,26 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
-## [0.1.0] - 2026-04-23
+## [0.3.0] - 2026-04-28
 ### Added
-- add crawlkit release-ready audit gem
+- add JobPost structured data
-- add standalone validation commands
-- move default schema rules into crawlkit
+### Documentation
+- fix missing changelog entry
-### Changed
-- strengthen public API coverage
-- load shared test dependencies
-- rename crawlkit to crawlscope
+### Fixed
+- ldjson check now uses the same convention for default URL

data/README.md CHANGED Viewed

@@ -58,16 +58,16 @@ gem "ferrum"
 ## CLI Usage
-Validate a site directly from the gem:
+Validate a site from its default sitemap:
 ```bash
-crawlscope validate --base-url https://example.com
+crawlscope validate --url https://example.com
 ```
 Validate only specific rules:
 ```bash
-crawlscope validate --base-url https://example.com --rules metadata,links
+crawlscope validate --url https://example.com --rules metadata,links
 ```
 Validate structured data on one or more URLs:
@@ -77,10 +77,11 @@ crawlscope ldjson --url https://example.com/article
 crawlscope ldjson --url https://example.com/a --url https://example.com/b --summary
 ```
-If you do not pass `--sitemap`, `crawlscope` defaults to:
+To use a non-default sitemap, pass `--sitemap`:
-- `https://example.com/sitemap.xml` for real site URLs
-- `public/sitemap.xml` for localhost-style development URLs when that file exists
+```bash
+crawlscope validate --url https://example.com --sitemap https://example.com/sitemap.xml
+```
 Child sitemap indexes are supported automatically.
@@ -89,14 +90,14 @@ Child sitemap indexes are supported automatically.
 ```ruby
 require "crawlscope"
-audit = Crawlscope::Audit.new(
+crawl = Crawlscope::Crawl.new(
   base_url: "https://example.com",
   sitemap_path: "https://example.com/sitemap.xml",
   rules: Crawlscope::RuleRegistry.default(site_name: "Example").rules,
   schema_registry: Crawlscope::SchemaRegistry.default
 )
-result = audit.call
+result = crawl.call
 puts result.ok?
 puts result.issues.to_a.map(&:message)
@@ -104,7 +105,7 @@ puts result.issues.to_a.map(&:message)
 ## Result Shape
-`Crawlscope::Audit` returns a `Crawlscope::Result` with:
+`Crawlscope::Crawl` returns a `Crawlscope::Result` with:
 - `urls`: sitemap URLs selected for validation
 - `pages`: fetched page snapshots
@@ -133,7 +134,7 @@ bin/rails crawlscope:validate
 Available environment overrides:
-- `BASE_URL`
+- `URL`
 - `SITEMAP`
 - `RULES=metadata,links`
 - `JS=1` or `RENDERER=browser`
@@ -149,17 +150,21 @@ bin/rails crawlscope:validate:metadata
 bin/rails crawlscope:validate:structured_data
 bin/rails crawlscope:validate:uniqueness
 bin/rails crawlscope:validate:links
-bin/rails crawlscope:validate:ldjson URL=https://example.com/article
+bin/rails crawlscope:validate:ldjson
 ```
 The same validation surface is also available in the gem repository itself through plain `rake`:
 ```bash
-bundle exec rake crawlscope:validate BASE_URL=https://example.com
-bundle exec rake crawlscope:validate:metadata BASE_URL=https://example.com
+bundle exec rake crawlscope:validate URL=https://example.com
+bundle exec rake crawlscope:validate:metadata URL=https://example.com
 bundle exec rake crawlscope:validate:ldjson URL=https://example.com/article
 ```
+`crawlscope:validate` runs all default sitemap rules: metadata, structured data, uniqueness, and links. `URL` is the site base. Without `SITEMAP`, Crawlscope uses `/sitemap.xml`. With `SITEMAP`, Crawlscope uses `URL` as the site base and validates URLs from that sitemap. `SITEMAP` may be a full URL or a local file path.
+`crawlscope:validate:ldjson` is separate because it directly checks the URL or semicolon-separated URLs in `URL`; it does not crawl the sitemap. Without `URL`, it checks the configured base URL, falling back to `http://localhost:3000`.
 ### Structured Data URL Audit
 For one-off structured-data checks:
@@ -174,7 +179,7 @@ Optional flags:
 - `DEBUG=1`: print detected items
 - `SUMMARY=1`: print grouped failures
-- `REPORT_PATH=...`: write a JSON report
+- `REPORT_PATH=...`: write a JSON report. Treat this as trusted operator input; Crawlscope writes to the path the task process can access.
 - `JS=1` or `RENDERER=browser`: render with Ferrum
 ## Rules
@@ -237,6 +242,8 @@ Checks:
 - `WebApplication`
 - `WebSite`
+The default schema definitions live in `Crawlscope::Schemas`; `Crawlscope::SchemaRegistry` owns registration and validation.
 Host apps can replace or extend the registry:
 ```ruby

data/lib/crawlscope/browser.rb CHANGED Viewed

@@ -45,6 +45,8 @@ module Crawlscope
         doc: Nokogiri::HTML(body)
       )
     rescue => error
+      raise unless browser_error?(error)
       Page.new(
         url: url,
         normalized_url: Url.normalize(url, base_url: @base_url),
@@ -84,5 +86,11 @@ module Crawlscope
     rescue Ferrum::TimeoutError
       raise Timeout::Error, "Timed out waiting for browser network idle"
     end
+    def browser_error?(error)
+      error.is_a?(Timeout::Error) ||
+        error.is_a?(SystemCallError) ||
+        error.class.name.to_s.start_with?("Ferrum::")
+    end
   end
 end

data/lib/crawlscope/cli.rb CHANGED Viewed

@@ -37,7 +37,7 @@ module Crawlscope
         @err.puts(general_usage)
         1
       end
-    rescue OptionParser::InvalidOption, OptionParser::MissingArgument, ConfigurationError, ArgumentError => error
+    rescue OptionParser::InvalidOption, OptionParser::MissingArgument, ConfigurationError, ValidationError, ArgumentError => error
       @err.puts(error.message)
       @err.puts("")
       @err.puts(general_usage)
@@ -49,12 +49,12 @@ module Crawlscope
     def general_usage
       <<~TEXT
         Usage:
-          crawlscope validate --base-url https://example.com [options]
+          crawlscope validate --url https://example.com [options]
           crawlscope ldjson --url https://example.com/page [options]
           crawlscope version
         Commands:
-          validate    Audit sitemap URLs for metadata, structured data, uniqueness, and links
+          validate    Audit URLs for metadata, structured data, uniqueness, and links
           ldjson      Validate structured data on one or more URLs
           version     Print the gem version
       TEXT
@@ -105,11 +105,12 @@ module Crawlscope
       parser.parse!(@argv)
       urls = options[:urls].map(&:strip).reject(&:empty?)
+      urls = default_urls if urls.empty?
       raise ConfigurationError, "Crawlscope URL is not configured" if urls.empty?
       configure_renderer(options[:renderer])
-      result = task.validate_ldjson(
+      result = task.validate_json_ld(
         urls: urls,
         debug: options[:debug],
         renderer: options[:renderer],
@@ -123,7 +124,7 @@ module Crawlscope
     def run_validate
       options = {
-        base_url: normalized_string(ENV["BASE_URL"]),
+        url: normalized_string(ENV["URL"]),
         rule_names: normalized_string(ENV["RULES"]),
         sitemap_path: normalized_string(ENV["SITEMAP"])
       }
@@ -134,10 +135,10 @@ module Crawlscope
       @configuration.timeout_seconds = resolved_integer("TIMEOUT", default: @configuration.timeout_seconds, minimum: 1)
       parser = OptionParser.new do |opts|
-        opts.banner = "Usage: crawlscope validate --base-url https://example.com [options]"
+        opts.banner = "Usage: crawlscope validate --url https://example.com [options]"
-        opts.on("--base-url URL", "Set the site base URL") do |value|
-          options[:base_url] = value
+        opts.on("--url URL", "Set the site URL") do |value|
+          options[:url] = value
         end
         opts.on("--sitemap PATH_OR_URL", "Set the sitemap path or URL") do |value|
@@ -168,7 +169,7 @@ module Crawlscope
       parser.parse!(@argv)
       result = task.validate(
-        base_url: options[:base_url],
+        base_url: options[:url],
         sitemap_path: options[:sitemap_path],
         rule_names: options[:rule_names]
       )
@@ -238,8 +239,12 @@ module Crawlscope
       raw_urls.split(";").map(&:strip).reject(&:empty?)
     end
+    def default_urls
+      [normalized_string(@configuration.base_url) || "http://localhost:3000"]
+    end
     def task
-      @task ||= Task.new(configuration: @configuration, reporter: Reporter.new(io: @out))
+      @task ||= Run.new(configuration: @configuration, reporter: Reporter.new(io: @out))
     end
   end
 end

data/lib/crawlscope/configuration.rb CHANGED Viewed

@@ -7,6 +7,7 @@ module Crawlscope
     DEFAULT_BROWSER_NETWORK_IDLE_TIMEOUT_SECONDS = 5
     DEFAULT_BROWSER_SCROLL_PAGE = true
     DEFAULT_CONCURRENCY = 10
+    RENDERERS = %i[http browser].freeze
     DEFAULT_TIMEOUT_SECONDS = 20
     attr_writer :allowed_statuses, :base_url, :browser_factory, :concurrency, :network_idle_timeout_seconds, :output, :renderer, :rule_registry, :schema_registry, :scroll_page, :site_name, :sitemap_path, :timeout_seconds
@@ -26,7 +27,7 @@ module Crawlscope
     def concurrency
       value = resolve(@concurrency)
-      value.nil? ? DEFAULT_CONCURRENCY : value.to_i
+      positive_integer(value, default: DEFAULT_CONCURRENCY, name: "concurrency")
     end
     def browser_concurrency
@@ -42,7 +43,7 @@ module Crawlscope
     def network_idle_timeout_seconds
       value = resolve(@network_idle_timeout_seconds)
-      value.nil? ? DEFAULT_BROWSER_NETWORK_IDLE_TIMEOUT_SECONDS : value.to_i
+      positive_integer(value, default: DEFAULT_BROWSER_NETWORK_IDLE_TIMEOUT_SECONDS, name: "network_idle_timeout_seconds")
     end
     def output
@@ -55,7 +56,10 @@ module Crawlscope
       normalized_value = value.to_s.strip
       normalized_value = "http" if normalized_value.empty?
-      normalized_value.to_sym
+      renderer = normalized_value.to_sym
+      return renderer if RENDERERS.include?(renderer)
+      raise ConfigurationError, "Crawlscope renderer must be http or browser"
     end
     def rule_registry
@@ -74,7 +78,7 @@ module Crawlscope
         raise ConfigurationError, "Crawlscope sitemap_path is not configured"
       end
-      Audit.new(
+      Crawl.new(
         base_url: base_url,
         sitemap_path: sitemap_path,
         browser_factory: browser_factory,
@@ -111,7 +115,7 @@ module Crawlscope
     def timeout_seconds
       value = resolve(@timeout_seconds)
-      value.nil? ? DEFAULT_TIMEOUT_SECONDS : value.to_i
+      positive_integer(value, default: DEFAULT_TIMEOUT_SECONDS, name: "timeout_seconds")
     end
     private
@@ -119,5 +123,16 @@ module Crawlscope
     def resolve(value)
       value.respond_to?(:call) ? value.call : value
     end
+    def positive_integer(value, default:, name:)
+      return default if value.nil?
+      integer = value.is_a?(Integer) ? value : Integer(value, 10)
+      raise ArgumentError if integer < 1
+      integer
+    rescue ArgumentError, TypeError
+      raise ConfigurationError, "Crawlscope #{name} must be an integer >= 1"
+    end
   end
 end

data/lib/crawlscope/context.rb ADDED Viewed

@@ -0,0 +1,9 @@
+# frozen_string_literal: true
+module Crawlscope
+  Context = Data.define(:allowed_statuses, :base_url, :resolve_target, :schema_registry) do
+    def fetch(name)
+      public_send(name)
+    end
+  end
+end

data/lib/crawlscope/{audit.rb → crawl.rb} RENAMED Viewed

@@ -1,7 +1,7 @@
 # frozen_string_literal: true
 module Crawlscope
-  class Audit
+  class Crawl
     def initialize(base_url:, sitemap_path:, rules:, schema_registry:, browser_factory: nil, concurrency: Configuration::DEFAULT_CONCURRENCY, network_idle_timeout_seconds: Configuration::DEFAULT_BROWSER_NETWORK_IDLE_TIMEOUT_SECONDS, renderer: :http, scroll_page: Configuration::DEFAULT_BROWSER_SCROLL_PAGE, timeout_seconds: Configuration::DEFAULT_TIMEOUT_SECONDS, allowed_statuses: Configuration::DEFAULT_ALLOWED_STATUSES)
       @base_url = base_url
       @sitemap_path = sitemap_path
@@ -17,28 +17,15 @@ module Crawlscope
     end
     def call
-      urls = Sitemap.new(path: @sitemap_path).urls(base_url: @base_url)
-      raise ValidationError, "No URLs found in sitemap: #{@sitemap_path}" if urls.empty?
-      @page_fetcher = build_page
-      pages = Crawler.new(
-        page_fetcher: @page_fetcher,
-        concurrency: @concurrency
-      ).call(urls)
+      urls = sitemap_urls
+      @page_fetcher = page
+      pages = Crawler.new(page_fetcher: @page_fetcher, concurrency: @concurrency).call(urls)
       issues = IssueCollection.new
-      collect_crawl_issues(pages, issues)
-      cache_pages(pages)
-      context = {
-        allowed_statuses: @allowed_statuses,
-        base_url: @base_url,
-        resolve_target: method(:resolve_target),
-        schema_registry: @schema_registry
-      }
-      @rules.each do |rule|
-        rule.call(urls: urls, pages: pages, issues: issues, context: context)
-      end
+      collect(pages, issues)
+      cache(pages)
+      scan(urls, pages, issues)
       Result.new(
         base_url: @base_url,
@@ -53,8 +40,15 @@ module Crawlscope
     private
-    def build_browser
-      Crawlscope::Browser.new(
+    def sitemap_urls
+      urls = Sitemap.new(path: @sitemap_path).urls(base_url: @base_url)
+      raise ValidationError, "No URLs found in sitemap: #{@sitemap_path}" if urls.empty?
+      urls
+    end
+    def browser
+      Browser.new(
         base_url: @base_url,
         timeout_seconds: @timeout_seconds,
         network_idle_timeout_seconds: @network_idle_timeout_seconds,
@@ -64,65 +58,81 @@ module Crawlscope
       raise ConfigurationError, "Browser rendering requires the ferrum gem (#{error.message})"
     end
-    def build_page
+    def page
       if @renderer == :browser
-        browser_factory = @browser_factory || method(:build_browser)
-        browser_factory.call
+        (@browser_factory || method(:browser)).call
       else
         Http.new(base_url: @base_url, timeout_seconds: @timeout_seconds)
       end
     end
-    def build_target_resolution(page, normalized_target_url, crawled:)
-      {
-        crawled: crawled,
-        error: page.error,
-        final_url: page.normalized_final_url || normalized_target_url,
-        status: page.status
-      }
-    end
-    def cache_pages(pages)
-      @page_by_url = {}
-      @target_resolution_cache = {}
-      pages.each do |page|
-        @page_by_url[page.normalized_url] = page unless page.normalized_url.to_s.empty?
-        @page_by_url[page.normalized_final_url] = page unless page.normalized_final_url.to_s.empty?
-      end
+    def context
+      Context.new(
+        allowed_statuses: @allowed_statuses,
+        base_url: @base_url,
+        resolve_target: method(:resolve),
+        schema_registry: @schema_registry
+      )
     end
-    def collect_crawl_issues(pages, issues)
+    def collect(pages, issues)
       pages.each do |page|
         if page.error
           issues.add(code: :fetch_failed, severity: :error, category: :crawl, url: page.url, message: page.error, details: {})
         elsif !@allowed_statuses.include?(page.status)
           issues.add(code: :unexpected_status, severity: :error, category: :crawl, url: page.url, message: "HTTP #{page.status}", details: {status: page.status})
+        elsif redirected?(page)
+          issues.add(code: :redirected_page, severity: :warning, category: :crawl, url: page.url, message: "redirects to #{page.final_url}", details: {final_url: page.final_url, status: page.status})
         end
       end
     end
-    def resolve_target(target_url)
-      normalized_target_url = Url.normalize(target_url, base_url: @base_url)
-      return @target_resolution_cache[normalized_target_url] if @target_resolution_cache.key?(normalized_target_url)
+    def cache(pages)
+      @pages = {}
+      @targets = {}
-      resolution = resolve_from_crawled_page(normalized_target_url)
-      resolution ||= resolve_by_fetching_target(normalized_target_url)
-      @target_resolution_cache[normalized_target_url] = resolution
+      pages.each do |page|
+        @pages[page.normalized_url] = page unless page.normalized_url.to_s.empty?
+        @pages[page.normalized_final_url] = page unless page.normalized_final_url.to_s.empty?
+      end
     end
-    def resolve_by_fetching_target(normalized_target_url)
-      page = @page_fetcher.fetch(normalized_target_url)
-      @page_by_url[page.normalized_url] = page unless page.normalized_url.to_s.empty?
-      @page_by_url[page.normalized_final_url] = page unless page.normalized_final_url.to_s.empty?
-      build_target_resolution(page, normalized_target_url, crawled: false)
+    def scan(urls, pages, issues)
+      @rules.each do |rule|
+        rule.call(urls: urls, pages: pages, issues: issues, context: context)
+      end
     end
-    def resolve_from_crawled_page(normalized_target_url)
-      page = @page_by_url[normalized_target_url]
-      return if page.nil?
+    def resolve(target_url)
+      normalized_url = Url.normalize(target_url, base_url: @base_url)
+      return @targets[normalized_url] if @targets.key?(normalized_url)
+      @targets[normalized_url] = resolved_page(normalized_url) || fetched_page(normalized_url)
+    end
+    def fetched_page(normalized_url)
+      page = @page_fetcher.fetch(normalized_url)
+      @pages[page.normalized_url] = page unless page.normalized_url.to_s.empty?
+      @pages[page.normalized_final_url] = page unless page.normalized_final_url.to_s.empty?
+      resolution(page, normalized_url, crawled: false)
+    end
+    def resolved_page(normalized_url)
+      page = @pages[normalized_url]
+      resolution(page, normalized_url, crawled: true) if page
+    end
+    def resolution(page, normalized_url, crawled:)
+      {
+        crawled: crawled,
+        error: page.error,
+        final_url: page.normalized_final_url || normalized_url,
+        status: page.status
+      }
+    end
-      build_target_resolution(page, normalized_target_url, crawled: true)
+    def redirected?(page)
+      page.normalized_url.to_s != page.normalized_final_url.to_s
     end
   end
 end

data/lib/crawlscope/crawler.rb CHANGED Viewed

@@ -15,7 +15,7 @@ module Crawlscope
       urls.each do |url|
         pool.post do
-          pages << @page_fetcher.fetch(url)
+          pages << fetch(url)
         end
       end
@@ -24,5 +24,23 @@ module Crawlscope
       pages.to_a
     end
+    private
+    def fetch(url)
+      @page_fetcher.fetch(url)
+    rescue => error
+      Page.new(
+        url: url,
+        normalized_url: Url.normalize(url, base_url: url),
+        final_url: url,
+        normalized_final_url: Url.normalize(url, base_url: url),
+        status: nil,
+        headers: {},
+        body: nil,
+        doc: nil,
+        error: "#{error.class}: #{error.message}"
+      )
+    end
   end
 end

data/lib/crawlscope/http.rb CHANGED Viewed

@@ -43,7 +43,7 @@ module Crawlscope
         body: body,
         doc: doc
       )
-    rescue => error
+    rescue Faraday::Error, SocketError, SystemCallError, Timeout::Error => error
       Page.new(
         url: url,
         normalized_url: Url.normalize(url, base_url: @base_url),

data/lib/crawlscope/rake_tasks.rb ADDED Viewed

@@ -0,0 +1,28 @@
+# frozen_string_literal: true
+module Crawlscope
+  module RakeTasks
+    module_function
+    def validate
+      run("validate")
+    end
+    def ldjson
+      run("ldjson")
+    end
+    def validate_rule(rule)
+      original_rules = ENV["RULES"]
+      ENV["RULES"] = rule
+      validate
+    ensure
+      ENV["RULES"] = original_rules
+    end
+    def run(command)
+      status = Cli.start([command], out: $stdout, err: $stderr)
+      exit(status) unless status.zero?
+    end
+  end
+end