RubyGems - scrapio - Versions diffs - 1.0.0 - Mend

scrapio 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15)

checksums.yaml +7 -0
data/README.md +140 -0
data/lib/scrapio/client.rb +28 -0
data/lib/scrapio/errors.rb +14 -0
data/lib/scrapio/http_client.rb +53 -0
data/lib/scrapio/resources/amazon.rb +15 -0
data/lib/scrapio/resources/crawl.rb +16 -0
data/lib/scrapio/resources/fetch.rb +16 -0
data/lib/scrapio/resources/google.rb +15 -0
data/lib/scrapio/resources/interact.rb +15 -0
data/lib/scrapio/resources/jobs.rb +31 -0
data/lib/scrapio/resources/walmart.rb +15 -0
data/lib/scrapio/resources/youtube.rb +19 -0
data/lib/scrapio.rb +8 -0
metadata +60 -0

checksums.yaml ADDED Viewed

@@ -0,0 +1,7 @@
+---
+SHA256:
+  metadata.gz: 0b01cff647c80bdc948a24eef3b9dfe4095466678b6eda74cf01bbfc6a706c57
+  data.tar.gz: '08880b616f26a60b6ce18949253cd96c2d24aca227e702843b6371dcab60c7d6'
+SHA512:
+  metadata.gz: 0f321767a8ef0e4775ee52836ba7fa620df758ad341c3d19abf5d52ddaf2a90ba4e9c2e46b22d76d7cbc3ee056cbed63b34d8801d5a3737dbd471c51768122e4
+  data.tar.gz: da66af15d073cce2719fab455fd940d272215d76e3abb894c332197ed817d9b79a1392302e04c85f74e896d9726bd4957886d8f833cf34a949906c932fdd8c80

data/README.md ADDED Viewed

@@ -0,0 +1,140 @@
+# scrapio
+Official Ruby SDK for [Scrapio](https://scrapio.dev) — fetch, crawl, search, and extract structured data from any URL.
+## Install
+```bash
+gem install scrapio
+```
+Or add to your Gemfile:
+```ruby
+gem "scrapio"
+```
+Requires Ruby 2.7 or later.
+## Quickstart
+```ruby
+require "scrapio"
+client = Scrapio::Client.new(ENV["SCRAPIO_API_KEY"])
+result = client.fetch.fetch(url: "https://example.com", output: ["markdown"])
+puts result["outputs"]["markdown"]
+```
+## Usage
+### Fetch a page
+```ruby
+result = client.fetch.fetch(
+  url:       "https://news.ycombinator.com",
+  render_js: true,
+  output:    ["markdown"]
+)
+```
+### Google Search
+```ruby
+results = client.google.search(
+  search:       "best web scraping API 2025",
+  country_code: "us"
+)
+puts results["results"]
+```
+### Amazon product
+```ruby
+product = client.amazon.get_product("B08N5WRWNW")
+puts "#{product["title"]} — $#{product["price"]}"
+```
+### Walmart search
+```ruby
+items = client.walmart.search("headphones")
+```
+### YouTube video
+```ruby
+video = client.youtube.get_video("dQw4w9WgXcQ")
+```
+### Browser automation
+```ruby
+result = client.interact.interact(
+  url:     "https://example.com",
+  actions: [
+    { type: "click", selector: "#login" },
+    { type: "type",  selector: "#email", value: "user@example.com" },
+  ]
+)
+```
+### Crawl a site
+```ruby
+result = client.crawl.crawl(
+  seeds:     ["https://docs.example.com"],
+  max_pages: 50,
+  output:    ["markdown"]
+)
+puts result["result"]["summary"]["pages_succeeded"]
+```
+### Async jobs
+```ruby
+job = client.jobs.create(
+  job_type: "fetch",
+  payload:  { url: "https://example.com", output: ["markdown"] }
+)
+result = client.jobs.wait_for_completion(job["job_id"])
+```
+## Configuration
+```ruby
+client = Scrapio::Client.new(
+  ENV["SCRAPIO_API_KEY"],
+  base_url: "https://api.scrapio.dev",  # optional override
+  timeout:  30                           # optional, default 30s
+)
+```
+## Error handling
+```ruby
+begin
+  result = client.fetch.fetch(url: "https://example.com")
+rescue Scrapio::AuthError
+  puts "Invalid API key"
+rescue Scrapio::CreditsExhaustedError
+  puts "No credits remaining"
+rescue Scrapio::RateLimitError
+  puts "Rate limited — back off and retry"
+rescue Scrapio::ScrapioError => e
+  puts "API error #{e.status_code}: #{e.message}"
+end
+```
+## Links
+- [Documentation](https://scrapio.dev/docs)
+- [API Reference](https://scrapio.dev/docs/api-reference/fetch)
+- [Dashboard](https://app.scrapio.dev)
+- [Get an API key](https://scrapio.dev#pricing)
+## License
+MIT

data/lib/scrapio/client.rb ADDED Viewed

@@ -0,0 +1,28 @@
+require_relative "http_client"
+require_relative "resources/fetch"
+require_relative "resources/google"
+require_relative "resources/amazon"
+require_relative "resources/walmart"
+require_relative "resources/youtube"
+require_relative "resources/jobs"
+require_relative "resources/crawl"
+require_relative "resources/interact"
+module Scrapio
+  # Facade for the SDK: builds one HttpClient and hands that same instance
+  # to every resource object, each of which is exposed through a reader.
+  class Client
+    attr_reader :fetch, :google, :amazon, :walmart,
+                :youtube, :jobs, :crawl, :interact
+    # @param api_key [String] forwarded unchanged to HttpClient
+    # @param base_url [String] API root, defaults to HttpClient::DEFAULT_BASE_URL
+    # @param timeout [Numeric] defaults to HttpClient::DEFAULT_TIMEOUT
+    def initialize(api_key, base_url: HttpClient::DEFAULT_BASE_URL, timeout: HttpClient::DEFAULT_TIMEOUT)
+      transport = HttpClient.new(api_key, base_url: base_url, timeout: timeout)
+      # Reader name => resource class; each one wraps the shared transport.
+      {
+        fetch:    Resources::Fetch,
+        google:   Resources::Google,
+        amazon:   Resources::Amazon,
+        walmart:  Resources::Walmart,
+        youtube:  Resources::YouTube,
+        jobs:     Resources::Jobs,
+        crawl:    Resources::Crawl,
+        interact: Resources::Interact,
+      }.each do |reader, resource_class|
+        instance_variable_set("@#{reader}", resource_class.new(transport))
+      end
+    end
+  end
+end

data/lib/scrapio/errors.rb ADDED Viewed

@@ -0,0 +1,14 @@
+module Scrapio
+  # Root of the SDK's exception hierarchy; rescue this to catch any
+  # error class defined below.
+  class ScrapioError < StandardError
+    # Status code attached to the failure (0 when none was supplied).
+    attr_reader :status_code
+    # @param message [String] human-readable description
+    # @param status_code [Integer] status code to attach, 0 by default
+    def initialize(message, status_code: 0)
+      @status_code = status_code
+      super(message)
+    end
+  end
+  # Raised by HttpClient for HTTP 401 and 403 responses.
+  class AuthError < ScrapioError
+  end
+  # Raised by HttpClient for HTTP 429 responses.
+  class RateLimitError < ScrapioError
+  end
+  # Raised by HttpClient for HTTP 402 responses.
+  class CreditsExhaustedError < ScrapioError
+  end
+end

data/lib/scrapio/http_client.rb ADDED Viewed

@@ -0,0 +1,53 @@
+require "net/http"
+require "uri"
+require "json"
+module Scrapio
+  # Minimal JSON-over-HTTP transport shared by every resource class.
+  #
+  # Sends the API key as a Bearer token, decodes JSON responses, and maps
+  # non-2xx statuses onto the Scrapio error hierarchy.
+  class HttpClient
+    DEFAULT_BASE_URL = "https://api.scrapio.dev"
+    DEFAULT_TIMEOUT  = 30
+    # @param api_key [String] key sent in the Authorization header
+    # @param base_url [String] API root; trailing slashes are ignored
+    # @param timeout [Numeric] seconds, used for both open and read timeouts
+    def initialize(api_key, base_url: DEFAULT_BASE_URL, timeout: DEFAULT_TIMEOUT)
+      @api_key  = api_key
+      # Request paths begin with "/", so strip trailing slashes from the root
+      # to avoid building "https://host//v1/...".
+      @base_url = base_url.to_s.sub(%r{/+\z}, "")
+      @timeout  = timeout
+    end
+    # Issues a GET request.
+    #
+    # @param path [String] path appended to the base URL
+    # @param params [Hash] query parameters; nil values are dropped
+    # @return [Object] decoded JSON body
+    # @raise [ScrapioError] on any non-2xx status or undecodable 2xx body
+    def get(path, params = {})
+      uri = URI("#{@base_url}#{path}")
+      filtered = params.compact
+      uri.query = URI.encode_www_form(filtered) unless filtered.empty?
+      request(Net::HTTP::Get.new(uri))
+    end
+    # Issues a POST request with a JSON body.
+    #
+    # @param path [String] path appended to the base URL
+    # @param body [Hash] payload; top-level nil values are dropped
+    # @return [Object] decoded JSON body
+    # @raise [ScrapioError] on any non-2xx status or undecodable 2xx body
+    def post(path, body = {})
+      uri = URI("#{@base_url}#{path}")
+      req = Net::HTTP::Post.new(uri)
+      req["Content-Type"] = "application/json"
+      req.body = JSON.generate(body.compact)
+      request(req)
+    end
+    private
+    # Adds the auth/accept headers, performs the request and interprets the reply.
+    def request(req)
+      req["Authorization"] = "Bearer #{@api_key}"
+      req["Accept"]        = "application/json"
+      uri = req.uri
+      resp = Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == "https",
+                             read_timeout: @timeout, open_timeout: @timeout) do |http|
+        http.request(req)
+      end
+      handle_response(resp)
+    end
+    # Returns the decoded body for 2xx responses and raises the matching
+    # error class otherwise. The status is checked before the body is parsed
+    # so a non-JSON error page (e.g. HTML from a proxy) still surfaces as a
+    # ScrapioError carrying the status code instead of a JSON::ParserError.
+    def handle_response(resp)
+      status = resp.code.to_i
+      return parse_success(resp.body, status) if (200..299).cover?(status)
+      message = error_message(resp.body)
+      case status
+      when 401, 403 then raise AuthError.new(message || "Unauthorized", status_code: status)
+      when 402      then raise CreditsExhaustedError.new(message || "Credits exhausted", status_code: 402)
+      when 429      then raise RateLimitError.new(message || "Rate limited", status_code: 429)
+      else               raise ScrapioError.new(message || "HTTP #{resp.code}", status_code: status)
+      end
+    end
+    # Decodes a successful response body. An absent or blank body (for
+    # example a 204) yields an empty Hash.
+    def parse_success(body, status)
+      return {} if body.nil? || body.strip.empty?
+      JSON.parse(body)
+    rescue JSON::ParserError
+      raise ScrapioError.new("Invalid JSON in HTTP #{status} response", status_code: status)
+    end
+    # Extracts "message" from a JSON-object error body. Returns nil when the
+    # body is missing, not JSON, or not an object, so the caller falls back
+    # to its default message for that status.
+    def error_message(body)
+      data = JSON.parse(body.to_s)
+      data["message"] if data.is_a?(Hash)
+    rescue JSON::ParserError
+      nil
+    end
+  end
+end

data/lib/scrapio/resources/amazon.rb ADDED Viewed

@@ -0,0 +1,15 @@
+module Scrapio
+  module Resources
+    # Amazon endpoints (/v1/amazon/*).
+    class Amazon
+      # Written as a regular method: endless definitions need Ruby 3.0,
+      # while the gem declares support for Ruby 2.7.
+      #
+      # @param http [#get] transport used for every request
+      def initialize(http)
+        @http = http
+      end
+      # Looks up a single product via GET /v1/amazon/product.
+      #
+      # @param asin [String] product identifier, sent as `asin`
+      # @param country [String, nil] optional `country` parameter
+      # @return [Object] whatever the transport returns
+      def get_product(asin, country: nil)
+        @http.get("/v1/amazon/product", { asin: asin, country: country })
+      end
+      # Searches products via GET /v1/amazon/search.
+      #
+      # @param query [String] search terms, sent as `query`
+      # @param country [String, nil] optional `country` parameter
+      # @param page [Integer, nil] optional `page` parameter
+      # @return [Object] whatever the transport returns
+      def search(query, country: nil, page: nil)
+        @http.get("/v1/amazon/search", { query: query, country: country, page: page })
+      end
+    end
+  end
+end

data/lib/scrapio/resources/crawl.rb ADDED Viewed

@@ -0,0 +1,16 @@
+module Scrapio
+  module Resources
+    # Crawl endpoint (/v1/crawl).
+    class Crawl
+      # Written as a regular method: endless definitions need Ruby 3.0,
+      # while the gem declares support for Ruby 2.7.
+      #
+      # @param http [#post] transport used for every request
+      def initialize(http)
+        @http = http
+      end
+      # Starts a crawl via POST /v1/crawl. Every keyword is forwarded under
+      # the same name; options left as nil are passed through as nil.
+      #
+      # @param seeds [Array<String>] starting URLs (required)
+      # @return [Object] whatever the transport returns
+      def crawl(seeds:, max_pages: nil, max_depth: nil, same_domain_only: nil,
+                output: nil, extract: nil, timeout_ms: nil)
+        @http.post("/v1/crawl", {
+          seeds: seeds, max_pages: max_pages, max_depth: max_depth,
+          same_domain_only: same_domain_only, output: output,
+          extract: extract, timeout_ms: timeout_ms,
+        })
+      end
+    end
+  end
+end

data/lib/scrapio/resources/fetch.rb ADDED Viewed

@@ -0,0 +1,16 @@
+module Scrapio
+  module Resources
+    # Fetch endpoint (/v1/fetch).
+    class Fetch
+      # Written as a regular method: endless definitions need Ruby 3.0,
+      # while the gem declares support for Ruby 2.7.
+      #
+      # @param http [#post] transport used for every request
+      def initialize(http)
+        @http = http
+      end
+      # Fetches one URL via POST /v1/fetch. Every keyword is forwarded under
+      # the same name; options left as nil are passed through as nil.
+      #
+      # @param url [String] page to fetch (required)
+      # @return [Object] whatever the transport returns
+      def fetch(url:, render_js: nil, device: nil, session: nil, output: nil,
+                extract: nil, actions: nil, timeout: nil, proxy: nil, country: nil)
+        @http.post("/v1/fetch", {
+          url: url, render_js: render_js, device: device, session: session,
+          output: output, extract: extract, actions: actions,
+          timeout: timeout, proxy: proxy, country: country,
+        })
+      end
+    end
+  end
+end

data/lib/scrapio/resources/google.rb ADDED Viewed

@@ -0,0 +1,15 @@
+module Scrapio
+  module Resources
+    # Google Search endpoint (/v1/google/search).
+    class Google
+      # Written as a regular method: endless definitions need Ruby 3.0,
+      # while the gem declares support for Ruby 2.7.
+      #
+      # @param http [#get] transport used for every request
+      def initialize(http)
+        @http = http
+      end
+      # Runs a search via GET /v1/google/search.
+      #
+      # Named keywords are forwarded under the same name. Any extra keywords
+      # are merged in last, so they are sent as additional parameters.
+      #
+      # @param search [String] query text (required)
+      # @param opts [Hash] additional parameters passed through untouched
+      # @return [Object] whatever the transport returns
+      def search(search:, search_type: nil, country_code: nil, language: nil,
+                 device: nil, page: nil, date_range: nil, **opts)
+        @http.get("/v1/google/search", {
+          search: search, search_type: search_type, country_code: country_code,
+          language: language, device: device, page: page, date_range: date_range,
+        }.merge(opts))
+      end
+    end
+  end
+end

data/lib/scrapio/resources/interact.rb ADDED Viewed

@@ -0,0 +1,15 @@
+module Scrapio
+  module Resources
+    # Browser-automation endpoint (/v1/interact).
+    class Interact
+      # Written as a regular method: endless definitions need Ruby 3.0,
+      # while the gem declares support for Ruby 2.7.
+      #
+      # @param http [#post] transport used for every request
+      def initialize(http)
+        @http = http
+      end
+      # Runs a list of actions against a page via POST /v1/interact. Every
+      # keyword is forwarded under the same name; options left as nil are
+      # passed through as nil.
+      #
+      # @param url [String] page to open (required)
+      # @param actions [Array<Hash>] actions to perform (required)
+      # @return [Object] whatever the transport returns
+      def interact(url:, actions:, device: nil, session: nil,
+                   output: nil, extract: nil, timeout_ms: nil)
+        @http.post("/v1/interact", {
+          url: url, actions: actions, device: device, session: session,
+          output: output, extract: extract, timeout_ms: timeout_ms,
+        })
+      end
+    end
+  end
+end

data/lib/scrapio/resources/jobs.rb ADDED Viewed

@@ -0,0 +1,31 @@
+require "uri"
+module Scrapio
+  module Resources
+    # Async job endpoints (/v1/jobs).
+    class Jobs
+      # Status values after which polling stops.
+      TERMINAL = %w[completed partial failed cancelled].freeze
+      # Written as a regular method: endless definitions need Ruby 3.0,
+      # while the gem declares support for Ruby 2.7.
+      #
+      # @param http [#get, #post] transport used for every request
+      def initialize(http)
+        @http = http
+      end
+      # Creates a job via POST /v1/jobs.
+      #
+      # @param job_type [String] sent as `job_type`
+      # @param payload [Hash] sent as `payload`
+      # @param webhook_url [String, nil] optional `webhook_url`
+      # @return [Object] whatever the transport returns
+      def create(job_type:, payload:, webhook_url: nil)
+        @http.post("/v1/jobs", { job_type: job_type, payload: payload, webhook_url: webhook_url })
+      end
+      # Fetches a job via GET /v1/jobs/:id.
+      #
+      # @param job_id [#to_s] job identifier
+      # @return [Object] whatever the transport returns
+      def get(job_id)
+        @http.get("/v1/jobs/#{escape_segment(job_id)}")
+      end
+      # Fetches a job's result via GET /v1/jobs/:id/result.
+      #
+      # @param job_id [#to_s] job identifier
+      # @return [Object] whatever the transport returns
+      def get_result(job_id)
+        @http.get("/v1/jobs/#{escape_segment(job_id)}/result")
+      end
+      # Polls the job until its "status" is one of TERMINAL, then returns
+      # the job's result. At least one poll is always made.
+      #
+      # @param job_id [#to_s] job identifier
+      # @param poll_interval [Numeric] seconds to sleep between polls
+      # @param timeout [Numeric] seconds after which polling gives up
+      # @return [Object] value of #get_result
+      # @raise [ScrapioError] when the job is not terminal once timeout has elapsed
+      def wait_for_completion(job_id, poll_interval: 2.0, timeout: 300.0)
+        # Monotonic clock: wall-clock adjustments must not stretch or cut the wait.
+        deadline = monotonic_now + timeout
+        loop do
+          job = get(job_id)
+          return get_result(job_id) if TERMINAL.include?(job["status"])
+          raise ScrapioError, "Job #{job_id} did not complete within #{timeout}s" if monotonic_now >= deadline
+          sleep(poll_interval)
+        end
+      end
+      private
+      # Percent-encodes a value for use as a single URL path segment, so an
+      # id containing "/", "?" or spaces cannot alter the request path.
+      # Form encoding turns a space into "+", which is rewritten to "%20";
+      # a literal "+" is already emitted as "%2B".
+      def escape_segment(value)
+        URI.encode_www_form_component(value.to_s).gsub("+", "%20")
+      end
+      # Current reading of the monotonic clock, in seconds.
+      def monotonic_now
+        Process.clock_gettime(Process::CLOCK_MONOTONIC)
+      end
+    end
+  end
+end

data/lib/scrapio/resources/walmart.rb ADDED Viewed

@@ -0,0 +1,15 @@
+module Scrapio
+  module Resources
+    # Walmart endpoints (/v1/walmart/*).
+    class Walmart
+      # Written as a regular method: endless definitions need Ruby 3.0,
+      # while the gem declares support for Ruby 2.7.
+      #
+      # @param http [#get] transport used for every request
+      def initialize(http)
+        @http = http
+      end
+      # Looks up a single product via GET /v1/walmart/product.
+      #
+      # @param product_id [String] sent as `product_id`
+      # @param country [String, nil] optional `country` parameter
+      # @return [Object] whatever the transport returns
+      def get_product(product_id, country: nil)
+        @http.get("/v1/walmart/product", { product_id: product_id, country: country })
+      end
+      # Searches products via GET /v1/walmart/search.
+      #
+      # @param query [String] search terms, sent as `query`
+      # @param country [String, nil] optional `country` parameter
+      # @param page [Integer, nil] optional `page` parameter
+      # @return [Object] whatever the transport returns
+      def search(query, country: nil, page: nil)
+        @http.get("/v1/walmart/search", { query: query, country: country, page: page })
+      end
+    end
+  end
+end

data/lib/scrapio/resources/youtube.rb ADDED Viewed

@@ -0,0 +1,19 @@
+require "uri"
+module Scrapio
+  module Resources
+    # YouTube endpoints (/v1/youtube/*).
+    class YouTube
+      # Written as a regular method: endless definitions need Ruby 3.0,
+      # while the gem declares support for Ruby 2.7.
+      #
+      # @param http [#get] transport used for every request
+      def initialize(http)
+        @http = http
+      end
+      # Fetches one video via GET /v1/youtube/videos/:id.
+      #
+      # The id is percent-encoded as a path segment. This is done with
+      # URI.encode_www_form_component plus a "+" -> "%20" rewrite because
+      # URI.encode_uri_component is missing from the uri library bundled
+      # with older Rubies (NOTE(review): believed to be 3.2+ only).
+      # A literal "+" in the id is already emitted as "%2B", so every "+"
+      # in the encoded string stands for a space.
+      #
+      # @param video_id [#to_s] video identifier
+      # @return [Object] whatever the transport returns
+      def get_video(video_id)
+        segment = URI.encode_www_form_component(video_id.to_s).gsub("+", "%20")
+        @http.get("/v1/youtube/videos/#{segment}")
+      end
+      # Searches videos via GET /v1/youtube/search.
+      #
+      # @param query [String] search terms, sent as `query`
+      # @param page [Integer, nil] optional `page` parameter
+      # @param country [String, nil] optional `country` parameter
+      # @param language [String, nil] optional `language` parameter
+      # @return [Object] whatever the transport returns
+      def search(query, page: nil, country: nil, language: nil)
+        @http.get("/v1/youtube/search", { query: query, page: page, country: country, language: language })
+      end
+      # Fetches subtitles via GET /v1/youtube/subtitles.
+      #
+      # @param video_id [String] sent as `video_id`
+      # @param language [String, nil] optional `language` parameter
+      # @return [Object] whatever the transport returns
+      def get_subtitles(video_id, language: nil)
+        @http.get("/v1/youtube/subtitles", { video_id: video_id, language: language })
+      end
+    end
+  end
+end

data/lib/scrapio.rb ADDED Viewed

@@ -0,0 +1,8 @@
+require_relative "scrapio/errors"
+require_relative "scrapio/client"
+module Scrapio
+  class << self
+    # Shorthand for Scrapio::Client.new: passes the API key and any keyword
+    # options straight through.
+    #
+    # @param api_key [String] forwarded to Client.new
+    # @param opts [Hash] keyword options forwarded to Client.new
+    # @return [Scrapio::Client]
+    def new(api_key, **opts)
+      Client.new(api_key, **opts)
+    end
+  end
+end

metadata ADDED Viewed

@@ -0,0 +1,60 @@
+--- !ruby/object:Gem::Specification
+name: scrapio
+version: !ruby/object:Gem::Version
+  version: 1.0.0
+platform: ruby
+authors:
+- Scrapio
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2026-06-29 00:00:00.000000000 Z
+dependencies: []
+description: Fetch, crawl, search, and extract structured data from any URL. Includes
+  Google Search, YouTube transcripts, Amazon and Walmart product data, browser automation,
+  and async jobs.
+email: support@scrapio.dev
+executables: []
+extensions: []
+extra_rdoc_files: []
+files:
+- README.md
+- lib/scrapio.rb
+- lib/scrapio/client.rb
+- lib/scrapio/errors.rb
+- lib/scrapio/http_client.rb
+- lib/scrapio/resources/amazon.rb
+- lib/scrapio/resources/crawl.rb
+- lib/scrapio/resources/fetch.rb
+- lib/scrapio/resources/google.rb
+- lib/scrapio/resources/interact.rb
+- lib/scrapio/resources/jobs.rb
+- lib/scrapio/resources/walmart.rb
+- lib/scrapio/resources/youtube.rb
+homepage: https://scrapio.dev
+licenses:
+- MIT
+metadata:
+  homepage_uri: https://scrapio.dev
+  source_code_uri: https://github.com/xsronhou/scrapping-tool
+  documentation_uri: https://scrapio.dev/docs
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: '2.7'
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: '0'
+requirements: []
+rubygems_version: 3.5.22
+signing_key:
+specification_version: 4
+summary: Official Ruby SDK for Scrapio
+test_files: []